prop-src/lexeme.cc

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //  This file is generated automatically using Prop (version 2.3.6),
   3 //  last updated on Nov 2, 1999.
   4 //  The original source file is "lexeme.pcc".
   5 ///////////////////////////////////////////////////////////////////////////////
   6
   7 #line 1 "lexeme.pcc"
   8 ///////////////////////////////////////////////////////////////////////////////
   9 //
  10 //  This file implements some support routines for lexeme and regular
  11 //  expressions handling.
  12 //
  13 ///////////////////////////////////////////////////////////////////////////////
  14 #include <string.h>
  15 #include <AD/strings/charesc.h>
  16 #include "ir.h"
  17 #include "ast.h"
  18 #include "hashtab.h"
  19 #include "type.h"
  20 #include "matchcom.h"
  21
  22 ///////////////////////////////////////////////////////////////////////////////
  23 //  Properly quote a string
  24 ///////////////////////////////////////////////////////////////////////////////
  25 const char * make_quoted_string (const char * s)
  26 {  char * new_str = str_pool[strlen(s) * 4];
  27    *new_str = '"';
  28    char * end_str = quote_string(new_str+1,s);
  29    *end_str++ = '"';
  30    *end_str   = '\0';
  31    return new_str;
  32 }
  33
  34 ///////////////////////////////////////////////////////////////////////////////
  35 //  Add contexts to a string/regexp pattern
  36 ///////////////////////////////////////////////////////////////////////////////
  37 Pat add_contexts (Conses contexts, Pat pat)
  38 {  if (contexts ==
  39 #line 31 "lexeme.pcc"
  40 #line 31 "lexeme.pcc"
  41 nil_1_
  42 #line 31 "lexeme.pcc"
  43 #line 31 "lexeme.pcc"
  44 ) return pat;
  45
  46 #line 32 "lexeme.pcc"
  47 #line 74 "lexeme.pcc"
  48 {
  49    if (pat) {
  50       switch (pat->tag__) {
  51          case a_Pat::tag_LITERALpat: {
  52             switch (((Pat_LITERALpat *)pat)->LITERALpat->tag__) {
  53                case a_Literal::tag_STRINGlit: {
  54 #line 53 "lexeme.pcc"
  55                  return add_contexts(contexts,LITERALpat(REGEXPlit(convert_regexp(((Literal_STRINGlit *)((Pat_LITERALpat *)pat)->LITERALpat)->STRINGlit))));
  56
  57 #line 54 "lexeme.pcc"
  58                   } break;
  59                case a_Literal::tag_REGEXPlit: {
  60 #line 34 "lexeme.pcc"
  61                 char buffer[4096];
  62                   char * p = buffer;
  63                   *p++ = '/'; *p++ = '<';
  64                   for (Conses l = contexts; l; l = l->_2)
  65                   {  if (l->_1) strcpy(p,l->_1->name);
  66                      p += strlen(p);
  67                      if (l->_2) *p++ = ',';
  68                   }
  69                   *p++ = '>';
  70                   strcpy(p,((Literal_REGEXPlit *)((Pat_LITERALpat *)pat)->LITERALpat)->REGEXPlit+1); p += strlen(p) - 1;
  71                   *p = '/';
  72                   debug_msg("converting %p into %s\n",
  73                             CONTEXTpat(contexts,pat), buffer);
  74                   Pat new_pat = LITERALpat(REGEXPlit(str_pool[buffer]));
  75                   new_pat->selector = pat->selector;
  76                   new_pat->ty       = pat->ty;
  77                   return new_pat;
  78
  79 #line 51 "lexeme.pcc"
  80                   } break;
  81                default: {
  82                   L1:;
  83 #line 71 "lexeme.pcc"
  84                 error("%Lillegal context(s) in pattern %p\n",
  85                   CONTEXTpat(contexts,pat));
  86                   return pat;
  87
  88 #line 74 "lexeme.pcc"
  89                   } break;
  90             }
  91             } break;
  92          case a_Pat::tag_LEXEMEpat: {
  93 #line 56 "lexeme.pcc"
  94           Pat this_pat = expand_lexeme_pat(pat,((Pat_LEXEMEpat *)pat)->_2,((Pat_LEXEMEpat *)pat)->_3,((Pat_LEXEMEpat *)pat)->_4);
  95             Pat new_pat = NOpat;
  96
  97 #line 58 "lexeme.pcc"
  98 #line 64 "lexeme.pcc"
  99             {
 100                for (;;) {
 101                   if (this_pat) {
 102                      switch (this_pat->tag__) {
 103                         case a_Pat::tag_LOGICALpat: {
 104                            switch (((Pat_LOGICALpat *)this_pat)->_1) {
 105                               case ORpat: {
 106 #line 60 "lexeme.pcc"
 107                                Pat one_pat = add_contexts(contexts,((Pat_LOGICALpat *)this_pat)->_3);
 108                                  new_pat = new_pat == NOpat ? one_pat
 109                                          : LOGICALpat(ORpat,one_pat,new_pat);
 110                                  this_pat = ((Pat_LOGICALpat *)this_pat)->_2;
 111
 112 #line 64 "lexeme.pcc"
 113                                  } break;
 114                               default: { goto L2; } break;
 115                            }
 116                            } break;
 117                         default: { goto L2; } break;
 118                      }
 119                   } else { goto L2; }
 120                }
 121                L2:;
 122             }
 123 #line 65 "lexeme.pcc"
 124 #line 65 "lexeme.pcc"
 125
 126             Pat p = add_contexts(contexts,this_pat);
 127             new_pat = new_pat == NOpat ? p : LOGICALpat(ORpat,p,new_pat);
 128             return new_pat;
 129
 130 #line 69 "lexeme.pcc"
 131             } break;
 132          default: { goto L1; } break;
 133       }
 134    } else { goto L1; }
 135 }
 136 #line 75 "lexeme.pcc"
 137 #line 75 "lexeme.pcc"
 138
 139 }
 140
 141 ///////////////////////////////////////////////////////////////////////////////
 142 //  Convert a string pattern into a regexp pattern if necessary
 143 ///////////////////////////////////////////////////////////////////////////////
 144 #line 81 "lexeme.pcc"
 145 #line 85 "lexeme.pcc"
 146 Pat convert_regexp (Pat x_1);
 147 Pat convert_regexp (Pat x_1)
 148 {
 149    if (x_1) {
 150       switch (x_1->tag__) {
 151          case a_Pat::tag_LITERALpat: {
 152             switch (((Pat_LITERALpat *)x_1)->LITERALpat->tag__) {
 153                case a_Literal::tag_STRINGlit: {
 154 #line 82 "lexeme.pcc"
 155                  return LITERALpat(REGEXPlit(convert_regexp(((Literal_STRINGlit *)((Pat_LITERALpat *)x_1)->LITERALpat)->STRINGlit)));
 156 #line 82 "lexeme.pcc"
 157                   } break;
 158                default: {
 159                   L3:;
 160 #line 84 "lexeme.pcc"
 161                  return x_1;
 162 #line 84 "lexeme.pcc"
 163                   } break;
 164             }
 165             } break;
 166          default: { goto L3; } break;
 167       }
 168    } else { goto L3; }
 169 }
 170 #line 85 "lexeme.pcc"
 171 #line 85 "lexeme.pcc"
 172
 173
 174 ///////////////////////////////////////////////////////////////////////////////
 175 //  Convert a string literal into a regular expression literal.
 176 ///////////////////////////////////////////////////////////////////////////////
 177 const char * convert_regexp (const char * string, Bool quotes)
 178 {  char buf[4096];
 179    register char * p = buf;
 180    register const char * s = string + 1;
 181    if (quotes) *p++ = '/';
 182    while (*s) {
 183       char c;
 184       s = parse_char(s,c);
 185
 186 #line 98 "lexeme.pcc"
 187 #line 103 "lexeme.pcc"
 188 {
 189    switch (c) {
 190       case '$':
 191       case '(':
 192       case ')':
 193       case '*':
 194       case '+':
 195       case '.':
 196       case '/':
 197       case '<':
 198       case '>':
 199       case '?':
 200       case '[':
 201       case '\\':
 202       case ']':
 203       case '^':
 204       case '{':
 205       case '|':
 206       case '}': {
 207          L4:;
 208 #line 102 "lexeme.pcc"
 209         *p++ = '\\'; *p++ = c;
 210 #line 102 "lexeme.pcc"
 211       } break;
 212       default: {
 213 #line 103 "lexeme.pcc"
 214         p = print_char(p,c);
 215 #line 103 "lexeme.pcc"
 216       }
 217    }
 218 }
 219 #line 104 "lexeme.pcc"
 220 #line 104 "lexeme.pcc"
 221
 222    }
 223    p -= 2;
 224    if (quotes) *p++ = '/';
 225    *p = '\0';
 226    debug_msg("converting %s into %s\n", string, buf);
 227    return str_pool[buf];
 228 }
 229
 230 ///////////////////////////////////////////////////////////////////////////////
 231 //  The global lexeme and lexeme class environments
 232 ///////////////////////////////////////////////////////////////////////////////
 233 static HashTable lexeme_env(string_hash, string_equal);        // lexeme name -> Pat; filled by update_lexeme(), read by mk_regexp_pat()
 234 static HashTable lexeme_class_env(string_hash, string_equal);  // class name -> TermDefs; filled by update_lexeme_class(), read by lookup_lexeme_class()
 235
 236 ///////////////////////////////////////////////////////////////////////////////
 237 //  Methods to update and look up entries in the lexeme class environment
 238 ///////////////////////////////////////////////////////////////////////////////
 239 void update_lexeme_class (Id id, TermDefs terms)
 240 {  if (lexeme_class_env.contains(id))
 241    { error ("%Lredefinition of lexeme class %s\n", id); }
 242    else
 243    { lexeme_class_env.insert(id, terms); }
 244 }
 245
 246 TermDefs lookup_lexeme_class (Id id)
 247 {  HashTable::Entry * e = lexeme_class_env.lookup(id);
 248    if (e) return TermDefs(e->v);
 249    else { error("%Llexeme class %s is undefined\n", id); return
 250 #line 132 "lexeme.pcc"
 251 #line 132 "lexeme.pcc"
 252 nil_1_
 253 #line 132 "lexeme.pcc"
 254 #line 132 "lexeme.pcc"
 255 ; }
 256 }
 257
 258 ///////////////////////////////////////////////////////////////////////////////
 259 //  Method to expand a regexp pattern, taking lexeme definitions into account.
 260 ///////////////////////////////////////////////////////////////////////////////
 261 Pat mk_regexp_pat(const char * re)
 262 {  const char * p = re;
 263    char buf[4096];
 264    char * q = buf;
 265    if (strchr(re,'{') != 0)
 266    {
 267       // skip context
 268       if (*p == '<')
 269       { do { *q++ = *p; } while (*p && *p++ != '>'); }
 270
 271 #line 147 "lexeme.pcc"
 272 #line 189 "lexeme.pcc"
 273 {
 274    for (;;) {
 275       char _V1 = (*p);
 276       switch (_V1) {
 277          case '\000': { goto L5; } break;
 278          case '[': {
 279 #line 148 "lexeme.pcc"
 280           do { *q++ = *p++; }
 281             while (p[-1] && (p[-2] == '\\' || p[-1] != ']'));
 282
 283 #line 150 "lexeme.pcc"
 284          } break;
 285          case '{': {
 286 #line 151 "lexeme.pcc"
 287            char name[256]; char * r;
 288             for (r = name, p++; *p && *p != '}';) *r++ = *p++;
 289             if (*p == '}') p++;
 290             else error ("%Lmissing '}' in regular expression %s\n",re);
 291             *r = '\0';
 292             HashTable::Entry * e = lexeme_env.lookup(name);
 293             if (e)
 294             {  Pat pattern = (Pat)e->v;
 295
 296 #line 159 "lexeme.pcc"
 297 #line 169 "lexeme.pcc"
 298             {
 299                if (pattern) {
 300                   switch (pattern->tag__) {
 301                      case a_Pat::tag_LITERALpat: {
 302                         switch (((Pat_LITERALpat *)pattern)->LITERALpat->tag__) {
 303                            case a_Literal::tag_STRINGlit: {
 304 #line 165 "lexeme.pcc"
 305                             *q++ = '('; strcpy(q, convert_regexp(((Literal_STRINGlit *)((Pat_LITERALpat *)pattern)->LITERALpat)->STRINGlit,false));
 306                               q += strlen(q) - 1; *q++ = ')';
 307
 308 #line 167 "lexeme.pcc"
 309                               } break;
 310                            case a_Literal::tag_REGEXPlit: {
 311 #line 161 "lexeme.pcc"
 312                             *q++ = '(';  strcpy(q,((Literal_REGEXPlit *)((Pat_LITERALpat *)pattern)->LITERALpat)->REGEXPlit+1);  q += strlen(((Literal_REGEXPlit *)((Pat_LITERALpat *)pattern)->LITERALpat)->REGEXPlit)-2;
 313                               *q++ = ')';
 314
 315 #line 163 "lexeme.pcc"
 316                               } break;
 317                            default: {
 318                               L6:; } break;
 319                         }
 320                         } break;
 321                      default: { goto L6; } break;
 322                   }
 323                } else { goto L6; }
 324             }
 325 #line 169 "lexeme.pcc"
 326 #line 169 "lexeme.pcc"
 327
 328             } else
 329             {  error ("%Llexeme {%s} is undefined in %s\n", name, re);
 330             }
 331
 332 #line 173 "lexeme.pcc"
 333          } break;
 334          default: {
 335 #line 174 "lexeme.pcc"
 336            char ch;
 337             const char * r = parse_char(p,ch);
 338             // if (*p == '\\') *q++ = '\\';
 339             if (*p == '\\')
 340             {
 341 #line 178 "lexeme.pcc"
 342 #line 184 "lexeme.pcc"
 343             {
 344                switch (ch) {
 345                   case '$':
 346                   case '(':
 347                   case ')':
 348                   case '*':
 349                   case '+':
 350                   case '.':
 351                   case '/':
 352                   case '<':
 353                   case '>':
 354                   case '?':
 355                   case '[':
 356                   case '\\':
 357                   case ']':
 358                   case '^':
 359                   case '{':
 360                   case '|':
 361                   case '}': {
 362                      L7:;
 363 #line 182 "lexeme.pcc"
 364                    *q++ = '\\'; *q++ = ch;
 365
 366 #line 183 "lexeme.pcc"
 367                   } break;
 368                   default: {
 369 #line 184 "lexeme.pcc"
 370                    q = print_char(q,ch);
 371 #line 184 "lexeme.pcc"
 372                   }
 373                }
 374             }
 375 #line 185 "lexeme.pcc"
 376 #line 185 "lexeme.pcc"
 377
 378             } else
 379             {  q = print_char(q,ch); }
 380             p = r;
 381
 382 #line 189 "lexeme.pcc"
 383          }
 384       }
 385    }
 386    L5:;
 387 }
 388 #line 190 "lexeme.pcc"
 389 #line 190 "lexeme.pcc"
 390
 391       *q++;
 392       debug_msg("%s ==> %s\n", re, buf);
 393       re = buf;
 394    }
 395    return LITERALpat(REGEXPlit(str_pool[re]));
 396 }
 397
 398 ///////////////////////////////////////////////////////////////////////////////
 399 //  Method to update the lexeme environment.
 400 ///////////////////////////////////////////////////////////////////////////////
 401 void update_lexeme(Id id, Ids args, Pat pat)
 402 {  HashTable::Entry * e = lexeme_env.lookup(id);
 403    if (e)
 404    {  error("%Llexeme {%s} already defined as %p\n",id,(Pat)e->v);
 405    } else
 406    {  lexeme_env.insert(id,pat);
 407    }
 408 }
 409
 410 ///////////////////////////////////////////////////////////////////////////////
 411 //  Method to expand a lexeme pattern into a set of disjunctive patterns.
 412 ///////////////////////////////////////////////////////////////////////////////
 413 Pat expand_lexeme_pat(Pat pat, Ty ty, int n, Cons terms[])
 414 {
 415 #line 214 "lexeme.pcc"
 416 #line 237 "lexeme.pcc"
 417 {
 418    Ty _V2 = deref_all(ty);
 419    if (_V2) {
 420       switch (_V2->tag__) {
 421          case a_Ty::tag_TYCONty: {
 422             if (boxed(((Ty_TYCONty *)_V2)->_1)) {
 423                switch (((Ty_TYCONty *)_V2)->_1->tag__) {
 424                   case a_TyCon::tag_DATATYPEtycon: {
 425                      if (
 426 #line 216 "lexeme.pcc"
 427                      ((((TyCon_DATATYPEtycon *)((Ty_TYCONty *)_V2)->_1)->qualifiers | QUALlexeme) == 0)
 428 #line 216 "lexeme.pcc"
 429 ) {
 430
 431 #line 217 "lexeme.pcc"
 432                       error ("%Ldatatype %T is not a lexeme type\n", ty);
 433 #line 217 "lexeme.pcc"
 434                      } else {
 435
 436 #line 219 "lexeme.pcc"
 437                       Pat disj_pats = NOpat;
 438                         for (int i = 0; i < n; i++)
 439                         {
 440 #line 221 "lexeme.pcc"
 441 #line 233 "lexeme.pcc"
 442                         {
 443                            Cons _V3 = terms[i];
 444                            if (_V3) {
 445                               if (_V3->lexeme_pattern) {
 446 #line 223 "lexeme.pcc"
 447                                Pat new_pat = subst(convert_regexp(_V3->lexeme_pattern),0,true);
 448                                  disj_pats = (disj_pats == NOpat) ? new_pat
 449                                              : LOGICALpat(ORpat,disj_pats, new_pat);
 450                                  disj_pats->selector = pat->selector;
 451
 452 #line 227 "lexeme.pcc"
 453                               } else {
 454 #line 229 "lexeme.pcc"
 455                                error ("%Llexeme pattern is undefined for constructor %s\n",
 456                                  _V3->name);
 457
 458 #line 231 "lexeme.pcc"
 459                               }
 460                            } else {}
 461                         }
 462 #line 233 "lexeme.pcc"
 463 #line 233 "lexeme.pcc"
 464
 465                         }
 466                         return disj_pats;
 467
 468 #line 236 "lexeme.pcc"
 469                      }
 470                      } break;
 471                   default: {
 472                      L8:;
 473 #line 237 "lexeme.pcc"
 474                    error ("%Lnon lexeme type %T in pattern %p\n", ty, pat);
 475 #line 237 "lexeme.pcc"
 476                      } break;
 477                }
 478             } else { goto L8; }
 479             } break;
 480          default: { goto L8; } break;
 481       }
 482    } else {}
 483 }
 484 #line 238 "lexeme.pcc"
 485 #line 238 "lexeme.pcc"
 486
 487    return WILDpat();
 488 }
 489 #line 241 "lexeme.pcc"
 490 /*
 491 ------------------------------- Statistics -------------------------------
 492 Merge matching rules         = yes
 493 Number of DFA nodes merged   = 197
 494 Number of ifs generated      = 9
 495 Number of switches generated = 13
 496 Number of labels             = 6
 497 Number of gotos              = 8
 498 Adaptive matching            = enabled
 499 Fast string matching         = disabled
 500 Inline downcasts             = enabled
 501 --------------------------------------------------------------------------
 502 */