Filters/FilterSource.cs

   1 //
   2 // FilterSource.cs
   3 //
   4 // Copyright (C) 2004, 2005 Novell, Inc.
   5 //
   6
   7 //
   8 // Permission is hereby granted, free of charge, to any person obtaining a
   9 // copy of this software and associated documentation files (the "Software"),
  10 // to deal in the Software without restriction, including without limitation
  11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12 // and/or sell copies of the Software, and to permit persons to whom the
  13 // Software is furnished to do so, subject to the following conditions:
  14 //
  15 // The above copyright notice and this permission notice shall be included in
  16 // all copies or substantial portions of the Software.
  17 //
  18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  24 // DEALINGS IN THE SOFTWARE.
  25 //
  26
  27
  28 using System;
  29 using System.Collections;
  30 using System.IO;
  31 using System.Text;
  32 namespace Beagle.Filters {
  33
  34         public abstract class FilterSource : Beagle.Daemon.Filter {
  35
  // Comment and string syntax families that ExtractTokens tells apart.
  // C_Style and C_Sharp_Style build their comment delimiters from '/' and
  // '*'; Pascal_Style additionally uses '{', '}', '(' and ')'.
  // Python_Style and Shell_Style start a comment with '#', Fortran_Style
  // with '!', Lisp_Style with ';' and Matlab_Style with '%'.
  protected enum LangType {
          None,
          C_Style,
          C_Sharp_Style,
          Python_Style,
          Fortran_Style,
          Pascal_Style,
          Lisp_Style,
          Matlab_Style,
          Shell_Style
  }

  // Syntax family of the source being filtered.  The constructor leaves
  // it at None; presumably each concrete filter sets it -- confirm
  // against the subclasses.
  protected LangType SrcLangType;

  // Words that must not be indexed when they appear in plain code.
  // Extracted identifiers are looked up with Contains (), so the words
  // are the keys.  Created empty by the constructor.
  protected Hashtable KeyWordsHash;

  // What kind of text the scanner is currently inside of.  None means
  // plain code.
  private enum LineType {
          None,
          SingleLineComment,
          BlockComment,
          StringConstant
  }

  // Scanner state; it is kept between calls to ExtractTokens so that a
  // comment or string left open by one call continues in the next.
  private LineType SrcLineType;

  // Quote sequence that opened the current string constant: a single
  // quote character, or three of them for a Python triple quote.
  private string StrConstIdentifier;

  // Text collected so far for the identifier, comment or string that is
  // being scanned.
  private StringBuilder token;
  // Creates a filter with no language selected, an empty keyword table
  // and the scanner positioned in plain code.
  public FilterSource ()
  {
          // Scanner state: nothing open, no text collected yet.
          this.token = new StringBuilder ();
          this.StrConstIdentifier = " ";
          this.SrcLineType = LineType.None;

          // Language description; empty until something fills it in.
          this.KeyWordsHash = new Hashtable ();
          this.SrcLangType = LangType.None;

          // Settings inherited from the base filter.
          SnippetMode = true;
          OriginalIsText = true;
  }
  75
  // Feeds one character of plain code into the pending token.
  // Letters, digits and '_' are word characters and are appended to the
  // token; anything else (a space included) is a separator and leaves
  // the token untouched.
  // Returns: True, if the pending token is complete: ch is a separator,
  //                or ch is a word character in the last position
  //                (index + 1 >= length).
  //          False, if ch was appended and more characters follow.
  private bool AppendToToken (char ch, int index, int length)
  {
          bool is_word_char = (ch == '_') || Char.IsLetterOrDigit (ch);

          if (!is_word_char)
                  return true;

          token.Append (ch);

          // The word can only be closed here when the input ends with it.
          return (index + 1) >= length;
  }
  95
  // Tokenize the passed string and add the relevant tokens for indexing.
  //
  // Text found inside comments and string constants is handed to
  // AppendText as collected; identifiers found in plain code are added
  // one by one, unless they are listed in KeyWordsHash or start with a
  // digit.  The comment/string state (SrcLineType) is kept between
  // calls, so a block comment or string left open by one call continues
  // in the next one; presumably the caller passes one line per call.
  //
  // A null string is ignored.
  //
  // FIXME: Perl has embedded "POD" (documentation) style, which needs a little processing.
  //
  protected void ExtractTokens (string str)
  {
          token.Length = 0;
          if (str == null)
                  return;

          // Delimiter characters seen so far that have not yet formed a
          // complete comment delimiter.
          string splCharSeq = "";

          for (int index = 0; index < str.Length; index++) {
                  char ch = str [index];
                  bool in_code = (SrcLineType == LineType.None);

                  if (IsCommentDelimiterChar (ch)) {
                          // In plain code these characters are punctuation, so
                          // they end the identifier in front of them ("a*b" is
                          // two words, not "ab").
                          if (in_code)
                                  EmitPendingWord ();

                          splCharSeq += ch;

                          // A brace is a complete delimiter on its own and no
                          // delimiter is longer than two characters, so older
                          // pending characters can no longer take part in a
                          // match.  Inside a comment or string they are
                          // ordinary text; in code they are just dropped.
                          int keep = (ch == '{' || ch == '}') ? 1 : 2;
                          if (splCharSeq.Length > keep) {
                                  int stale = splCharSeq.Length - keep;
                                  if (!in_code)
                                          token.Append (splCharSeq, 0, stale);
                                  splCharSeq = splCharSeq.Substring (stale);
                          }

                          switch (splCharSeq) {

                          case "(*":
                          case "{":
                          case "/*":
                                  if (in_code) {
                                          SrcLineType = LineType.BlockComment;
                                          token.Length = 0;
                                  } else
                                          token.Append (splCharSeq);
                                  splCharSeq = "";
                                  break;

                          case "//":
                                  if (in_code) {
                                          SrcLineType = LineType.SingleLineComment;
                                          token.Length = 0;
                                  } else
                                          token.Append (splCharSeq);
                                  splCharSeq = "";
                                  break;

                          case "*)":
                          case "}":
                          case "*/":
                                  if (SrcLineType == LineType.BlockComment)
                                          EndCommentOrString ();
                                  else if (!in_code)
                                          token.Append (splCharSeq);
                                  splCharSeq = "";
                                  break;
                          }
                  } else if ((ch == '#' && (SrcLangType == LangType.Python_Style ||
                                            SrcLangType == LangType.Shell_Style)) ||
                             (ch == '!' && SrcLangType == LangType.Fortran_Style) ||
                             (ch == ';' && SrcLangType == LangType.Lisp_Style) ||
                             (ch == '%' && SrcLangType == LangType.Matlab_Style)) {
                          if (in_code) {
                                  // Keep the identifier that directly precedes
                                  // the comment character.
                                  EmitPendingWord ();
                                  SrcLineType = LineType.SingleLineComment;
                          } else
                                  token.Append (ch);
                  }
                  // FIXME: we evaluate *ALL* escape
                  // sequences on strings.  Do we really need to
                  // do this for comments??? And also "\n", "\t" etc????
                  else if (SrcLineType == LineType.StringConstant && ch == '\\') {
                          // Pending delimiter characters are string content
                          // and come before the escaped character.
                          token.Append (splCharSeq);
                          splCharSeq = "";

                          // Drop the backslash and keep the character it escapes.
                          if ((index + 1) < str.Length)
                                  token.Append (str [index + 1]);
                          index++;
                  }
                  // Well the typical python ''' or """ stuff.  It opens a
                  // string only from plain code, and closes only a string
                  // that was opened by the same triple quote; anywhere else
                  // the quotes are handled one at a time further down.
                  else if (SrcLangType == LangType.Python_Style &&
                           (ch == '\"' || ch == '\'') &&
                           (index + 2) < str.Length &&
                           str [index + 1] == ch &&
                           str [index + 2] == ch &&
                           (in_code ||
                            (SrcLineType == LineType.StringConstant &&
                             StrConstIdentifier.Length == 3 &&
                             StrConstIdentifier [0] == ch))) {

                          if (in_code) {
                                  StrConstIdentifier = str.Substring (index, 3);
                                  SrcLineType = LineType.StringConstant;
                                  token.Length = 0;
                          } else
                                  EndCommentOrString ();

                          // Skip the other two quote characters.
                          index += 2;
                          splCharSeq = "";
                  }
                  // Lisp: ignore the single quote character; do another iteration
                  else if (SrcLangType == LangType.Lisp_Style && ch == '\'') {
                          continue;
                  }
                  else if (ch == '\"' || ch == '\'' ||
                           (ch == '`' && SrcLangType == LangType.Shell_Style)) {

                          if (SrcLineType == LineType.StringConstant &&
                              StrConstIdentifier.Length == 1 &&
                              StrConstIdentifier [0] == ch) {
                                  // Closing quote of a single-quoted string.
                                  token.Append (splCharSeq);
                                  EndCommentOrString ();

                          } else if (in_code) {
                                  // Opening quote.  Whatever is glued to it
                                  // (e.g. a string prefix) is discarded.
                                  StrConstIdentifier = ch.ToString ();
                                  SrcLineType = LineType.StringConstant;
                                  token.Length = 0;
                          } else {
                                  // A quote inside a comment, or a different
                                  // quote inside a string: plain text.
                                  token.Append (splCharSeq);
                                  token.Append (ch);
                          }
                          splCharSeq = "";

                  } else if (!in_code) {
                          // Ordinary character inside a comment or string.
                          token.Append (splCharSeq);
                          token.Append (ch);
                          splCharSeq = "";

                  } else {
                          // Ordinary character in plain code.
                          splCharSeq = "";

                          // Lisp identifiers may contain symbol characters;
                          // keep collecting instead of closing the word.
                          if (SrcLangType == LangType.Lisp_Style && IsLispSymbolChar (ch)) {
                                  token.Append (ch);
                                  continue;
                          }

                          if (AppendToToken (ch, index, str.Length))
                                  EmitPendingWord ();
                  }
          }

          if (SrcLineType != LineType.None) {
                  // The input ended inside a comment or string: hand over
                  // what was collected so far.
                  token.Append (splCharSeq);

                  bool trailing_backslash =
                          token.Length > 0 && token [token.Length - 1] == '\\';
                  if (trailing_backslash)
                          token.Length = token.Length - 1;

                  token.Append (" ");
                  AppendText (token.ToString ());

                  // if a single-line-comment ends with a "\",
                  // the lines that follows it are also considered as a comment,
                  // till a line with out a "\" is found
                  // C# and Lisp don't follow this syntax.
                  if (SrcLineType == LineType.SingleLineComment &&
                      (!trailing_backslash
                       || SrcLangType == LangType.C_Sharp_Style
                       || SrcLangType == LangType.Lisp_Style))
                          SrcLineType = LineType.None;
          } else {
                  // An identifier may still be pending (a Lisp word ending
                  // in a symbol character); it goes through the same
                  // keyword and number filter as every other word.
                  EmitPendingWord ();
          }
  }

  // True, if ch can be part of a comment delimiter in the current
  // language: { } ( ) * / for Pascal, / and * for C and C#.
  private bool IsCommentDelimiterChar (char ch)
  {
          switch (SrcLangType) {
          case LangType.Pascal_Style:
                  return ch == '{' || ch == '}'
                          || ch == '(' || ch == ')'
                          || ch == '*' || ch == '/';
          case LangType.C_Style:
          case LangType.C_Sharp_Style:
                  return ch == '/' || ch == '*';
          default:
                  return false;
          }
  }

  // True, if ch is one of the symbol characters allowed in a Lisp
  // identifier besides letters and digits:
  // ! $ % & * + - . / : < = > ? @ ^ ~
  // ('_' is left to AppendToToken, which already treats it as a word
  // character; listing it here as well would append it twice.)
  private static bool IsLispSymbolChar (char ch)
  {
          switch (ch) {
          case '!': case '$': case '%': case '&':
          case '*': case '+': case '-': case '.':
          case '/': case ':': case '<': case '=':
          case '>': case '?': case '@': case '^':
          case '~':
                  return true;
          default:
                  return false;
          }
  }

  // Adds the pending identifier to the text pool, unless it is empty,
  // listed in KeyWordsHash or starts with a digit (numeric constant),
  // and then resets the token.
  private void EmitPendingWord ()
  {
          if (token.Length > 0) {
                  string word = token.ToString ();

                  // Fortran words are lowercased before the keyword lookup.
                  // The invariant culture keeps the result independent of
                  // the user's locale (e.g. Turkish 'I').
                  if (SrcLangType == LangType.Fortran_Style)
                          word = word.ToLower (System.Globalization.CultureInfo.InvariantCulture);

                  if (!KeyWordsHash.Contains (word) && !Char.IsDigit (token [0])) {
                          AppendText (word);
                          AppendWhiteSpace ();
                  }
          }

          // reset the token
          token.Length = 0;
  }

  // Leaves the current comment or string: the collected text goes to
  // the text pool and the scanner is back in plain code.
  private void EndCommentOrString ()
  {
          SrcLineType = LineType.None;
          token.Append (" ");
          AppendText (token.ToString ());
          token.Length = 0;
  }
 326         }
 327 }