2005-05-14 Gabor Kelemen <kelemeng@gnome.hu>
[beagle.git] / beagled / CalendarQueryable / ICalParser / Token.cs
blob6511e71a5c3db4fb6c7abade2d46169265231c3e
1 using System;
2 using System.Collections;
3 using System.Text.RegularExpressions;
4 using System.Text;
5 using System.Web;
7 /***
8 * <copyright>
9 * ICalParser is a general purpose .Net parser for iCalendar format files (RFC 2445)
11 * Copyright (C) 2004 J. Tim Spurway
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 * </copyright>
29 namespace Semaview.Shared.ICalParser
32 // TODO: remove the reserved keywords from the Token Base Class - this will allow
33 // generic parsing/scanning.
// Member order in this enum is significant.
//
//  * Simple tokens are everything '<= Equals'.
//  * Reserved words are everything '>= Tcalscale'.
//
// The reserved words are further grouped into contiguous ranges; the parser treats all
// members of a range alike, and the Token class tests membership with range comparisons
// (isSymbolicProperty(), isResourceProperty(), isMailtoProperty(), ...). A new keyword must
// therefore be inserted between the first and last member of the range it belongs to. For
// example, a new symbolic property has to be declared after 'Tcalscale' and before
// 'Tcutype', otherwise isSymbolicProperty() will not recognise it.
public enum TokenValue
{
    // simple tokens
    SemiColon = 0,
    Colon = 1,
    Comma = 2,
    Hyphen = 3,
    CRLF = 4,
    Equals = 5,

    // general tokens (the values 9 and 10 are unassigned)
    QuotedString = 6,
    Value = 7,
    Xtension = 8,
    ID = 11,
    Parm = 12,
    Error = 13,

    // reserved words start here; numbering continues implicitly from 14

    // symbolic properties: Tcalscale .. Tcutype
    Tcalscale = 14,
    Taction,
    Tclass,
    Ttransp,
    Tstart,
    Tpartstat,
    Trsvp,
    Trole,
    Tcutype,

    // resource properties: Tstandard .. Ttrigger
    Tstandard,
    Tdaylight,
    Tvalarm,
    Ttrigger,

    // 'mailto:' type properties: Tattendee .. Torganizer
    Tattendee,
    Torganizer,

    // keywords that belong to no range
    Tbegin,
    Tend,
    Tvalue,
    TrecurrenceId,

    // BEGIN/END keyword values: Tvcalendar .. Tvtimezone
    Tvcalendar,

    // the COMPONENT subset of the BEGIN/END values: Tvevent .. Tvtimezone
    Tvevent,
    Tvtodo,
    Tvjournal,
    Tvfreebusy,
    Tvtimezone,

    // value properties: Tdtstart .. Texdate
    Tdtstart,
    Tdtstamp,
    Tdtend,
    Trrule,
    Texdate,
}
/// <summary>
/// Represents the individual tokens returned from the scanner to the parser. Note that the
/// Token creation process is sensitive to the ScannerState. This state is defined by what
/// context the scanner currently is in - parsing IDs, parameters, or values:
/// e.g. the iCalendar grammar defines the following possible states
///     id;id=parm:value
/// Each string parsed out of such a line has to be treated differently (e.g. quoted strings
/// are allowed in 'parm' but not in 'id').
/// </summary>
public class Token
{
    private string tokenText;
    private TokenValue tokenVal;
    private ScannerState state;
    private string errorMessage;

    // All case conversions use the invariant culture: keywords are ASCII, and a culture such
    // as tr-TR would otherwise turn "BEGIN" into "begın" and miss the reserved word table.
    private static readonly System.Globalization.CultureInfo invariant =
        System.Globalization.CultureInfo.InvariantCulture;

    // A letter followed by letters, digits, '_' or '-', anchored to the whole string.
    private static readonly Regex idPattern = new Regex( @"\A[a-zA-Z][a-zA-Z0-9_\-]*\z" );

    // Lower-case keyword -> TokenValue, filled once by the static constructor.
    private static readonly Hashtable reservedWords;

    static Token()
    {
        // static initialization for reserved words - for quick parsing
        reservedWords = new Hashtable( );
        // NOTE(review): "calscale" was already commented out in the original table, so
        // TokenValue.Tcalscale is never produced; the reason is not visible in this file.
        //reservedWords[ "calscale" ] = TokenValue.Tcalscale;
        reservedWords[ "action" ] = TokenValue.Taction;
        reservedWords[ "class" ] = TokenValue.Tclass;
        reservedWords[ "transp" ] = TokenValue.Ttransp;
        reservedWords[ "start" ] = TokenValue.Tstart;
        reservedWords[ "partstat" ] = TokenValue.Tpartstat;
        reservedWords[ "rsvp" ] = TokenValue.Trsvp;
        reservedWords[ "role" ] = TokenValue.Trole;
        reservedWords[ "cutype" ] = TokenValue.Tcutype;
        reservedWords[ "standard" ] = TokenValue.Tstandard;
        reservedWords[ "daylight" ] = TokenValue.Tdaylight;
        reservedWords[ "valarm" ] = TokenValue.Tvalarm;
        reservedWords[ "trigger" ] = TokenValue.Ttrigger;
        reservedWords[ "attendee" ] = TokenValue.Tattendee;
        reservedWords[ "organizer" ] = TokenValue.Torganizer;
        reservedWords[ "begin" ] = TokenValue.Tbegin;
        reservedWords[ "end" ] = TokenValue.Tend;
        reservedWords[ "vevent" ] = TokenValue.Tvevent;
        reservedWords[ "vtodo" ] = TokenValue.Tvtodo;
        reservedWords[ "vjournal" ] = TokenValue.Tvjournal;
        reservedWords[ "vfreebusy" ] = TokenValue.Tvfreebusy;
        reservedWords[ "vtimezone" ] = TokenValue.Tvtimezone;
        reservedWords[ "vcalendar" ] = TokenValue.Tvcalendar;
        reservedWords[ "dtstart" ] = TokenValue.Tdtstart;
        reservedWords[ "dtend" ] = TokenValue.Tdtend;
        reservedWords[ "rrule" ] = TokenValue.Trrule;
        reservedWords[ "exdate" ] = TokenValue.Texdate;
        reservedWords[ "value" ] = TokenValue.Tvalue;
        reservedWords[ "dtstamp" ] = TokenValue.Tdtstamp;
        reservedWords[ "recurrence-id" ] = TokenValue.TrecurrenceId;
    }

    /// <summary>
    /// Tests whether <paramref name="str"/> is a well formed identifier: a letter followed
    /// by any number of letters, digits, underscores or hyphens.
    /// </summary>
    /// <remarks>
    /// The whole string has to match. The previous, unanchored pattern accepted every
    /// string that merely contained a letter somewhere.
    /// </remarks>
    /// <param name="str">The candidate identifier.</param>
    /// <returns>true for a well formed identifier; false otherwise, including for null.</returns>
    public static bool isID( string str )
    {
        return str != null && idPattern.IsMatch( str );
    }

    /// <summary>
    /// Same as <see cref="CamelCase"/>, but the first character is upper-cased as well
    /// (e.g. "valarm" becomes "Valarm").
    /// </summary>
    /// <param name="str">The hyphenated name to convert; must not be null.</param>
    /// <returns>The converted name.</returns>
    public static string CapsCamelCase( string str )
    {
        if( str.Length == 0 )
        {
            return CamelCase( str );
        }

        return CamelCase( Char.ToUpper( str[ 0 ], invariant ) + str.Substring( 1 ));
    }

    /// <summary>
    /// Removes every hyphen and upper-cases the character that follows it
    /// (e.g. "x-gorilla-attack" becomes "xGorillaAttack"). All other characters are
    /// copied unchanged; a trailing hyphen is simply dropped.
    /// </summary>
    /// <param name="str">The hyphenated name to convert; must not be null.</param>
    /// <returns>The converted name.</returns>
    public static string CamelCase( string str )
    {
        StringBuilder buff = new StringBuilder( str.Length );
        bool upperNext = false;

        foreach( char c in str )
        {
            if( c == '-' )
            {
                upperNext = true;
            }
            else if( upperNext )
            {
                buff.Append( Char.ToUpper( c, invariant ));
                upperNext = false;
            }
            else
            {
                buff.Append( c );
            }
        }

        return buff.ToString();
    }

    /// <summary>
    /// Converts an iCalendar date-time ("yyyyMMddTHHmmss", optionally followed by "Z") into
    /// the form "yyyy-MM-ddTHH:mm:ss" (with the "Z" preserved).
    /// </summary>
    /// <remarks>
    /// The input is sliced by position only; neither the digits nor the separator at
    /// index 8 are validated. Input that is null or shorter than 15 characters is handed
    /// to <see cref="ParseDate"/>.
    /// </remarks>
    /// <param name="icalDate">The iCalendar date or date-time string.</param>
    /// <returns>The reformatted date-time string.</returns>
    public static string ParseDateTime( string icalDate )
    {
        // too short to hold a time part - try just the date format
        if( icalDate == null || icalDate.Length < 15 )
        {
            return ParseDate( icalDate );
        }

        StringBuilder rval = new StringBuilder( 20 );
        rval.Append( icalDate, 0, 4 ).Append( '-' );
        rval.Append( icalDate, 4, 2 ).Append( '-' );
        rval.Append( icalDate, 6, 2 ).Append( 'T' );
        rval.Append( icalDate, 9, 2 ).Append( ':' );   // index 8 is the date/time separator
        rval.Append( icalDate, 11, 2 ).Append( ':' );
        rval.Append( icalDate, 13, 2 );
        if( icalDate[ icalDate.Length - 1 ] == 'Z' )
        {
            rval.Append( 'Z' );
        }
        return rval.ToString();
    }

    /// <summary>
    /// Converts an iCalendar date ("yyyyMMdd", optionally followed by "Z") into the form
    /// "yyyy-MM-ddT00:00:00" (with the "Z" preserved).
    /// </summary>
    /// <remarks>
    /// The input is sliced by position only; the digits are not validated. Input that is
    /// null or shorter than 8 characters yields DateTime.MaxValue in sortable format
    /// ("9999-12-31T23:59:59").
    /// </remarks>
    /// <param name="icalDate">The iCalendar date string.</param>
    /// <returns>The reformatted date string.</returns>
    public static string ParseDate( string icalDate )
    {
        // if we can't convert it - return the maxdate string
        if( icalDate == null || icalDate.Length < 8 )
        {
            // the "s" pattern already uses 'T' as separator, no further fix-up is needed
            return DateTime.MaxValue.ToString( "s", invariant );
        }

        StringBuilder rval = new StringBuilder( 20 );
        rval.Append( icalDate, 0, 4 ).Append( '-' );
        rval.Append( icalDate, 4, 2 ).Append( '-' );
        rval.Append( icalDate, 6, 2 ).Append( "T00:00:00" );
        if( icalDate[ icalDate.Length - 1 ] == 'Z' )
        {
            rval.Append( 'Z' );
        }
        return rval.ToString();
    }

    /// <summary>
    /// Creates an unquoted token from text scanned in the given state.
    /// </summary>
    public Token( string _tokenText, ScannerState _state ) : this( _tokenText, _state, false )
    {
    }

    /// <summary>
    /// Creates an unquoted value token (ScannerState.ParseValue).
    /// </summary>
    public Token( string _tokenText ) : this( _tokenText, ScannerState.ParseValue, false )
    {
    }

    /// <summary>
    /// Creates a token from scanned text; how the text is interpreted depends on the state.
    /// </summary>
    /// <remarks>
    /// ParseID: the text is lower-cased and classified as reserved word, "x-" extension
    /// (stored with an "x:" prefix), plain ID or Error, then camel-cased.
    /// ParseParms / ParseValue: the text is HTML-encoded and becomes a Parm (or
    /// QuotedString when quoteFlag is set) or a Value token respectively.
    /// ParseSimple: always an Error - simple tokens carry no text.
    /// </remarks>
    /// <param name="_tokenText">The scanned text; null is treated as the empty string.</param>
    /// <param name="_state">The scanner state the text was read in.</param>
    /// <param name="quoteFlag">true if the text was a quoted string (ParseParms only).</param>
    public Token( string _tokenText, ScannerState _state, bool quoteFlag )
    {
        state = _state;

        if( _tokenText == null )
        {
            _tokenText = "";
        }

        switch( state )
        {
            case ScannerState.ParseID:
            {
                tokenText = _tokenText.ToLower( invariant );
                object reserved = reservedWords[ tokenText ];   // single lookup; null if absent
                if( reserved != null )
                {
                    tokenVal = (TokenValue) reserved;
                }
                else if( tokenText.Length >= 2 && tokenText[ 0 ] == 'x' && tokenText[ 1 ] == '-' )
                {
                    tokenVal = TokenValue.Xtension;
                    tokenText = "x:" + tokenText.Substring( 2 );
                }
                else if( isID( tokenText )) // this check may be unnecessary by virtue of the scanner....
                {
                    tokenVal = TokenValue.ID;
                }
                else
                {
                    tokenVal = TokenValue.Error;
                    errorMessage = "Illegal value for ID";
                }

                // BEGIN/END values (component names) start with a capital, everything else does not
                if( isBeginEndValue() )
                {
                    tokenText = CapsCamelCase( tokenText );
                }
                else
                {
                    tokenText = CamelCase( tokenText );
                }
                break;
            }

            case ScannerState.ParseParms:
                tokenText = HttpUtility.HtmlEncode( _tokenText );
                tokenVal = quoteFlag ? TokenValue.QuotedString : TokenValue.Parm;
                break;

            case ScannerState.ParseValue:
                tokenText = HttpUtility.HtmlEncode( _tokenText );
                tokenVal = TokenValue.Value;
                break;

            case ScannerState.ParseSimple:
                tokenVal = TokenValue.Error;
                errorMessage = "Bad constructor call - ParseSimple and text...";
                break;
        }
    }

    /// <summary>
    /// Creates a simple, text-less token. Only values up to and including
    /// TokenValue.Equals are accepted; anything else produces an Error token.
    /// </summary>
    /// <param name="_tokenVal">The simple token value.</param>
    public Token( TokenValue _tokenVal )
    {
        tokenText = null;
        if( _tokenVal <= TokenValue.Equals )
        {
            tokenVal = _tokenVal;
        }
        else
        {
            tokenVal = TokenValue.Error;
        }
    }

    /// <summary>true if this token is an Error token.</summary>
    public bool isError( )
    {
        return tokenVal == TokenValue.Error;
    }

    /// <summary>true if the token lies in the range Tcalscale .. Tcutype.</summary>
    public bool isSymbolicProperty( )
    {
        return tokenVal >= TokenValue.Tcalscale && tokenVal <= TokenValue.Tcutype;
    }

    /// <summary>true if the token lies in the range Tstandard .. Ttrigger.</summary>
    public bool isResourceProperty( )
    {
        return tokenVal >= TokenValue.Tstandard && tokenVal <= TokenValue.Ttrigger;
    }

    /// <summary>true if the token lies in the range Tattendee .. Torganizer.</summary>
    public bool isMailtoProperty( )
    {
        return tokenVal >= TokenValue.Tattendee && tokenVal <= TokenValue.Torganizer;
    }

    /// <summary>true if the token lies in the range Tvcalendar .. Tvtimezone, or is Tvalarm.</summary>
    public bool isBeginEndValue( )
    {
        return (tokenVal >= TokenValue.Tvcalendar && tokenVal <= TokenValue.Tvtimezone) || tokenVal == TokenValue.Tvalarm;
    }

    /// <summary>true if the token lies in the range Tvevent .. Tvtimezone.</summary>
    public bool isComponent( )
    {
        return tokenVal >= TokenValue.Tvevent && tokenVal <= TokenValue.Tvtimezone;
    }

    /// <summary>true if the token lies in the range Tdtstart .. Texdate, or is Ttrigger.</summary>
    public bool isValueProperty( )
    {
        return (tokenVal >= TokenValue.Tdtstart && tokenVal <= TokenValue.Texdate) || tokenVal == TokenValue.Ttrigger;
    }

    /// <summary>The classification of this token.</summary>
    public TokenValue TokenVal
    {
        get { return tokenVal; }
    }

    /// <summary>The (processed) text of this token; null for simple tokens.</summary>
    public string TokenText
    {
        get { return tokenText; }
    }

    /// <summary>The error description; only set by the constructors for Error tokens that carry text.</summary>
    public string Error
    {
        get { return errorMessage; }
    }

    /// <summary>
    /// Replaces the token text with the result of running it through <see cref="ParseDateTime"/>.
    /// </summary>
    public void FormatDateTime( )
    {
        tokenText = ParseDateTime( tokenText );
    }
}
349 #region csUnit Tests
350 #if DEBUG
namespace Test
{
    using csUnit;

    /// <summary>
    /// csUnit tests for the Token class. Every condition is asserted on its own so that a
    /// failure points at the exact expectation that was violated.
    /// </summary>
    public class TokenTest
    {
        public TokenTest( )
        {
        }

        /// <summary>One token per constructor flavour: value, simple and extension ID.</summary>
        public void testToken( )
        {
            Token plainValue = new Token( "testing" );
            Token simple = new Token( TokenValue.Hyphen );
            Token extension = new Token( "x-vendor-specific-tag", ScannerState.ParseID );

            Assert.True( plainValue.TokenText == "testing" );
            Assert.True( plainValue.TokenVal == TokenValue.Value );
            Assert.True( !plainValue.isError() );

            Assert.True( simple.TokenText == null );
            Assert.True( simple.TokenVal == TokenValue.Hyphen );
            Assert.True( !simple.isError() );

            Assert.True( extension.TokenText == "x:vendorSpecificTag" );
            Assert.True( extension.TokenVal == TokenValue.Xtension );
            Assert.True( !extension.isError() );
        }

        /// <summary>ID parsing: reserved word, hyphenated ID, plain ID and an illegal ID.</summary>
        public void testParseID( )
        {
            Token reserved = new Token( "BeGin", ScannerState.ParseID );
            Token hyphenated = new Token( "a123-009", ScannerState.ParseID );
            Token plain = new Token( "x123", ScannerState.ParseID );
            Token numeric = new Token( "123", ScannerState.ParseID );

            Assert.True( reserved.TokenVal == TokenValue.Tbegin );
            Assert.True( !reserved.isError() );

            Assert.True( hyphenated.TokenText == "a123009" );
            Assert.True( hyphenated.TokenVal == TokenValue.ID );
            Assert.True( !hyphenated.isError() );

            Assert.True( plain.TokenText == "x123" );
            Assert.True( plain.TokenVal == TokenValue.ID );
            Assert.True( !plain.isError() );

            Assert.True( numeric.isError() );
        }

        /// <summary>Reserved words are recognised regardless of case and classified by range.</summary>
        public void testReservedWords( )
        {
            Token symbolic = new Token( "class", ScannerState.ParseID );
            Token mailto = new Token( "AttendeE", ScannerState.ParseID );
            Token resource = new Token( "daylight", ScannerState.ParseID );
            Token beginEnd = new Token( "vtodO", ScannerState.ParseID );

            Assert.True( symbolic.TokenVal == TokenValue.Tclass );
            Assert.True( symbolic.isSymbolicProperty() );
            Assert.True( !symbolic.isError() );

            Assert.True( mailto.TokenVal == TokenValue.Tattendee );
            Assert.True( mailto.isMailtoProperty() );
            Assert.True( !mailto.isError() );

            Assert.True( resource.TokenVal == TokenValue.Tdaylight );
            Assert.True( resource.isResourceProperty() );
            Assert.True( !resource.isError() );

            Assert.True( beginEnd.TokenVal == TokenValue.Tvtodo );
            Assert.True( beginEnd.isBeginEndValue() );
            Assert.True( !beginEnd.isError() );
        }

        /// <summary>Parameter parsing: quoted strings versus plain parameters.</summary>
        public void testParseParms( )
        {
            Token quoted = new Token( "\"jklsdfjkldfs\"", ScannerState.ParseParms, true );
            Token plain = new Token( "a123-009", ScannerState.ParseParms );

            Assert.True( quoted.TokenVal == TokenValue.QuotedString );
            Assert.True( !quoted.isError() );

            Assert.True( plain.TokenText == "a123-009" );
            Assert.True( plain.TokenVal == TokenValue.Parm );
            Assert.True( !plain.isError() );
        }

        /// <summary>Value parsing leaves the text as it is.</summary>
        public void testParseValue( )
        {
            Token value = new Token( "a123-009", ScannerState.ParseValue );

            Assert.True( value.TokenText == "a123-009" );
            Assert.True( value.TokenVal == TokenValue.Value );
            Assert.True( !value.isError() );
        }

        /// <summary>Hyphen removal and capitalisation in CamelCase / CapsCamelCase.</summary>
        public void testCamelCase( )
        {
            Assert.True( Token.CamelCase( "stuff" ) == "stuff" );
            Assert.True( Token.CamelCase( "1-23" ) == "123" );
            Assert.True( Token.CamelCase( "x-attack" ) == "xAttack" );
            Assert.True( Token.CamelCase( "x-gorilla-attack" ) == "xGorillaAttack" );
            Assert.True( Token.CamelCase( "x-" ) == "x" );

            Assert.True( Token.CapsCamelCase( "x-" ) == "X" );
            Assert.True( Token.CapsCamelCase( "Valarm" ) == "Valarm" );
            Assert.True( Token.CapsCamelCase( "valarm" ) == "Valarm" );
        }

        /// <summary>Values are HTML-encoded; text without special characters is unchanged.</summary>
        public void testHtmlEncoding( )
        {
            Token harmless = new Token( "this is, a value", ScannerState.ParseValue );
            Assert.True( harmless.TokenVal == TokenValue.Value );
            Assert.True( harmless.TokenText == "this is, a value" );

            Token markup = new Token( "&this is, <a value>", ScannerState.ParseValue );
            Assert.True( markup.TokenVal == TokenValue.Value );
            Assert.True( markup.TokenText == "&amp;this is, &lt;a value&gt;" );
        }
    }
}
440 #endif
441 #endregion