Filters/HtmlAgilityPack/HtmlDocument.cs

   1 // HtmlAgilityPack V1.0 - Simon Mourier <simonm@microsoft.com>
   2
   3 /*
   4 Copyright (C) 2003 Simon Mourier <simonm@microsoft.com>
   5 All rights reserved.
   6
   7 Redistribution and use in source and binary forms, with or without
   8 modification, are permitted provided that the following conditions
   9 are met:
  10 1. Redistributions of source code must retain the above copyright
  11    notice, this list of conditions and the following disclaimer.
  12 2. Redistributions in binary form must reproduce the above copyright
  13    notice, this list of conditions and the following disclaimer in the
  14    documentation and/or other materials provided with the distribution.
  15 3. The name of the author may not be used to endorse or promote products
  16    derived from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  19 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  20 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  21 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  22 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  23 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  27 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28 */
  29
  30 using System;
  31 using System.IO;
  32 using System.Text;
  33 using System.Diagnostics;
  34 using System.Collections;
  35 using System.Text.RegularExpressions;
  36 using System.Xml;
  37 using System.Xml.XPath;
  38
  39
  40 // Legend: SLIM=Comment added describing changes to original HtmlAgilityPack
  41 //              to reduce memory consumption
  42 // Once the parser is free of bugs, the comments will be taken out
  43 namespace HtmlAgilityPack
  44 {
  45         /// <summary>
  46         /// Represents the type of parsing error.
  47         /// </summary>
  48         public enum HtmlParseErrorCode
  49         {
  50                 /// <summary>
  51                 /// A tag was not closed.
  52                 /// </summary>
  53                 TagNotClosed,
  54
  55                 /// <summary>
  56                 /// A tag was not opened.
  57                 /// </summary>
  58                 TagNotOpened,
  59
  60                 /// <summary>
  61                 /// There is a charset mismatch between stream and declared (META) encoding.
  62                 /// </summary>
  63                 CharsetMismatch,
  64
  65                 /// <summary>
  66                 /// An end tag was not required.
  67                 /// </summary>
  68                 EndTagNotRequired,
  69
  70                 /// <summary>
  71                 /// An end tag is invalid at this position.
  72                 /// </summary>
  73                 EndTagInvalidHere
  74         }
  75
  76         /// <summary>
  77         /// Represents a parsing error found during document parsing.
  78         /// </summary>
  79         public class HtmlParseError
  80         {
  81                 private HtmlParseErrorCode _code;
  82                 private int _line;
  83                 private int _linePosition;
  84                 private int _streamPosition;
  85                 private string _sourceText;
  86                 private string _reason;
  87
  88                 internal HtmlParseError(
  89                         HtmlParseErrorCode code,
  90                         int line,
  91                         int linePosition,
  92                         int streamPosition,
  93                         string sourceText,
  94                         string reason)
  95                 {
  96                         _code = code;
  97                         _line = line;
  98                         _linePosition = linePosition;
  99                         _streamPosition = streamPosition;
 100                         _sourceText = sourceText;
 101                         _reason = reason;
 102                 }
 103
 104                 /// <summary>
 105                 /// Gets the type of error.
 106                 /// </summary>
 107                 public HtmlParseErrorCode Code
 108                 {
 109                         get
 110                         {
 111                                 return _code;
 112                         }
 113                 }
 114
 115                 /// <summary>
 116                 /// Gets the line number of this error in the document.
 117                 /// </summary>
 118                 public int Line
 119                 {
 120                         get
 121                         {
 122                                 return _line;
 123                         }
 124                 }
 125
 126                 /// <summary>
 127                 /// Gets the column number of this error in the document.
 128                 /// </summary>
 129                 public int LinePosition
 130                 {
 131                         get
 132                         {
 133                                 return _linePosition;
 134                         }
 135                 }
 136
 137                 /// <summary>
 138                 /// Gets the absolstream position of this error in the document, relative to the start of the document.
 139                 /// </summary>
 140                 public int StreamPosition
 141                 {
 142                         get
 143                         {
 144                                 return _streamPosition;
 145                         }
 146                 }
 147
 148                 /// <summary>
 149                 /// Gets the the full text of the line containing the error.
 150                 /// </summary>
 151                 public string SourceText
 152                 {
 153                         get
 154                         {
 155                                 return _sourceText;
 156                                 }
 157                 }
 158
 159                 /// <summary>
 160                 /// Gets a description for the error.
 161                 /// </summary>
 162                 public string Reason
 163                 {
 164                         get
 165                         {
 166                                 return _reason;
 167                         }
 168                 }
 169         }
 170
 171         // SLIM: creating this class to wrap around a textreader
 172         //       to emulate ReadToEnd () behaviour
 173         class StreamAsArray {
 174                 private StreamReader _reader;
 175                 private int _length;
 176                 private int _position;
 177                 private bool _eof;
 178                 private char[] _buf_previous; // could have used only one array
 179                 private char[] _buf_current; // but, this is cleaner
 180                 private int _block_size;
 181
 182                 public StreamAsArray (StreamReader r)
 183                 {
 184                         _reader = r;
 185                         _length = 0;
 186                         _position = 0;
 187                         _eof = false;
 188
 189                         _block_size = 1024;
 190                         _buf_previous = new char [_block_size];
 191                         _buf_current = new char [_block_size];
 192
 193                         Read (true);
 194                 }
 195
 196                 private void Read (bool initial)
 197                 {
 198                         if ( !initial) {
 199                                 Array.Copy (_buf_current, _buf_previous, _block_size);
 200                                 _position += _block_size;
 201                         }
 202                         HtmlDocument.Debug ("Debug: Read in buffer at:" + _position);
 203
 204                         int num_read = _reader.Read (_buf_current, 0, _block_size);
 205                         if (num_read < _block_size) {
 206                                 _eof = true;
 207                                 _length = _position + num_read;
 208                         }
 209                         HtmlDocument.Debug ("[" + new string (_buf_current, 0, num_read) + "]");
 210                 }
 211
 212                 public bool Eof (int index) {
 213                         if (_eof)
 214                                 return (index == _length);
 215                         else {
 216                                 if (index >= _position + _block_size &&
 217                                     index < _position + _block_size + _block_size)
 218                                         Read (false);
 219                                 if (_eof)
 220                                         return (index == _length);
 221                                 else
 222                                         return false;
 223                         }
 224                 }
 225
 226                 public new char this[int index] {
 227                         get {
 228                                 if (index >= _position &&
 229                                     index < _position + _block_size)
 230                                         return _buf_current [index % _block_size];
 231                                 if (index >= _position - _block_size &&
 232                                     index < _position)
 233                                         return _buf_previous [ index % _block_size];
 234                                 if (index >= _position + _block_size &&
 235                                     index < _position + _block_size + _block_size) {
 236                                         Read (false);
 237                                         return _buf_current [index % _block_size];
 238                                 }
 239                                 Console.WriteLine ("EXCEPTION!!!");
 240                                 throw new Exception (String.Format ("{0} is out of current bounds:[{1}-{2}] and further than read-ahead",
 241                                                                     index,
 242                                                                     _position - _block_size,
 243                                                                     _position + _block_size - 1));
 244                         }
 245                 }
 246
 247                 // evil function ... you get what you pay for!
 248                 private string OutOfBandRead (int startindex, int length)
 249                 {
 250                         HtmlDocument.Debug ("Out of band read! From " + startindex + " to " + (startindex + length - 1));
 251                         ResetPosition (startindex);
 252                         // ahh.. now we are at the correct place
 253                         // create a buffer of required length
 254                         // who cares if the buffer size does not align well
 255                         // with page boundary
 256                         char[] temp_buf = new char [length];
 257                         int num_read = _reader.Read (temp_buf, 0, length);
 258                         if (num_read < length) {
 259                                 // Shouldnt occur!!!
 260                                 _eof = true;
 261                                 _length = startindex + num_read;
 262                         }
 263                         // discard data and reset stream position
 264                         int t = (_eof ? _length :_position + _block_size);
 265                         ResetPosition (t);
 266                         return new String (temp_buf);
 267                 }
 268
 269                 // streamreader does not allow seeking
 270                 // seek on its basestream does not reflect the position
 271                 // of the reader - it is governed by the buffer size
 272                 // of the underlying stream
 273                 // :( so, read character by character from beginning ...
 274                 private void ResetPosition (int pos)
 275                 {
 276                         _reader.DiscardBufferedData ();
 277                         _reader.BaseStream.Position = 0;
 278                         // read in chunks of block_size
 279                         int n1 = pos / _block_size;
 280                         int n2 = pos % _block_size;
 281                         char[] tmp = new char [_block_size];
 282                         // yo ho... start reading till we have reach pos
 283                         // hopefully, reader will buffer itself, so we can be mean and get one char at a time
 284                         for (int i = 0; i < n1; ++i)
 285                                 _reader.Read (tmp, 0, _block_size);
 286                         for (int i = 0; i < n2; ++i)
 287                                 _reader.Read ();
 288                         tmp = null;
 289                 }
 290
 291                 public string Substring (int startindex, int length)
 292                 {
 293                         if (length == 0) {
 294                                 HtmlDocument.Debug ("substring:" + startindex + " " + length + " " + _position + ":");
 295                                 return String.Empty;
 296                         }
 297                         if (length > _block_size || startindex < _position - _block_size) {
 298                                 return OutOfBandRead (startindex, length);
 299                         }
 300                         if (startindex + length - 1 >= _position + _block_size) {
 301                                 Read (false);
 302                         }
 303                         string substr;
 304                         if (startindex < _position) {
 305                                 int len_1 = _position - startindex;
 306                                 if (length < len_1)
 307                                         substr = new String (_buf_previous, _block_size - len_1, length);
 308                                 else {
 309                                         substr = new String (_buf_previous, _block_size - len_1, len_1);
 310                                         substr += new String (_buf_current, 0, length - len_1);
 311                                 }
 312                         } else {
 313                                 substr = new String (_buf_current, startindex - _position, length);
 314                         }
 315                         return substr;
 316                 }
 317
 318                 // FIXME: Is this costly ?
 319                 public int FullLength {
 320                         get {
 321                                 return (int)_reader.BaseStream.Length;
 322                         }
 323                 }
 324         }
 325
 326         /// <summary>
 327         /// Represents a complete HTML document.
 328         /// </summary>
 329         public class HtmlDocument: IXPathNavigable
 330         {
                // SLIM: Make the parser event driven
                // callback for FilterHtml
                // return value is a way for the callback to signal to continue or stop parsing
                public delegate bool NodeHandler (HtmlNode node);
                // Callback instance; null until a caller assigns one.
                public NodeHandler ReportNode;
                // misnomer ... should be called event_driven_mode
                private bool _streammode = false;
                private bool _stop_parsing = false;

                // Messages for exceptions raised elsewhere in the library.
                internal static readonly string HtmlExceptionRefNotChild = "Reference node must be a child of this node";
                internal static readonly string HtmlExceptionUseIdAttributeFalse = "You need to set UseIdAttribute property to true to enable this feature";

                // Allocated by DetectEncoding only when OptionCheckSyntax is true, otherwise null.
                internal Hashtable _openednodes;
                // Cleared by DetectEncoding once an encoding has been found.
                internal Hashtable _lastnodes = new Hashtable();
                // Allocated by DetectEncoding only when OptionUseIdAttribute is true, otherwise null.
                internal Hashtable _nodesid;
                // Root node; recreated whenever a document is (re)parsed.
                private HtmlNode _documentnode;
                //SLIM: internal string _text;
                // The document text, addressed by index through a block-buffered reader.
                internal StreamAsArray _text;
                private HtmlNode _currentnode;
                private HtmlNode _lastparentnode;
                private HtmlAttribute _currentattribute;
                private int _index;
                private int _line;
                private int _lineposition, _maxlineposition;
                private int _c;
                private bool _fullcomment;
                // Encoding of the reader being parsed; null when the reader is not a StreamReader.
                private System.Text.Encoding _streamencoding;
                // Reset to null at the start of DetectEncoding.
                private System.Text.Encoding _declaredencoding;
                private ArrayList _parseerrors = new ArrayList();
                private ParseState _state, _oldstate;
                private Crc32 _crc32 = null;
                // Set to true by DetectEncoding before parsing.
                private bool _onlyDetectEncoding = false;
                private int _pcdata_quote_char = '\0';
 364
 365                 private static bool _debug = false;
 366                 internal static void Debug (string s)
 367                 {
 368                         if (_debug)
 369                                 Console.WriteLine (s);
 370                 }
 371
                // public props

                /// <summary>
                /// Defines if a checksum must be computed for the document while parsing. Default is false.
                /// </summary>
                public bool OptionComputeChecksum = false;

                /// <summary>
                /// Defines if declared encoding must be read from the document.
                /// Declared encoding is determined using the meta http-equiv="content-type" content="text/html;charset=XXXXX" html node.
                /// Default is true.
                /// </summary>
                public bool OptionReadEncoding = true;


                /// <summary>
                /// Defines if non closed nodes will be checked at the end of parsing. Default is true.
                /// </summary>
                public bool OptionCheckSyntax = true;

                /// <summary>
                /// Defines if the 'id' attribute must be specifically used. Default is true.
                /// </summary>
                public bool OptionUseIdAttribute = true;

                /// <summary>
                /// Defines if empty nodes must be written as closed during output. Default is false.
                /// </summary>
                public bool OptionWriteEmptyNodes = false;

                /// <summary>
                /// Defines if output must conform to XML, instead of HTML. Default is false.
                /// </summary>
                public bool OptionOutputAsXml = false;

                /// <summary>
                /// Defines if name must be output in uppercase. Default is false.
                /// </summary>
                public bool OptionOutputUpperCase = false;

                /// <summary>
                /// Defines if attribute value output must be optimized (not bound with double quotes if it is possible). Default is false.
                /// </summary>
                public bool OptionOutputOptimizeAttributeValues = false;

                /// <summary>
                /// Adds Debugging attributes to node. Default is false.
                /// </summary>
                public bool OptionAddDebuggingAttributes = false;

                /// <summary>
                /// Defines if source text must be extracted while parsing errors.
                /// If the document has a lot of errors, or cascading errors, parsing performance can be dramatically affected if set to true.
                /// Default is false.
                /// </summary>
                public bool OptionExtractErrorSourceText = false; // turning this on can dramatically slow performance if a lot of errors are detected

                /// <summary>
                /// Defines if closing for non closed nodes must be done at the end or directly in the document.
                /// Setting this to true can actually change how browsers render the page. Default is false.
                /// </summary>
                public bool OptionAutoCloseOnEnd = false; // close errors at the end

                /// <summary>
                /// Defines if LI, TR, TH, TD tags must be partially fixed when nesting errors are detected. Default is false.
                /// </summary>
                public bool OptionFixNestedTags = false; // fix li, tr, th, td tags

                /// <summary>
                /// Defines the maximum length of source text or parse errors. Default is 100.
                /// </summary>
                public int OptionExtractErrorSourceTextMaxLength = 100;

                /// <summary>
                /// Defines the default stream encoding to use. Default is System.Text.Encoding.UTF8.
                /// </summary>
                // From http://www.w3.org/TR/REC-html40/charset.html
                // The HTTP protocol ([RFC2616], section 3.7.1) mentions ISO-8859-1 as a default character encoding when the "charset" parameter is absent from the "Content-Type" header field.
                // Nevertheless this field is initialised to UTF-8; the reason is not recorded here.
                //FIXME: Fix the default encoding!
                public System.Text.Encoding OptionDefaultStreamEncoding = Encoding.UTF8;
 453
 454                 /// <summary>
 455                 /// Gets a list of parse errors found in the document.
 456                 /// </summary>
 457                 public ArrayList ParseErrors
 458                 {
 459                         get
 460                         {
 461                                 return _parseerrors;
 462                         }
 463                 }
 464
 465                 /// <summary>
 466                 /// Gets the document's stream encoding.
 467                 /// </summary>
 468                 public System.Text.Encoding StreamEncoding
 469                 {
 470                         get
 471                         {
 472                                 return _streamencoding;
 473                         }
 474                 }
 475
 476                 /// <summary>
 477                 /// Gets the document's declared encoding.
 478                 /// Declared encoding is determined using the meta http-equiv="content-type" content="text/html;charset=XXXXX" html node.
 479                 /// </summary>
 480                 public System.Text.Encoding DeclaredEncoding
 481                 {
 482                         get
 483                         {
 484                                 return _declaredencoding;
 485                         }
 486                 }
 487
 488                 /// <summary>
 489                 /// Creates an instance of an HTML document.
 490                 /// </summary>
 491                 public HtmlDocument()
 492                 {
 493                         _documentnode = CreateNode(HtmlNodeType.Document, 0);
 494                 }
 495
 496                 internal HtmlNode GetXmlDeclaration()
 497                 {
 498                         if (!_documentnode.HasChildNodes)
 499                         {
 500                                 return null;
 501                         }
 502
 503                         foreach(HtmlNode node in _documentnode._childnodes)
 504                         {
 505                                 if (node.Name == "?xml") // it's ok, names are case sensitive
 506                                 {
 507                                         return node;
 508                                 }
 509                         }
 510                         return null;
 511                 }
 512
 513                 /// <summary>
 514                 /// Applies HTML encoding to a specified string.
 515                 /// </summary>
 516                 /// <param name="html">The input string to encode. May not be null.</param>
 517                 /// <returns>The encoded string.</returns>
 518                 public static string HtmlEncode(string html)
 519                 {
 520                         if (html == null)
 521                         {
 522                                 throw new ArgumentNullException("html");
 523                         }
 524                         // replace & by &amp; but only once!
 525                         Regex rx = new Regex("&(?!(amp;)|(lt;)|(gt;)|(quot;))", RegexOptions.IgnoreCase);
 526                         return rx.Replace(html, "&amp;").Replace("<", "&lt;").Replace(">", "&gt;").Replace("\"", "&quot;");
 527                 }
 528
 529                 /// <summary>
 530                 /// Detects the encoding of an HTML stream.
 531                 /// </summary>
 532                 /// <param name="stream">The input stream. May not be null.</param>
 533                 /// <returns>The detected encoding.</returns>
 534                 public Encoding DetectEncoding(Stream stream)
 535                 {
 536                         if (stream == null)
 537                         {
 538                                 throw new ArgumentNullException("stream");
 539                         }
 540                         return DetectEncoding(new StreamReader(stream));
 541                 }
 542
 543                 /// <summary>
 544                 /// Detects the encoding of an HTML file.
 545                 /// </summary>
 546                 /// <param name="path">Path for the file containing the HTML document to detect. May not be null.</param>
 547                 /// <returns>The detected encoding.</returns>
 548                 public Encoding DetectEncoding(string path)
 549                 {
 550                         if (path == null)
 551                         {
 552                                 throw new ArgumentNullException("path");
 553                         }
 554                         StreamReader sr = new StreamReader(path, OptionDefaultStreamEncoding);
 555                         Encoding encoding = DetectEncoding(sr);
 556                         sr.Close();
 557                         return encoding;
 558                 }
 559
 560                 /// <summary>
 561                 /// Detects the encoding of an HTML text.
 562                 /// </summary>
 563                 /// <param name="html">The input html text. May not be null.</param>
 564                 /// <returns>The detected encoding.</returns>
 565                 public Encoding DetectEncodingHtml(string html)
 566                 {
 567                         if (html == null)
 568                         {
 569                                 throw new ArgumentNullException("html");
 570                         }
 571                         StringReader sr = new StringReader(html);
 572                         Encoding encoding = DetectEncoding(sr);
 573                         sr.Close();
 574                         return encoding;
 575                 }
 576
 577                 /// <summary>
 578                 /// Detects the encoding of an HTML text provided on a TextReader.
 579                 /// </summary>
 580                 /// <param name="reader">The TextReader used to feed the HTML. May not be null.</param>
 581                 /// <returns>The detected encoding.</returns>
 582                 public Encoding DetectEncoding(TextReader reader)
 583                 {
 584                         if (reader == null)
 585                         {
 586                                 throw new ArgumentNullException("reader");
 587                         }
 588                         _onlyDetectEncoding = true;
 589                         if (OptionCheckSyntax)
 590                         {
 591                                 _openednodes = new Hashtable();
 592                         }
 593                         else
 594                         {
 595                                 _openednodes = null;
 596                         }
 597
 598                         if (OptionUseIdAttribute)
 599                         {
 600                                 _nodesid = new Hashtable();
 601                         }
 602                         else
 603                         {
 604                                 _nodesid = null;
 605                         }
 606
 607                         StreamReader sr = reader as StreamReader;
 608                         if (sr != null)
 609                         {
 610                                 _streamencoding = sr.CurrentEncoding;
 611                         }
 612                         else
 613                         {
 614                                 _streamencoding = null;
 615                         }
 616                         _declaredencoding = null;
 617
 618                         // SLIM: _text = reader.ReadToEnd();
 619                         _text = new StreamAsArray (sr);
 620                         _documentnode = CreateNode(HtmlNodeType.Document, 0);
 621
 622                         // this is a hack, but it allows us not to muck with the original parsing code
 623                         try
 624                         {
 625                                 Parse();
 626                         }
 627                         catch(EncodingFoundException ex)
 628                         {
 629                                 _lastnodes.Clear();
 630                                 return ex.Encoding;
 631                         }
 632                         return null;
 633                 }
 634
 635                 /// <summary>
 636                 /// Loads an HTML document from a stream.
 637                 /// </summary>
 638                 /// <param name="stream">The input stream.</param>
 639                 public void Load(Stream stream)
 640                 {
 641                         Load(new StreamReader(stream, OptionDefaultStreamEncoding));
 642                 }
 643
 644                 /// <summary>
 645                 /// Loads an HTML document from a stream.
 646                 /// </summary>
 647                 /// <param name="stream">The input stream.</param>
 648                 /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
 649                 public void Load(Stream stream, bool detectEncodingFromByteOrderMarks)
 650                 {
 651                         Load(new StreamReader(stream, detectEncodingFromByteOrderMarks));
 652                 }
 653
 654                 /// <summary>
 655                 /// Loads an HTML document from a stream.
 656                 /// </summary>
 657                 /// <param name="stream">The input stream.</param>
 658                 /// <param name="encoding">The character encoding to use.</param>
 659                 public void Load(Stream stream, Encoding encoding)
 660                 {
 661                         Load(new StreamReader(stream, encoding));
 662                 }
 663
 664                 /// <summary>
 665                 /// Loads an HTML document from a stream.
 666                 /// </summary>
 667                 /// <param name="stream">The input stream.</param>
 668                 /// <param name="encoding">The character encoding to use.</param>
 669                 /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
 670                 public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
 671                 {
 672                         Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks));
 673                 }
 674
 675                 /// <summary>
 676                 /// Loads an HTML document from a stream.
 677                 /// </summary>
 678                 /// <param name="stream">The input stream.</param>
 679                 /// <param name="encoding">The character encoding to use.</param>
 680                 /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
 681                 /// <param name="buffersize">The minimum buffer size.</param>
 682                 public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
 683                 {
 684                         Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks, buffersize));
 685                 }
 686
 687                 /// <summary>
 688                 /// Loads an HTML document from a file.
 689                 /// </summary>
 690                 /// <param name="path">The complete file path to be read. May not be null.</param>
 691                 public void Load(string path)
 692                 {
 693                         if (path == null)
 694                         {
 695                                 throw new ArgumentNullException("path");
 696                         }
 697                         StreamReader sr = new StreamReader(path, OptionDefaultStreamEncoding);
 698                         Load(sr);
 699                         sr.Close();
 700                 }
 701
 702                 /// <summary>
 703                 /// Loads an HTML document from a file.
 704                 /// </summary>
 705                 /// <param name="path">The complete file path to be read. May not be null.</param>
 706                 /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
 707                 public void Load(string path, bool detectEncodingFromByteOrderMarks)
 708                 {
 709                         if (path == null)
 710                         {
 711                                 throw new ArgumentNullException("path");
 712                         }
 713                         StreamReader sr = new StreamReader(path, detectEncodingFromByteOrderMarks);
 714                         Load(sr);
 715                         sr.Close();
 716                 }
 717
 718                 /// <summary>
 719                 /// Loads an HTML document from a file.
 720                 /// </summary>
 721                 /// <param name="path">The complete file path to be read. May not be null.</param>
 722                 /// <param name="encoding">The character encoding to use. May not be null.</param>
 723                 public void Load(string path, Encoding encoding)
 724                 {
 725                         if (path == null)
 726                         {
 727                                 throw new ArgumentNullException("path");
 728                         }
 729                         if (encoding == null)
 730                         {
 731                                 throw new ArgumentNullException("encoding");
 732                         }
 733                         StreamReader sr = new StreamReader(path, encoding);
 734                         Load(sr);
 735                         sr.Close();
 736                 }
 737
 738                 /// <summary>
 739                 /// Loads an HTML document from a file.
 740                 /// </summary>
 741                 /// <param name="path">The complete file path to be read. May not be null.</param>
 742                 /// <param name="encoding">The character encoding to use. May not be null.</param>
 743                 /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
 744                 public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
 745                 {
 746                         if (path == null)
 747                         {
 748                                 throw new ArgumentNullException("path");
 749                         }
 750                         if (encoding == null)
 751                         {
 752                                 throw new ArgumentNullException("encoding");
 753                         }
 754                         StreamReader sr = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks);
 755                         Load(sr);
 756                         sr.Close();
 757                 }
 758
 759                 /// <summary>
 760                 /// Loads an HTML document from a file.
 761                 /// </summary>
 762                 /// <param name="path">The complete file path to be read. May not be null.</param>
 763                 /// <param name="encoding">The character encoding to use. May not be null.</param>
 764                 /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
 765                 /// <param name="buffersize">The minimum buffer size.</param>
 766                 public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
 767                 {
 768                         if (path == null)
 769                         {
 770                                 throw new ArgumentNullException("path");
 771                         }
 772                         if (encoding == null)
 773                         {
 774                                 throw new ArgumentNullException("encoding");
 775                         }
 776                         StreamReader sr = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks, buffersize);
 777                         Load(sr);
 778                         sr.Close();
 779                 }
 780
 781                 /// <summary>
 782                 /// Loads the HTML document from the specified string.
 783                 /// </summary>
 784                 /// <param name="html">String containing the HTML document to load. May not be null.</param>
 785                 public void LoadHtml(string html)
 786                 {
 787                         if (html == null)
 788                         {
 789                                 throw new ArgumentNullException("html");
 790                         }
 791                         StringReader sr = new StringReader(html);
 792                         Load(sr);
 793                         sr.Close();
 794                 }
 795
 796                 /// <summary>
 797                 /// Detects the encoding of an HTML document from a file first, and then loads the file.
 798                 /// </summary>
 799                 /// <param name="path">The complete file path to be read.</param>
 800                 public void DetectEncodingAndLoad(string path)
 801                 {
 802                         DetectEncodingAndLoad(path, true);
 803                 }
 804
 805                 /// <summary>
 806                 /// Detects the encoding of an HTML document from a file first, and then loads the file.
 807                 /// </summary>
 808                 /// <param name="path">The complete file path to be read. May not be null.</param>
 809                 /// <param name="detectEncoding">true to detect encoding, false otherwise.</param>
 810                 public void DetectEncodingAndLoad(string path, bool detectEncoding)
 811                 {
 812                         if (path == null)
 813                         {
 814                                 throw new ArgumentNullException("path");
 815                         }
 816                         System.Text.Encoding enc;
 817                         if (detectEncoding)
 818                         {
 819                                 enc = DetectEncoding(path);
 820                         }
 821                         else
 822                         {
 823                                 enc = null;
 824                         }
 825
 826                         if (enc == null)
 827                         {
 828                                 Load(path);
 829                         }
 830                         else
 831                         {
 832                                 Load(path, enc);
 833                         }
 834                 }
 835
 836                 /// <summary>
 837                 /// Loads the HTML document from the specified TextReader.
 838                 /// </summary>
 839                 /// <param name="reader">The TextReader used to feed the HTML data into the document. May not be null.</param>
 840                 public void Load(TextReader reader)
 841                 {
 842                         // all Load methods pass down to this one
 843                         if (reader == null)
 844                         {
 845                                 throw new ArgumentNullException("reader");
 846                         }
 847
 848                         _onlyDetectEncoding = false;
 849
 850                         if (OptionCheckSyntax)
 851                         {
 852                                 _openednodes = new Hashtable();
 853                         }
 854                         else
 855                         {
 856                                 _openednodes = null;
 857                         }
 858
 859                         if (OptionUseIdAttribute)
 860                         {
 861                                 _nodesid = new Hashtable();
 862                         }
 863                         else
 864                         {
 865                                 _nodesid = null;
 866                         }
 867
 868                         StreamReader sr = reader as StreamReader;
 869                         if (sr != null)
 870                         {
 871                                 try
 872                                 {
 873                                     // trigger bom read if needed
 874                                     sr.Peek();
 875                                 }
 876                                 catch
 877                                 {
 878                                     // void on purpose
 879                                 }
 880                                 _streamencoding = sr.CurrentEncoding;
 881                         }
 882                         else
 883                         {
 884                                 _streamencoding = null;
 885                         }
 886                         _declaredencoding = null;
 887
 888                         // SLIM: _text = reader.ReadToEnd();
 889                         _text = new StreamAsArray (sr);
 890                         _documentnode = CreateNode(HtmlNodeType.Document, 0);
 891                         Parse();
 892
 893                         if (OptionCheckSyntax)
 894                         {
 895                                 foreach(HtmlNode node in _openednodes.Values)
 896                                 {
 897                                         if (!node._starttag)    // already reported
 898                                         {
 899                                                 continue;
 900                                         }
 901
 902                                         string html;
 903                                         if (OptionExtractErrorSourceText)
 904                                         {
 905                                                 html = node.OuterHtml;
 906                                                 if (html.Length > OptionExtractErrorSourceTextMaxLength)
 907                                                 {
 908                                                         html = html.Substring(0, OptionExtractErrorSourceTextMaxLength);
 909                                                 }
 910                                         }
 911                                         else
 912                                         {
 913                                                 html = string.Empty;
 914                                         }
 915                                         AddError(
 916                                                 HtmlParseErrorCode.TagNotClosed,
 917                                                 node._line, node._lineposition,
 918                                                 node._streamposition, html,
 919                                                 "End tag </" + node.Name + "> was not found");
 920                                 }
 921
 922                                 // we don't need this anymore
 923                                 _openednodes.Clear();
 924                         }
 925                 }
 926
 927                 internal System.Text.Encoding GetOutEncoding()
 928                 {
 929                         // when unspecified, use the stream encoding first
 930                         if (_declaredencoding != null)
 931                         {
 932                                 return _declaredencoding;
 933                         }
 934                         else
 935                         {
 936                                 if (_streamencoding != null)
 937                                 {
 938                                         return _streamencoding;
 939                                 }
 940                         }
 941                         return OptionDefaultStreamEncoding;
 942                 }
 943
 944
 945                 /// <summary>
 946                 /// Gets the document's output encoding.
 947                 /// </summary>
 948                 public System.Text.Encoding Encoding
 949                 {
 950                         get
 951                         {
 952                                 return GetOutEncoding();
 953                         }
 954                 }
 955
 956                 /// <summary>
 957                 /// Saves the HTML document to the specified stream.
 958                 /// </summary>
 959                 /// <param name="outStream">The stream to which you want to save.</param>
 960                 public void Save(Stream outStream)
 961                 {
 962                         StreamWriter sw = new StreamWriter(outStream, GetOutEncoding());
 963                         Save(sw);
 964                 }
 965
 966                 /// <summary>
 967                 /// Saves the HTML document to the specified stream.
 968                 /// </summary>
 969                 /// <param name="outStream">The stream to which you want to save. May not be null.</param>
 970                 /// <param name="encoding">The character encoding to use. May not be null.</param>
 971                 public void Save(Stream outStream, System.Text.Encoding encoding)
 972                 {
 973                         if (outStream == null)
 974                         {
 975                                 throw new ArgumentNullException("outStream");
 976                         }
 977                         if (encoding == null)
 978                         {
 979                                 throw new ArgumentNullException("encoding");
 980                         }
 981                         StreamWriter sw = new StreamWriter(outStream, encoding);
 982                         Save(sw);
 983                 }
 984
 985                 /// <summary>
 986                 /// Saves the mixed document to the specified file.
 987                 /// </summary>
 988                 /// <param name="filename">The location of the file where you want to save the document.</param>
 989                 public void Save(string filename)
 990                 {
 991                         StreamWriter sw = new StreamWriter(filename, false, GetOutEncoding());
 992                         Save(sw);
 993                         sw.Close();
 994                 }
 995
 996                 /// <summary>
 997                 /// Saves the mixed document to the specified file.
 998                 /// </summary>
 999                 /// <param name="filename">The location of the file where you want to save the document. May not be null.</param>
1000                 /// <param name="encoding">The character encoding to use. May not be null.</param>
1001                 public void Save(string filename, System.Text.Encoding encoding)
1002                 {
1003                         if (filename == null)
1004                         {
1005                                 throw new ArgumentNullException("filename");
1006                         }
1007                         if (encoding == null)
1008                         {
1009                                 throw new ArgumentNullException("encoding");
1010                         }
1011                         StreamWriter sw = new StreamWriter(filename, false, encoding);
1012                         Save(sw);
1013                         sw.Close();
1014                 }
1015
1016                 /// <summary>
1017                 /// Saves the HTML document to the specified StreamWriter.
1018                 /// </summary>
1019                 /// <param name="writer">The StreamWriter to which you want to save.</param>
1020                 public void Save(StreamWriter writer)
1021                 {
1022                         Save((TextWriter)writer);
1023                 }
1024
1025                 /// <summary>
1026                 /// Saves the HTML document to the specified TextWriter.
1027                 /// </summary>
1028                 /// <param name="writer">The TextWriter to which you want to save. May not be null.</param>
1029                 public void Save(TextWriter writer)
1030                 {
1031                         if (writer == null)
1032                         {
1033                                 throw new ArgumentNullException("writer");
1034                         }
1035                         DocumentNode.WriteTo(writer);
1036                 }
1037
1038                 /// <summary>
1039                 /// Saves the HTML document to the specified XmlWriter.
1040                 /// </summary>
1041                 /// <param name="writer">The XmlWriter to which you want to save.</param>
1042                 public void Save(XmlWriter writer)
1043                 {
1044                         DocumentNode.WriteTo(writer);
1045                         writer.Flush();
1046                 }
1047
1048                 /// <summary>
1049                 /// Creates a new XPathNavigator object for navigating this HTML document.
1050                 /// </summary>
1051                 /// <returns>An XPathNavigator object. The XPathNavigator is positioned on the root of the document.</returns>
1052                 public XPathNavigator CreateNavigator()
1053                 {
1054                         return new HtmlNodeNavigator(this, _documentnode);
1055                 }
1056
1057                 internal void SetIdForNode(HtmlNode node, string id)
1058                 {
1059                         if (!OptionUseIdAttribute)
1060                         {
1061                                 return;
1062                         }
1063
1064                         if ((_nodesid == null) || (id == null))
1065                         {
1066                                 return;
1067                         }
1068
1069                         if (node == null)
1070                         {
1071                                 _nodesid.Remove(id.ToLower());
1072                         }
1073                         else
1074                         {
1075                                 _nodesid[id.ToLower()] = node;
1076                         }
1077                 }
1078
1079                 /// <summary>
1080                 /// Gets the HTML node with the specified 'id' attribute value.
1081                 /// </summary>
1082                 /// <param name="id">The attribute id to match. May not be null.</param>
1083                 /// <returns>The HTML node with the matching id or null if not found.</returns>
1084                 public HtmlNode GetElementbyId(string id)
1085                 {
1086                         if (id == null)
1087                         {
1088                                 throw new ArgumentNullException("id");
1089                         }
1090                         if (_nodesid == null)
1091                         {
1092                                 throw new Exception(HtmlExceptionUseIdAttributeFalse);
1093                         }
1094
1095                         return _nodesid[id.ToLower()] as HtmlNode;
1096                 }
1097
1098                 /// <summary>
1099                 /// Creates an HTML element node with the specified name.
1100                 /// </summary>
1101                 /// <param name="name">The qualified name of the element. May not be null.</param>
1102                 /// <returns>The new HTML node.</returns>
1103                 public HtmlNode CreateElement(string name)
1104                 {
1105                         if (name == null)
1106                         {
1107                                 throw new ArgumentNullException("name");
1108                         }
1109                         HtmlNode node = CreateNode(HtmlNodeType.Element);
1110                         node._name = name;
1111                         return node;
1112                 }
1113
1114                 /// <summary>
1115                 /// Creates an HTML comment node.
1116                 /// </summary>
1117                 /// <returns>The new HTML comment node.</returns>
1118                 public HtmlCommentNode CreateComment()
1119                 {
1120                         return (HtmlCommentNode)CreateNode(HtmlNodeType.Comment);
1121                 }
1122
1123                 /// <summary>
1124                 /// Creates an HTML comment node with the specified comment text.
1125                 /// </summary>
1126                 /// <param name="comment">The comment text. May not be null.</param>
1127                 /// <returns>The new HTML comment node.</returns>
1128                 public HtmlCommentNode CreateComment(string comment)
1129                 {
1130                         if (comment == null)
1131                         {
1132                                 throw new ArgumentNullException("comment");
1133                         }
1134                         HtmlCommentNode c = CreateComment();
1135                         c.Comment = comment;
1136                         return c;
1137                 }
1138
1139                 /// <summary>
1140                 /// Creates an HTML text node.
1141                 /// </summary>
1142                 /// <returns>The new HTML text node.</returns>
1143                 public HtmlTextNode CreateTextNode()
1144                 {
1145                         return (HtmlTextNode)CreateNode(HtmlNodeType.Text);
1146                 }
1147
1148                 /// <summary>
1149                 /// Creates an HTML text node with the specified text.
1150                 /// </summary>
1151                 /// <param name="text">The text of the node. May not be null.</param>
1152                 /// <returns>The new HTML text node.</returns>
1153                 public HtmlTextNode CreateTextNode(string text)
1154                 {
1155                         if (text == null)
1156                         {
1157                                 throw new ArgumentNullException("text");
1158                         }
1159                         HtmlTextNode t = CreateTextNode();
1160                         t.Text = text;
1161                         return t;
1162                 }
1163
1164                 internal HtmlNode CreateNode(HtmlNodeType type)
1165                 {
1166                         return CreateNode(type, -1);
1167                 }
1168
1169                 internal HtmlNode CreateNode(HtmlNodeType type, int index)
1170                 {
1171                         switch (type)
1172                         {
1173                                 case HtmlNodeType.Comment:
1174                                         return new HtmlCommentNode(this, index);
1175
1176                                 case HtmlNodeType.Text:
1177                                         return new HtmlTextNode(this, index);
1178
1179                                 default:
1180                                         return new HtmlNode(type, this, index);
1181                         }
1182                 }
1183
1184                 internal HtmlAttribute CreateAttribute()
1185                 {
1186                         return new HtmlAttribute(this);
1187                 }
1188
1189                 /// <summary>
1190                 /// Creates an HTML attribute with the specified name.
1191                 /// </summary>
1192                 /// <param name="name">The name of the attribute. May not be null.</param>
1193                 /// <returns>The new HTML attribute.</returns>
1194                 public HtmlAttribute CreateAttribute(string name)
1195                 {
1196                         if (name == null)
1197                         {
1198                                 throw new ArgumentNullException("name");
1199                         }
1200                         HtmlAttribute att = CreateAttribute();
1201                         att.Name = name;
1202                         return att;
1203                 }
1204
1205                 /// <summary>
1206                 /// Creates an HTML attribute with the specified name.
1207                 /// </summary>
1208                 /// <param name="name">The name of the attribute. May not be null.</param>
1209                 /// <param name="value">The value of the attribute.</param>
1210                 /// <returns>The new HTML attribute.</returns>
1211                 public HtmlAttribute CreateAttribute(string name, string value)
1212                 {
1213                         if (name == null)
1214                         {
1215                                 throw new ArgumentNullException("name");
1216                         }
1217                         HtmlAttribute att = CreateAttribute(name);
1218                         att.Value = value;
1219                         return att;
1220                 }
1221
1222                 /// <summary>
1223                 /// Gets the root node of the document.
1224                 /// </summary>
1225                 public HtmlNode DocumentNode
1226                 {
1227                         get
1228                         {
1229                                 return _documentnode;
1230                         }
1231                 }
1232
1233                 /// <summary>
1234                 /// Gets the document CRC32 checksum if OptionComputeChecksum was set to true before parsing, 0 otherwise.
1235                 /// </summary>
1236                 public int CheckSum
1237                 {
1238                         get
1239                         {
1240                                 if (_crc32 == null)
1241                                 {
1242                                         return 0;
1243                                 }
1244                                 else
1245                                 {
1246                                         return (int)_crc32.CheckSum;
1247                                 }
1248                         }
1249                 }
1250
1251                 public bool StreamMode
1252                 {
1253                         get
1254                         {
1255                                 return _streammode;
1256                         }
1257                         set
1258                         {
1259                                 _streammode = value;
1260                         }
1261                 }
1262
1263                 private HtmlParseError AddError(
1264                                 HtmlParseErrorCode code,
1265                                 int line,
1266                                 int linePosition,
1267                                 int streamPosition,
1268                                 string sourceText,
1269                                 string reason)
1270                         {
1271                         HtmlParseError err = new HtmlParseError(code, line, linePosition, streamPosition, sourceText, reason);
1272                         _parseerrors.Add(err);
1273                         return err;
1274                 }
1275
1276                 private enum ParseState
1277                 {
1278                         Text,
1279                         WhichTag,
1280                         Tag,
1281                         BetweenAttributes,
1282                         EmptyTag,
1283                         AttributeName,
1284                         AttributeBeforeEquals,
1285                         AttributeAfterEquals,
1286                         AttributeValue,
1287                         Comment,
1288                         QuotedAttributeValue,
1289                         ServerSideCode,
1290                         PcDataQuote,
1291                         PcData
1292                 }
1293
1294                 private void IncrementPosition()
1295                 {
1296                         if (_crc32 != null)
1297                         {
1298                                 // REVIEW: should we add some checksum code in DecrementPosition too?
1299                                 _crc32.AddToCRC32(_c);
1300                         }
1301
1302                         _index++;
1303                         _maxlineposition = _lineposition;
1304                         if (_c == 10)
1305                         {
1306                                 _lineposition = 1;
1307                                 _line++;
1308                         }
1309                         else
1310                         {
1311                                 _lineposition++;
1312                         }
1313                 }
1314
1315                 private void DecrementPosition()
1316                 {
1317                         _index--;
1318                         if (_lineposition == 1)
1319                         {
1320                                 _lineposition = _maxlineposition;
1321                                 _line--;
1322                         }
1323                         else
1324                         {
1325                                 _lineposition--;
1326                         }
1327                 }
1328
1329                 private void Parse()
1330                 {
                             // Tokenizes _text: reads one character per iteration and runs it through the
                             // ParseState machine, calling the Push* helpers at node, node-name and
                             // attribute boundaries. Stops at the end of the text or when _stop_parsing is set.
1331                         int lastquote = 0; // quote character (' or ") that opened the attribute value being read
1332                         if (OptionComputeChecksum)
1333                         {
1334                                 _crc32 = new Crc32();
1335                         }
1336                         // initial parser state
1337                         _lastnodes = new Hashtable();
1338                         _c = 0;
1339                         _fullcomment = false;
1340                         _parseerrors = new ArrayList();
1341                         _line = 1;
1342                         _lineposition = 1;
1343                         _maxlineposition = 1;
1344                         // parsing starts in the Text state; the document node spans the whole text
1345                         _state = ParseState.Text;
1346                         _oldstate = _state;
1347                         _documentnode._innerlength = _text.FullLength;
1348                         _documentnode._outerlength = _text.FullLength;
1349
1350                         _lastparentnode = _documentnode;
1351                         _currentnode = CreateNode(HtmlNodeType.Text, 0);
1352                         _currentattribute = null;
1353
1354                         _index = 0;
1355                         PushNodeStart(HtmlNodeType.Text, 0);
1356                         // SLIM: while (_index<_text.Length)
1357                         while (! _stop_parsing && ! _text.Eof (_index))
1358                         {
1359                                 _c = _text[_index];
1360                                 IncrementPosition(); // _index now points one past _c
1361
1362                                 switch(_state)
1363                                 {
1364                                         case ParseState.Text: // plain text: only '<' (see NewCheck) is significant
1365                                                 if (NewCheck())
1366                                                         continue;
1367                                                 break;
1368
1369                                         case ParseState.WhichTag: // first character after a '<'
1370                                                 if (NewCheck())
1371                                                         continue;
1372                                                 if (_c == '/') // "</": the node name starts after the slash
1373                                                 {
1374                                                         PushNodeNameStart(false, _index);
1375                                                 }
1376                                                 else
1377                                                 {
1378                                                         PushNodeNameStart(true, _index-1);
1379                                                         DecrementPosition(); // step back so the Tag state reads this character again
1380                                                 }
1381                                                 _state = ParseState.Tag;
1382                                                 break;
1383
1384                                         case ParseState.Tag: // inside a tag name; whitespace, '/' or '>' ends it
1385                                                 if (NewCheck())
1386                                                         continue;
1387                                                 if (IsWhiteSpace(_c))
1388                                                 {
1389                                                         PushNodeNameEnd(_index-1);
1390                                                         if (_state != ParseState.Tag) // state changed by the call above: leave it alone
1391                                                                 continue;
1392                                                         _state = ParseState.BetweenAttributes;
1393                                                         continue;
1394                                                 }
1395                                                 if (_c == '/')
1396                                                 {
1397                                                         PushNodeNameEnd(_index-1);
1398                                                         if (_state != ParseState.Tag)
1399                                                                 continue;
1400                                                         _state = ParseState.EmptyTag;
1401                                                         continue;
1402                                                 }
1403                                                 if (_c == '>')
1404                                                 {
1405                                                         PushNodeNameEnd(_index-1);
1406                                                         if (_state != ParseState.Tag)
1407                                                                 continue;
1408                                                         PushNodeEnd(_index, false);
1409                                                         if (_state != ParseState.Tag)
1410                                                                 continue;
1411                                                         _state = ParseState.Text;
1412                                                         PushNodeStart(HtmlNodeType.Text, _index); // text resumes right after the '>'
1413                                                 }
1414                                                 break;
1415
1416                                         case ParseState.BetweenAttributes: // after the tag name or after an attribute
1417                                                 if (NewCheck())
1418                                                         continue;
1419
1420                                                 if (IsWhiteSpace(_c))
1421                                                         continue;
1422
1423                                                 if ((_c == '/') || (_c == '?')) // '?' is handled like '/' (NOTE(review): presumably for "?>" - confirm)
1424                                                 {
1425                                                         _state = ParseState.EmptyTag;
1426                                                         continue;
1427                                                 }
1428
1429                                                 if (_c == '>')
1430                                                 {
1431                                                         PushNodeEnd(_index, false);
1432                                                         if (_state != ParseState.BetweenAttributes)
1433                                                                 continue;
1434                                                         _state = ParseState.Text;
1435                                                         PushNodeStart(HtmlNodeType.Text, _index);
1436                                                         continue;
1437                                                 }
1438
1439                                                 PushAttributeNameStart(_index-1); // any other character starts an attribute name
1440                                                 _state = ParseState.AttributeName;
1441                                                 break;
1442
1443                                         case ParseState.EmptyTag: // after a '/' (or '?') inside a tag
1444                                                 if (NewCheck())
1445                                                         continue;
1446
1447                                                 if (_c == '>')
1448                                                 {
1449                                                         PushNodeEnd(_index, true);
1450                                                         if (_state != ParseState.EmptyTag)
1451                                                                 continue;
1452                                                         _state = ParseState.Text;
1453                                                         PushNodeStart(HtmlNodeType.Text, _index);
1454                                                         continue;
1455                                                 }
1456                                                 _state = ParseState.BetweenAttributes; // no '>' followed: carry on inside the tag
1457                                                 break;
1458
1459                                         case ParseState.AttributeName: // inside an attribute name
1460                                                 if (NewCheck())
1461                                                         continue;
1462
1463                                                 if (IsWhiteSpace(_c))
1464                                                 {
1465                                                         PushAttributeNameEnd(_index-1);
1466                                                         _state = ParseState.AttributeBeforeEquals;
1467                                                         continue;
1468                                                 }
1469                                                 if (_c == '=')
1470                                                 {
1471                                                         PushAttributeNameEnd(_index-1);
1472                                                         _state = ParseState.AttributeAfterEquals;
1473                                                         continue;
1474                                                 }
1475                                                 if (_c == '>')
1476                                                 {
1477                                                         PushAttributeNameEnd(_index-1);
1478                                                         PushNodeEnd(_index, false);
1479                                                         if (_state != ParseState.AttributeName)
1480                                                                 continue;
1481                                                         _state = ParseState.Text;
1482                                                         PushNodeStart(HtmlNodeType.Text, _index);
1483                                                         continue;
1484                                                 }
1485                                                 break;
1486
1487                                         case ParseState.AttributeBeforeEquals: // whitespace was seen after an attribute name
1488                                                 if (NewCheck())
1489                                                         continue;
1490
1491                                                 if (IsWhiteSpace(_c))
1492                                                         continue;
1493                                                 if (_c == '>')
1494                                                 {
1495                                                         PushNodeEnd(_index, false);
1496                                                         if (_state != ParseState.AttributeBeforeEquals)
1497                                                                 continue;
1498                                                         _state = ParseState.Text;
1499                                                         PushNodeStart(HtmlNodeType.Text, _index);
1500                                                         continue;
1501                                                 }
1502                                                 if (_c == '=')
1503                                                 {
1504                                                         _state = ParseState.AttributeAfterEquals;
1505                                                         continue;
1506                                                 }
1507                                                 // no equals, no whitespace, it's a new attribute starting
1508                                                 _state = ParseState.BetweenAttributes;
1509                                                 DecrementPosition(); // read this character again in BetweenAttributes
1510                                                 break;
1511
1512                                         case ParseState.AttributeAfterEquals: // after '=', before the value
1513                                                 if (NewCheck())
1514                                                         continue;
1515
1516                                                 if (IsWhiteSpace(_c))
1517                                                         continue;
1518
1519                                                 if ((_c == '\'') || (_c == '"'))
1520                                                 {
1521                                                         _state = ParseState.QuotedAttributeValue;
1522                                                         PushAttributeValueStart(_index); // the value starts after the opening quote
1523                                                         lastquote = _c;
1524                                                         continue;
1525                                                 }
1526                                                 if (_c == '>')
1527                                                 {
1528                                                         PushNodeEnd(_index, false);
1529                                                         if (_state != ParseState.AttributeAfterEquals)
1530                                                                 continue;
1531                                                         _state = ParseState.Text;
1532                                                         PushNodeStart(HtmlNodeType.Text, _index);
1533                                                         continue;
1534                                                 }
1535                                                 PushAttributeValueStart(_index-1); // an unquoted value starts at this character
1536                                                 _state = ParseState.AttributeValue;
1537                                                 break;
1538
1539                                         case ParseState.AttributeValue: // inside an unquoted attribute value
1540                                                 if (NewCheck())
1541                                                         continue;
1542
1543                                                 if (IsWhiteSpace(_c))
1544                                                 {
1545                                                         PushAttributeValueEnd(_index-1);
1546                                                         _state = ParseState.BetweenAttributes;
1547                                                         continue;
1548                                                 }
1549
1550                                                 if (_c == '>')
1551                                                 {
1552                                                         PushAttributeValueEnd(_index-1);
1553                                                         PushNodeEnd(_index, false);
1554                                                         if (_state != ParseState.AttributeValue)
1555                                                                 continue;
1556                                                         _state = ParseState.Text;
1557                                                         PushNodeStart(HtmlNodeType.Text, _index);
1558                                                         continue;
1559                                                 }
1560                                                 break;
1561
1562                                         case ParseState.QuotedAttributeValue: // inside a quoted value; NewCheck is not called, only "<%" is special
1563                                                 if (_c == lastquote)
1564                                                 {
1565                                                         PushAttributeValueEnd(_index-1);
1566                                                         _state = ParseState.BetweenAttributes;
1567                                                         continue;
1568                                                 }
1569                                                 if (_c == '<')
1570                                                 {
1571                                                         //SLIM: if (_index<_text.Length)
1572                                                         if (!_text.Eof (_index))
1573                                                         {
1574                                                                 if (_text[_index] == '%')
1575                                                                 {
1576                                                                         _oldstate = _state;
1577                                                                         _state = ParseState.ServerSideCode;
1578                                                                         continue;
1579                                                                 }
1580                                                         }
1581                                                 }
1582                                                 break;
1583
1584                                         case ParseState.Comment: // inside "<! ... >"
1585                                                 if (_c == '>')
1586                                                 {
1587                                                         if (_fullcomment)
1588                                                         {
1589                                                                 if ((_text[_index-2] != '-') || // a full comment ("<!--") only ends on "-->"
1590                                                                         (_text[_index-3] != '-'))
1591                                                                 {
1592                                                                         continue;
1593                                                                 }
1594                                                         }
1595                                                         PushNodeEnd(_index, false);
1596                                                         _state = ParseState.Text;
1597                                                         PushNodeStart(HtmlNodeType.Text, _index);
1598                                                         continue;
1599                                                 }
1600                                                 break;
1601
1602                                         case ParseState.ServerSideCode: // inside "<% ... %>"
1603                                                 if (_c == '%')
1604                                                 {
1605                                                         //SLIM: if (_index<_text.Length)
1606                                                         if (! _text.Eof (_index))
1607                                                         {
1608                                                                 if (_text[_index] == '>')
1609                                                                 {
1610                                                                         switch(_oldstate) // "%>" found: resume from the state the block interrupted
1611                                                                         {
1612                                                                                 case ParseState.AttributeAfterEquals: // the block began an attribute value
1613                                                                                         _state = ParseState.AttributeValue;
1614                                                                                         break;
1615
1616                                                                                 case ParseState.BetweenAttributes: // the block stands where an attribute name was expected
1617                                                                                         PushAttributeNameEnd(_index+1); // the name ends just past the '>'
1618                                                                                         _state = ParseState.BetweenAttributes;
1619                                                                                         break;
1620
1621                                                                                 default:
1622                                                                                         _state = _oldstate;
1623                                                                                         break;
1624                                                                         }
1625                                                                         IncrementPosition(); // steps over the '>'; NOTE(review): _c is still '%' here, so that is what the checksum and the newline test see
1626                                                                 }
1627                                                         }
1628                                                 }
1629                                                 break;
1630
1631                                         // handle <script>a="</script>"</script>
1632                                         case ParseState.PcDataQuote: // inside a quoted string within PcData
1633                                                 if ((_c == _pcdata_quote_char) && (_text [_index - 2] != '\\')) { // matching quote not preceded by a backslash
1634                                                         _pcdata_quote_char = '\0';
1635                                                         _state = ParseState.PcData;
1636                                                 }
1637                                                 break;
1638
1639                                         case ParseState.PcData: // raw content of the current node (e.g. a script): scan for its closing tag
                                                     // NOTE(review): this Substring is evaluated for every character read in this state
                                                     // and before the buffer-end check below - confirm it cannot run past the end of the text.
1640                                                 Debug ("PCDATA " + _currentnode.Name + " " + _text.Substring(_index-1,  _currentnode._namelength+2));
1641                                                 if (_c == '\"' || _c == '\''){
1642                                                         _pcdata_quote_char = _c;
1643                                                         _state = ParseState.PcDataQuote;
1644                                                         break;
1645                                                 }
1646                                                 // look for </tag + 1 char
1647
1648                                                 // check buffer end
1649                                                 //SLIM: if ((_currentnode._namelength+3)<=(_text.Length-(_index-1)))
1650                                                 if (! _text.Eof (_currentnode._namelength + _index + 1))
1651                                                 {
1652                                                         if (string.Compare(_text.Substring(_index-1, _currentnode._namelength+2),
1653                                                                 "</" + _currentnode.Name, true) == 0)
1654                                                         {
1655                                                                 int c = _text[_index-1 + 2 + _currentnode.Name.Length]; // character right after "</name"
1656                                                                 if ((c == '>') || (IsWhiteSpace(c)))
1657                                                                 {
1658                                                                         // add the script as a text node
1659                                                                         HtmlNode script = CreateNode(HtmlNodeType.Text,
1660                                                                                 _currentnode._outerstartindex + _currentnode._outerlength);
1661                                                                         script._outerlength = _index-1 - script._outerstartindex; // up to, not including, the '<' of the closing tag
1662                                                                         if (_streammode && ReportNode != null)
1663                                                                                 _stop_parsing = ReportNode (script); // stream mode: the node goes to the callback instead of the tree
1664                                                                         else
1665                                                                                 _currentnode.AppendChild(script);
1666                                                                         Debug ("Found script: [" + script.InnerText + "]");
1667
1668                                                                         PushNodeStart(HtmlNodeType.Element, _index-1); // the closing tag itself starts at its '<'
1669                                                                         PushNodeNameStart(false, _index-1 +2); // its name starts after "</"
1670                                                                         _state = ParseState.Tag;
1671                                                                         IncrementPosition(); // steps over the '/'
1672                                                                 }
1673                                                         }
1674                                                 }
1675                                                 break;
1676                                 }
1677                         }
1678
1679                         // finish the current work
1680                         if (_currentnode._namestartindex > 0)
1681                         {
1682                                 PushNodeNameEnd(_index);
1683                         }
1684                         PushNodeEnd(_index, false);
1685
1686                         // we don't need this anymore
1687                         _lastnodes.Clear();
1688                 }
1689
1690                 private bool NewCheck()
1691                 {
1692                         // Called with the character just read in _c. Returns false, doing nothing, unless _c is '<'.
1693                         // For '<' it returns true after one of:
1694                         //   "<%"   - server-side code: remember the state to resume in _oldstate and switch
1695                         //            to ServerSideCode (starting an attribute value, an attribute name or a
1696                         //            tag name first when the block appears where one of those was expected);
1697                         //   "<!"   - push the end of the current node and open a Comment node;
1698                         //   other  - push the end of the current node, open an Element node, go to WhichTag.
1699                         if (_c != '<')
1700                                 return false;
1701
1702                         //SLIM: if (_index<_text.Length)
1703                         if (! _text.Eof (_index) && _text[_index] == '%')
1704                         {
1705                                 switch(_state)
1706                                 {
1707                                         case ParseState.AttributeAfterEquals:
1708                                                 PushAttributeValueStart(_index-1);
1709                                                 break;
1710
1711                                         case ParseState.BetweenAttributes:
1712                                                 PushAttributeNameStart(_index-1);
1713                                                 break;
1714
1715                                         case ParseState.WhichTag:
1716                                                 PushNodeNameStart(true, _index-1);
1717                                                 _state = ParseState.Tag;
1718                                                 break;
1719                                 }
1720                                 _oldstate = _state;
1721                                 _state = ParseState.ServerSideCode;
1722                                 return true;
1723                         }
1724
1725                         PushNodeEnd(_index-1, true);
1726                         _state = ParseState.WhichTag;
1727                         //SLIM: if ((_index-1) <= (_text.Length-2))
1728                         if (!_text.Eof (_index) && _text[_index] == '!')
1729                         {
1730                                 PushNodeStart(HtmlNodeType.Comment, _index-1);
1731                                 PushNodeNameStart(true, _index);
1732                                 PushNodeNameEnd(_index+1);
1733                                 _state = ParseState.Comment;
1734
1735                                 // A "full" comment opens with "<!--"; Parse() ends it only on "-->".
1736                                 // Clear the flag before looking ahead, so that the value left by an earlier
1737                                 // comment is not reused when fewer than two characters follow the '!'
1738                                 // (e.g. a "<!>" at the very end of the text).
1739                                 _fullcomment = false;
1740                                 //SLIM: if (_index<(_text.Length-2))
1741                                 if (! _text.Eof (_index + 2))
1742                                 {
1743                                         _fullcomment = (_text[_index+1] == '-') && (_text[_index+2] == '-');
1744                                 }
1745                                 return true;
1746                         }
1747
1748                         // ordinary tag: the new element starts at the '<'
1749                         PushNodeStart(HtmlNodeType.Element, _index-1);
1750                         return true;
1751                 }
1752
1753                 private void ReadDocumentEncoding(HtmlNode node)
1754                 {
1755                         // Reads the charset declared by a node of the form
1756                         // <meta http-equiv="content-type" content="text/html;charset=iso-8859-1" /> into _declaredencoding.
1757                         // Throws EncodingFoundException: with the encoding when _onlyDetectEncoding is set, with null
1758                         // when a 4-letter element other than those listed below is seen. No-op unless OptionReadEncoding.
1759                         if (!OptionReadEncoding)
1760                                 return;
1761                         if (node._namelength != 4)      // quick check, avoids string alloc
1762                                 return;
1763
1764                         // only these nodes can occur before meta
1765                         // if we started seeing any other node, we will never see a meta node
1766                         // NOTE(review): "script", "style" and "title" are not 4 characters long, so they
1767                         // presumably never get past the length check above - confirm before relying on them.
1768                         if (node.NodeType == HtmlNodeType.Element &&
1769                             (node.Name != "head" && node.Name != "script" &&
1770                              node.Name != "style" && node.Name != "title" &&
1771                              node.Name != "link" && node.Name != "html" &&
1772                              node.Name != "meta"))
1773                                 throw new EncodingFoundException (null);
1774
1775                         if (node.Name != "meta") // all nodes names are lowercase
1776                                 return;
1777
1778                         HtmlAttribute att = node.Attributes["http-equiv"];
1779                         if (att == null || string.Compare(att.Value, "content-type", true) != 0)
1780                                 return;
1781
1782                         HtmlAttribute content = node.Attributes["content"];
1783                         if (content == null)
1784                                 return;
1785
1786                         string charset = NameValuePairList.GetNameValuePairsValue(content.Value, "charset");
1787                         if (charset == null)
1788                                 return;
1789
1790                         try
1791                         {
1792                                 _declaredencoding = Encoding.GetEncoding(charset);
1793                         }
1794                         catch (ArgumentException)
1795                         {
1796                                 // the page names a charset this platform does not know: treat it as undeclared
1797                                 return;
1798                         }
1799
1800                         if (_onlyDetectEncoding)
1801                                 throw new EncodingFoundException(_declaredencoding);
1802
1803                         // report a declared encoding whose Windows code page differs from the stream encoding's
1804                         if (_streamencoding != null && _declaredencoding.WindowsCodePage != _streamencoding.WindowsCodePage)
1805                                 AddError(HtmlParseErrorCode.CharsetMismatch, _line, _lineposition, _index, node.OuterHtml,
1806                                         "Encoding mismatch between StreamEncoding: " +
1807                                         _streamencoding.WebName + " and DeclaredEncoding: " + _declaredencoding.WebName);
1808                 }
1809
/// <summary>
/// Starts a new attribute at the given position and makes it the current attribute.
/// </summary>
/// <param name="index">Position where the attribute name starts; also stored as the attribute's stream position.</param>
private void PushAttributeNameStart(int index)
{
        // Fill in the fresh attribute first, then publish it as the current one.
        HtmlAttribute attribute = CreateAttribute();
        attribute._namestartindex = index;
        attribute._streamposition = index;
        attribute._line = _line;
        attribute._lineposition = _lineposition;
        _currentattribute = attribute;
}
1818
/// <summary>
/// Records the length of the current attribute's name and appends the attribute
/// to the attribute collection of the current node.
/// </summary>
/// <param name="index">Position just past the attribute name; the name length is measured from the name start index.</param>
private void PushAttributeNameEnd(int index)
{
        HtmlAttribute attribute = _currentattribute;
        attribute._namelength = index - attribute._namestartindex;
        _currentnode.Attributes.Append(attribute);
}
1824
/// <summary>
/// Records where the value of the current attribute starts.
/// </summary>
/// <param name="index">Position of the first character of the attribute value.</param>
private void PushAttributeValueStart(int index)
{
        HtmlAttribute attribute = _currentattribute;
        attribute._valuestartindex = index;
}
1829
/// <summary>
/// Records the length of the current attribute's value.
/// </summary>
/// <param name="index">Position just past the attribute value; the value length is measured from the value start index.</param>
private void PushAttributeValueEnd(int index)
{
        HtmlAttribute attribute = _currentattribute;
        attribute._valuelength = index - attribute._valuestartindex;
}
1834
/// <summary>
/// Creates a node of the given type at the given position and makes it the current node.
/// </summary>
/// <param name="type">Type of the node to create.</param>
/// <param name="index">Position passed to node creation; also stored as the node's stream position.</param>
private void PushNodeStart(HtmlNodeType type, int index)
{
        HtmlNode node = CreateNode(type, index);
        node._line = _line;
        // Element nodes are recorded one column before the current line position.
        node._lineposition = (type == HtmlNodeType.Element) ? _lineposition - 1 : _lineposition;
        node._streamposition = index;
        _currentnode = node;
}
1846
/// <summary>
/// Finishes the node currently being parsed: records its outer length, reports it to the
/// stream-mode callback, links it to its parent (outside stream mode) and closes it when required.
/// </summary>
/// <param name="index">Position where the node ends; the outer length is measured from the node's outer start index.</param>
/// <param name="close">true to close the node right away even when it is a start tag.</param>
private void PushNodeEnd(int index, bool close)
{
        _currentnode._outerlength = index - _currentnode._outerstartindex;

        // SLIM: in stream mode hand the node to the caller; its answer is kept in _stop_parsing
        if (_streammode && ReportNode != null)
        {
                _stop_parsing = ReportNode (_currentnode);
        }

        if (_debug)
        {
                if (_currentnode._nodetype != HtmlNodeType.Text)
                {
                        Debug ((_currentnode.StartTag ? "Start-" : "End-") + _currentnode.Name);
                }
                else
                {
                        Debug ("Text:" + _currentnode.InnerText);
                }
        }

        bool textOrComment =
                (_currentnode._nodetype == HtmlNodeType.Text) ||
                (_currentnode._nodetype == HtmlNodeType.Comment);

        if (textOrComment)
        {
                // empty text/comment nodes are simply dropped
                if (_currentnode._outerlength > 0)
                {
                        // for these node types the inner span is the whole outer span
                        _currentnode._innerstartindex = _currentnode._outerstartindex;
                        _currentnode._innerlength = _currentnode._outerlength;

                        // SLIM: no tree is built in stream mode, the caller was already told above
                        if (!_streammode && _lastparentnode != null)
                        {
                                _lastparentnode.AppendChild(_currentnode);
                        }
                }
        }
        else if ((_currentnode._starttag) && (_lastparentnode != _currentnode))
        {
                // SLIM: no tree is built in stream mode, the caller was already told above
                if (!_streammode && _lastparentnode != null)
                {
                        _lastparentnode.AppendChild(_currentnode);
                }

                ReadDocumentEncoding(_currentnode);

                // SLIM: the last node of each name is still tracked (also in stream mode)
                // so that later tags with the same name can find it
                _currentnode._prevwithsamename = (HtmlNode)_lastnodes[_currentnode.Name];
                _lastnodes[_currentnode.Name] = _currentnode;

                // document and element nodes become the parent of whatever follows
                switch (_currentnode.NodeType)
                {
                        case HtmlNodeType.Document:
                        case HtmlNodeType.Element:
                                _lastparentnode = _currentnode;
                                break;
                }

                // CDATA-style elements switch the parser state and are not closed here
                if (HtmlNode.IsCDataElement(CurrentNodeName()))
                {
                        _state = ParseState.PcData;
                        return;
                }

                if ((HtmlNode.IsClosedElement(_currentnode.Name)) ||
                        (HtmlNode.IsEmptyElement(_currentnode.Name)))
                {
                        close = true;
                }
        }

        // an open start tag that was not asked to close stays open
        if (!close && _currentnode._starttag)
        {
                return;
        }

        CloseCurrentNode();

        // text and comment nodes are released once they have been closed
        switch (_currentnode._nodetype)
        {
                case HtmlNodeType.Text:
                case HtmlNodeType.Comment:
                        _currentnode = null;
                        break;
        }
}
1926
/// <summary>
/// Records where the name of the current node starts and whether the node is a start tag.
/// </summary>
/// <param name="starttag">Value stored in the current node's start-tag flag.</param>
/// <param name="index">Position of the first character of the node name.</param>
private void PushNodeNameStart(bool starttag, int index)
{
        HtmlNode node = _currentnode;
        node._namestartindex = index;
        node._starttag = starttag;
}
1932
/// <summary>
/// Returns the names of the elements that act as "resetters" for the given tag name.
/// </summary>
/// <param name="name">Tag name to look up; compared exactly against "li", "tr", "th" and "td".</param>
/// <returns>A new array of resetter names, or null when the tag has none.</returns>
private string[] GetResetters(string name)
{
        if (name == "li")
        {
                return new string[]{"ul"};
        }
        if (name == "tr")
        {
                return new string[]{"table"};
        }
        if ((name == "th") || (name == "td"))
        {
                return new string[]{"tr", "table"};
        }
        return null;
}
1951
/// <summary>
/// Runs the nested-tag fix for the node being parsed, provided it is a start tag.
/// The tag name is lower-cased before it is matched against the resetter table.
/// </summary>
private void FixNestedTags()
{
        // we are only interested by start tags, not closing tags
        if (!_currentnode._starttag)
        {
                return;
        }

        // Lower-case with the invariant culture: tag names are markup, not linguistic text.
        // A culture-sensitive ToLower() turns "LI" into "lı" under Turkish/Azeri cultures,
        // which never matches the "li" entry of GetResetters and silently disables the fix.
        string name = CurrentNodeName().ToLower(System.Globalization.CultureInfo.InvariantCulture);
        FixNestedTag(name, GetResetters(name));
}
1961
/// <summary>
/// Closes the last still-open node with the given name unless one of its resetter
/// nodes has been opened since.
/// </summary>
/// <param name="name">Name used to look up the last node of this kind.</param>
/// <param name="resetters">Names of the resetter elements; when null nothing is done.</param>
private void FixNestedTag(string name, string[] resetters)
{
        if (resetters == null)
        {
                return;
        }

        // only a previous, still unclosed node with the same name needs fixing
        HtmlNode prev = (HtmlNode)_lastnodes[name];
        if ((prev == null) || (prev.Closed))
        {
                return;
        }

        // a resetter node in between means the nesting is legitimate
        if (FindResetterNodes(prev, resetters))
        {
                return;
        }

        // otherwise close the previous node with a fake closer node that is its own end node
        HtmlNode close = new HtmlNode(prev.NodeType, this, -1);
        close._endnode = close;
        prev.CloseNode(close);
}
1988
/// <summary>
/// Tells whether any of the named resetter elements is found for the given node.
/// </summary>
/// <param name="node">Node the resetters are checked against.</param>
/// <param name="names">Resetter names to try, in order; may be null.</param>
/// <returns>true as soon as one name yields a resetter node; false when none does or names is null.</returns>
private bool FindResetterNodes(HtmlNode node, string[] names)
{
        if (names == null)
        {
                return false;
        }

        foreach (string name in names)
        {
                if (FindResetterNode(node, name) != null)
                {
                        return true;
                }
        }
        return false;
}
2004
/// <summary>
/// Looks up the last node with the given name and returns it when it can act as a
/// resetter for the given node.
/// </summary>
/// <param name="node">Node whose stream position the candidate is compared with.</param>
/// <param name="name">Name of the resetter element to look up.</param>
/// <returns>
/// The last node with that name when it exists, is not closed and does not start before
/// <paramref name="node"/> in the stream; otherwise null.
/// </returns>
private HtmlNode FindResetterNode(HtmlNode node, string name)
{
        HtmlNode candidate = (HtmlNode)_lastnodes[name];
        if ((candidate == null) || (candidate.Closed))
        {
                return null;
        }

        // a candidate positioned before the node does not count
        return (candidate._streamposition < node._streamposition) ? null : candidate;
}
2020
/// <summary>
/// Records the length of the current node's name and, when OptionFixNestedTags is set,
/// runs the nested-tag fix.
/// </summary>
/// <param name="index">Position just past the node name; the name length is measured from the name start index.</param>
private void PushNodeNameEnd(int index)
{
        _currentnode._namelength = index - _currentnode._namestartindex;

        if (!OptionFixNestedTags)
        {
                return;
        }
        FixNestedTags();
}
2029
/// <summary>
/// Closes the node currently being parsed.
/// </summary>
/// <remarks>
/// When an open node with the same name is known, that node is closed by the current one,
/// unless OptionFixNestedTags is set and a resetter node makes the end tag invalid here.
/// When no such node is known, the outcome depends on the element kind: a closed element is
/// closed on itself, an overlapping element is re-emitted as a lower-cased text node, an
/// empty element only yields an EndTagNotRequired error and anything else a TagNotOpened error.
/// Unless an error stopped the close, the last parent node is refreshed afterwards.
/// </remarks>
private void CloseCurrentNode()
{
        if (_currentnode.Closed) // text or document are by def closed
        {
                return;
        }

        bool error = false;

        // find last node of this kind
        HtmlNode prev = (HtmlNode)_lastnodes[_currentnode.Name];
        if (prev == null)
        {
                if (HtmlNode.IsClosedElement(_currentnode.Name))
                {
                        // </br> will be seen as <br>
                        _currentnode.CloseNode(_currentnode);

                        // add to parent node
                        if (_lastparentnode != null)
                        {
                                // walk the parent's children backwards, looking for a childless
                                // node with the same name; everything passed on the way is
                                // remembered so it can be moved under that node
                                HtmlNode foundNode = null;
                                Stack futureChild = new Stack();
                                for (HtmlNode sibling = _lastparentnode.LastChild; sibling != null; sibling = sibling.PreviousSibling)
                                {
                                        if ((sibling.Name == _currentnode.Name) && (!sibling.HasChildNodes))
                                        {
                                                foundNode = sibling;
                                                break;
                                        }
                                        futureChild.Push(sibling);
                                }

                                if (foundNode != null)
                                {
                                        // popping restores the original document order
                                        while (futureChild.Count != 0)
                                        {
                                                HtmlNode moved = (HtmlNode)futureChild.Pop();
                                                _lastparentnode.RemoveChild(moved);
                                                foundNode.AppendChild(moved);
                                        }
                                }
                                else
                                {
                                        _lastparentnode.AppendChild(_currentnode);
                                }
                        }
                }
                else if (HtmlNode.CanOverlapElement(_currentnode.Name))
                {
                        // node has no opening counterpart and is not a closed node
                        // this is a hack: add it as a text node
                        HtmlNode closenode = CreateNode(HtmlNodeType.Text, _currentnode._outerstartindex);
                        closenode._outerlength = _currentnode._outerlength;

                        // Lower-case with the invariant culture: this text is markup, and a
                        // culture-sensitive ToLower() would turn "</I>" into "</ı>" under
                        // Turkish/Azeri cultures.
                        HtmlTextNode closetext = (HtmlTextNode)closenode;
                        closetext.Text = closetext.Text.ToLower(System.Globalization.CultureInfo.InvariantCulture);

                        if (_lastparentnode != null)
                        {
                                _lastparentnode.AppendChild(closenode);
                        }
                }
                else if (HtmlNode.IsEmptyElement(_currentnode.Name))
                {
                        // reported, but not treated as an error that stops the close
                        AddError(
                                HtmlParseErrorCode.EndTagNotRequired,
                                _currentnode._line, _currentnode._lineposition,
                                _currentnode._streamposition, _currentnode.OuterHtml,
                                "End tag </" + _currentnode.Name + "> is not required");
                }
                else
                {
                        // node cannot overlap, node is not empty
                        AddError(
                                HtmlParseErrorCode.TagNotOpened,
                                _currentnode._line, _currentnode._lineposition,
                                _currentnode._streamposition, _currentnode.OuterHtml,
                                "Start tag <" + _currentnode.Name + "> was not found");
                        error = true;
                }
        }
        else
        {
                if (OptionFixNestedTags)
                {
                        // an open resetter node after prev means this end tag cannot close prev
                        if (FindResetterNodes(prev, GetResetters(_currentnode.Name)))
                        {
                                AddError(
                                        HtmlParseErrorCode.EndTagInvalidHere,
                                        _currentnode._line, _currentnode._lineposition,
                                        _currentnode._streamposition, _currentnode.OuterHtml,
                                        "End tag </" + _currentnode.Name + "> invalid here");
                                error = true;
                        }
                }

                if (!error)
                {
                        // the node opened before prev becomes the last one of this name again
                        _lastnodes[_currentnode.Name] = prev._prevwithsamename;
                        prev.CloseNode(_currentnode);
                }
        }

        // we close this node, get grandparent
        if ((!error) &&
                (_lastparentnode != null) &&
                ((!HtmlNode.IsClosedElement(_currentnode.Name)) ||
                (_currentnode._starttag)))
        {
                UpdateLastParentNode();
        }
}
2151
/// <summary>
/// Moves the last parent node up through its ParentNode links until it refers to a node
/// that is not closed, falling back to the document node when no such ancestor exists.
/// </summary>
/// <remarks>
/// Safe to call when no last parent node is set: the document node is then used.
/// </remarks>
internal void UpdateLastParentNode()
{
        // The null test comes first so that a missing parent on entry no longer throws.
        while ((_lastparentnode != null) && (_lastparentnode.Closed))
        {
                _lastparentnode = _lastparentnode.ParentNode;
        }

        if (_lastparentnode == null)
        {
                _lastparentnode = _documentnode;
        }
}
2167
/// <summary>
/// Extracts the name of the current attribute from the parsed text.
/// </summary>
/// <returns>The substring of the text covered by the attribute's name start index and name length.</returns>
private string CurrentAttributeName()
{
        int start = _currentattribute._namestartindex;
        int length = _currentattribute._namelength;
        return _text.Substring(start, length);
}
2172
/// <summary>
/// Extracts the value of the current attribute from the parsed text.
/// </summary>
/// <returns>The substring of the text covered by the attribute's value start index and value length.</returns>
private string CurrentAttributeValue()
{
        int start = _currentattribute._valuestartindex;
        int length = _currentattribute._valuelength;
        return _text.Substring(start, length);
}
2177
/// <summary>
/// Extracts the name of the current node from the parsed text, with its original casing.
/// </summary>
/// <returns>The substring of the text covered by the node's name start index and name length.</returns>
private string CurrentNodeName()
{
        int start = _currentnode._namestartindex;
        int length = _currentnode._namelength;
        return _text.Substring(start, length);
}
2182
/// <summary>
/// Extracts the outer text of the current node from the parsed text.
/// </summary>
/// <returns>The substring of the text covered by the node's outer start index and outer length.</returns>
private string CurrentNodeOuter()
{
        int start = _currentnode._outerstartindex;
        int length = _currentnode._outerlength;
        return _text.Substring(start, length);
}
2187
/// <summary>
/// Extracts the inner text of the current node from the parsed text.
/// </summary>
/// <returns>The substring of the text covered by the node's inner start index and inner length.</returns>
private string CurrentNodeInner()
{
        int start = _currentnode._innerstartindex;
        int length = _currentnode._innerlength;
        return _text.Substring(start, length);
}
2192
/// <summary>
/// Determines whether the specified character code is treated as whitespace.
/// </summary>
/// <param name="c">The character code to check.</param>
/// <returns>true if the code is tab (9), line feed (10), carriage return (13) or space (32); otherwise false.</returns>
public static bool IsWhiteSpace(int c)
{
        switch (c)
        {
                case 9:
                case 10:
                case 13:
                case 32:
                        return true;

                default:
                        return false;
        }
}
2206
2207         }
2208
/// <summary>
/// Exception that carries an <see cref="System.Text.Encoding"/> to whoever catches it.
/// </summary>
internal class EncodingFoundException: Exception
{
        // The encoding handed to the constructor; it is never changed afterwards.
        private Encoding _encoding;

        /// <summary>
        /// Gets the encoding this exception was created with.
        /// </summary>
        internal Encoding Encoding
        {
                get { return _encoding; }
        }

        /// <summary>
        /// Creates the exception for the given encoding.
        /// </summary>
        /// <param name="encoding">The encoding to expose through <see cref="Encoding"/>.</param>
        internal EncodingFoundException(Encoding encoding)
        {
                _encoding = encoding;
        }
}
2226 }