1 // HtmlAgilityPack V1.0
4 Copyright (C) 2003 Simon Mourier <simonm@microsoft.com>
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
10 1. Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15 3. The name of the author may not be used to endorse or promote products
16 derived from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 using System
.Collections
;
35 namespace HtmlAgilityPack
38 /// Represents the type of fragment in a mixed code document.
40 public enum MixedCodeDocumentFragmentType
43 /// The fragment contains code.
48 /// The fragment contains text.
54 /// Represents a fragment of code in a mixed code document.
56 public class MixedCodeDocumentCodeFragment
: MixedCodeDocumentFragment
58 internal string _code
;
/// <summary>
/// Creates a code fragment belonging to the given document.
/// </summary>
/// <param name="doc">The owning mixed code document.</param>
internal MixedCodeDocumentCodeFragment(MixedCodeDocument doc)
    : base(doc, MixedCodeDocumentFragmentType.Code)
{
    // All initialization is performed by the base constructor.
}
66 /// Gets the fragment code text.
74 _code
= FragmentText
.Substring(_doc
.TokenCodeStart
.Length
,
75 FragmentText
.Length
- _doc
.TokenCodeEnd
.Length
- _doc
.TokenCodeStart
.Length
-1).Trim();
76 if (_code
.StartsWith("="))
78 _code
= _doc
.TokenResponseWrite
+ _code
.Substring(1, _code
.Length
-1);
91 /// Represents a fragment of text in a mixed code document.
93 public class MixedCodeDocumentTextFragment
: MixedCodeDocumentFragment
/// <summary>
/// Creates a text fragment belonging to the given document.
/// </summary>
/// <param name="doc">The owning mixed code document.</param>
internal MixedCodeDocumentTextFragment(MixedCodeDocument doc)
    : base(doc, MixedCodeDocumentFragmentType.Text)
{
    // All initialization is performed by the base constructor.
}
101 /// Gets the fragment text.
111 base._fragmenttext
= value;
117 /// Represents a base class for fragments in a mixed code document.
119 public abstract class MixedCodeDocumentFragment
121 internal MixedCodeDocumentFragmentType _type
;
122 internal MixedCodeDocument _doc
;
124 internal int _length
;
126 internal int _lineposition
;
127 internal string _fragmenttext
;
129 internal MixedCodeDocumentFragment(MixedCodeDocument doc
, MixedCodeDocumentFragmentType type
)
135 case MixedCodeDocumentFragmentType
.Text
:
136 _doc
._textfragments
.Append(this);
139 case MixedCodeDocumentFragmentType
.Code
:
140 _doc
._codefragments
.Append(this);
143 _doc
._fragments
.Append(this);
147 /// Gets the type of fragment.
149 public MixedCodeDocumentFragmentType FragmentType
158 /// Gets the fragment position in the document's stream.
160 public int StreamPosition
169 /// Gets the line number of the fragment.
/// <summary>
/// Gets the column (position within its line) recorded for this fragment.
/// </summary>
public int LinePosition
{
    get { return _lineposition; }
}
/// <summary>
/// Gets the fragment text. The text is cut out of the document text on first
/// access (using the fragment's index and length) and cached afterwards.
/// </summary>
public string FragmentText
{
    get
    {
        if (_fragmenttext != null)
        {
            return _fragmenttext;
        }

        // Not cached yet: slice the fragment out of the owning document's text.
        _fragmenttext = _doc._text.Substring(_index, _length);
        return _fragmenttext;
    }
}
207 /// Represents a list of mixed code fragments.
209 public class MixedCodeDocumentFragmentList
: IEnumerable
211 private MixedCodeDocument _doc
;
212 private ArrayList _items
= new ArrayList();
214 internal MixedCodeDocumentFragmentList(MixedCodeDocument doc
)
/// <summary>
/// Adds a fragment at the end of the list.
/// </summary>
/// <param name="newFragment">The fragment to append. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="newFragment"/> is null.</exception>
public void Append(MixedCodeDocumentFragment newFragment)
{
    if (newFragment != null)
    {
        _items.Add(newFragment);
        return;
    }

    throw new ArgumentNullException("newFragment");
}
/// <summary>
/// Inserts a fragment at the beginning of the list.
/// </summary>
/// <param name="newFragment">The fragment to prepend. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="newFragment"/> is null.</exception>
public void Prepend(MixedCodeDocumentFragment newFragment)
{
    if (newFragment != null)
    {
        _items.Insert(0, newFragment);
        return;
    }

    throw new ArgumentNullException("newFragment");
}
/// <summary>
/// Removes a fragment from the list. An exception is raised when the fragment is not in the list.
/// </summary>
/// <param name="fragment">The fragment to remove. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="fragment"/> is null.</exception>
/// <exception cref="IndexOutOfRangeException">The fragment is not contained in the list.</exception>
public void Remove(MixedCodeDocumentFragment fragment)
{
    if (fragment == null)
    {
        throw new ArgumentNullException("fragment");
    }

    int position = GetFragmentIndex(fragment);
    if (position >= 0)
    {
        RemoveAt(position);
        return;
    }

    // The lookup found nothing: report it the same way the list always has.
    throw new IndexOutOfRangeException();
}
/// <summary>
/// Removes the fragment stored at the given position in the list.
/// </summary>
/// <param name="index">The index of the fragment to remove.</param>
public void RemoveAt(int index)
{
    // Range validation is left to the underlying ArrayList.
    _items.RemoveAt(index);
}
273 /// Remove all fragments from the list.
275 public void RemoveAll()
281 /// Gets the number of fragments contained in the list.
/// <summary>
/// Finds the position of a fragment in the list.
/// </summary>
/// <param name="fragment">The fragment to look for. May not be null.</param>
/// <returns>The zero-based index of the fragment, or -1 when it is not in the list.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fragment"/> is null.</exception>
internal int GetFragmentIndex(MixedCodeDocumentFragment fragment)
{
    if (fragment == null)
    {
        throw new ArgumentNullException("fragment");
    }

    int position = 0;
    foreach (object entry in _items)
    {
        // Same comparison as before: '==' on the fragment type.
        if ((MixedCodeDocumentFragment)entry == fragment)
        {
            return position;
        }
        position++;
    }

    return -1;
}
/// <summary>
/// Gets the fragment stored at the given position in the list.
/// </summary>
/// <param name="index">The zero-based index of the fragment.</param>
public MixedCodeDocumentFragment this[int index]
{
    get
    {
        object entry = _items[index];
        return entry as MixedCodeDocumentFragment;
    }
}
318 internal void Clear()
/// <summary>
/// Creates a strongly typed enumerator over the fragments of this list.
/// </summary>
/// <returns>A new <see cref="MixedCodeDocumentFragmentEnumerator"/> over the list's items.</returns>
public MixedCodeDocumentFragmentEnumerator GetEnumerator()
{
    MixedCodeDocumentFragmentEnumerator enumerator = new MixedCodeDocumentFragmentEnumerator(_items);
    return enumerator;
}
/// <summary>
/// Non-generic enumeration entry point; forwards to the typed <see cref="GetEnumerator"/>.
/// </summary>
/// <returns>An enumerator over the fragments of this list.</returns>
IEnumerator IEnumerable.GetEnumerator()
{
    IEnumerator enumerator = GetEnumerator();
    return enumerator;
}
340 /// Represents a fragment enumerator.
342 public class MixedCodeDocumentFragmentEnumerator
: IEnumerator
347 internal MixedCodeDocumentFragmentEnumerator(ArrayList items
)
354 /// Sets the enumerator to its initial position, which is before the first element in the collection.
362 /// Advances the enumerator to the next element of the collection.
364 /// <returns>true if the enumerator was successfully advanced to the next element; false if the enumerator has passed the end of the collection.</returns>
365 public bool MoveNext()
368 return (_index
<_items
.Count
);
/// <summary>
/// Gets the fragment at the enumerator's current position.
/// </summary>
public MixedCodeDocumentFragment Current
{
    get
    {
        // No range check here: an invalid position surfaces as the ArrayList's own exception.
        object entry = _items[_index];
        return (MixedCodeDocumentFragment)entry;
    }
}
383 /// Gets the current element in the collection.
385 object IEnumerator
.Current
396 /// Represents a document with mixed code and text. ASP, ASPX, and JSP are good examples of such documents.
398 public class MixedCodeDocument
400 private System
.Text
.Encoding _streamencoding
= null;
401 internal string _text
;
402 internal MixedCodeDocumentFragmentList _fragments
;
403 internal MixedCodeDocumentFragmentList _codefragments
;
404 internal MixedCodeDocumentFragmentList _textfragments
;
405 private ParseState _state
;
409 private int _lineposition
;
410 private MixedCodeDocumentFragment _currentfragment
;
413 /// Gets or sets the token representing code start.
415 public string TokenCodeStart
= "<%";
418 /// Gets or sets the token representing code end.
420 public string TokenCodeEnd
= "%>";
423 /// Gets or sets the token representing code directive.
425 public string TokenDirective
= "@";
428 /// Gets or sets the token representing response write directive.
430 public string TokenResponseWrite
= "Response.Write ";
433 private string TokenTextBlock
= "TextBlock({0})";
/// <summary>
/// Creates an empty mixed code document with its three fragment lists
/// (code, text, and all fragments) initialized.
/// </summary>
public MixedCodeDocument()
{
    this._codefragments = new MixedCodeDocumentFragmentList(this);
    this._textfragments = new MixedCodeDocumentFragmentList(this);
    this._fragments = new MixedCodeDocumentFragmentList(this);
}
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
public void Load(Stream stream)
{
    StreamReader reader = new StreamReader(stream);
    Load(reader);
}
/// <summary>
/// Loads a mixed code document from a stream, optionally detecting the encoding from byte order marks.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(Stream stream, bool detectEncodingFromByteOrderMarks)
{
    StreamReader reader = new StreamReader(stream, detectEncodingFromByteOrderMarks);
    Load(reader);
}
/// <summary>
/// Loads a mixed code document from a stream using the given character encoding.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Load(Stream stream, Encoding encoding)
{
    StreamReader reader = new StreamReader(stream, encoding);
    Load(reader);
}
/// <summary>
/// Loads a mixed code document from a stream using the given character encoding
/// and byte order mark detection option.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
{
    StreamReader reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks);
    Load(reader);
}
/// <summary>
/// Loads a mixed code document from a stream using the given character encoding,
/// byte order mark detection option, and reader buffer size.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
/// <param name="buffersize">The minimum buffer size.</param>
public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
{
    StreamReader reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks, buffersize);
    Load(reader);
}
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
public void Load(string path)
{
    // The reader (and the file handle behind it) is created here, so it is
    // released here as well, even when loading throws part-way through.
    using (StreamReader reader = new StreamReader(path))
    {
        Load(reader);
    }
}
/// <summary>
/// Loads a mixed code document from a file, optionally detecting the encoding from byte order marks.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(string path, bool detectEncodingFromByteOrderMarks)
{
    // The reader (and the file handle behind it) is created here, so it is
    // released here as well, even when loading throws part-way through.
    using (StreamReader reader = new StreamReader(path, detectEncodingFromByteOrderMarks))
    {
        Load(reader);
    }
}
/// <summary>
/// Loads a mixed code document from a file using the given character encoding.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Load(string path, Encoding encoding)
{
    // The reader (and the file handle behind it) is created here, so it is
    // released here as well, even when loading throws part-way through.
    using (StreamReader reader = new StreamReader(path, encoding))
    {
        Load(reader);
    }
}
/// <summary>
/// Loads a mixed code document from a file using the given character encoding
/// and byte order mark detection option.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
{
    // The reader (and the file handle behind it) is created here, so it is
    // released here as well, even when loading throws part-way through.
    using (StreamReader reader = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks))
    {
        Load(reader);
    }
}
/// <summary>
/// Loads a mixed code document from a file using the given character encoding,
/// byte order mark detection option, and reader buffer size.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
/// <param name="buffersize">The minimum buffer size.</param>
public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
{
    // The reader (and the file handle behind it) is created here, so it is
    // released here as well, even when loading throws part-way through.
    using (StreamReader reader = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks, buffersize))
    {
        Load(reader);
    }
}
/// <summary>
/// Loads a mixed code document from a string.
/// </summary>
/// <param name="html">The text to load.</param>
public void LoadHtml(string html)
{
    StringReader reader = new StringReader(html);
    Load(reader);
}
/// <summary>
/// Loads the mixed code document from the specified TextReader.
/// All other Load overloads end up here.
/// </summary>
/// <param name="reader">The TextReader used to feed the HTML data into the document. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
public void Load(TextReader reader)
{
    // Fail before touching any state, so a bad call cannot wipe a loaded document.
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    // Forget the previous document. The combined list must be emptied too,
    // otherwise fragments of an earlier load stay in it after a reload.
    _codefragments.Clear();
    _textfragments.Clear();
    _fragments.Clear();

    _text = reader.ReadToEnd();

    // Capture the encoding only after reading: a StreamReader performs its
    // encoding auto-detection on the first read, so CurrentEncoding may change.
    StreamReader sr = reader as StreamReader;
    if (sr != null)
    {
        _streamencoding = sr.CurrentEncoding;
    }

    // NOTE(review): the tail of this method is not shown in the reviewed excerpt;
    // closing the reader and invoking the parser are reconstructed - confirm against the full file.
    reader.Close();
    Parse();
}
/// <summary>
/// Picks the encoding used when saving without an explicit encoding.
/// </summary>
/// <returns>The recorded stream encoding when there is one; otherwise <see cref="System.Text.Encoding.Default"/>.</returns>
internal System.Text.Encoding GetOutEncoding()
{
    return (_streamencoding != null)
        ? _streamencoding
        : System.Text.Encoding.Default;
}
/// <summary>
/// Gets the encoding recorded from the stream used to read the document.
/// </summary>
public System.Text.Encoding StreamEncoding
{
    get { return _streamencoding; }
}
/// <summary>
/// Gets the list holding the document's code fragments.
/// </summary>
public MixedCodeDocumentFragmentList CodeFragments
{
    get { return _codefragments; }
}
/// <summary>
/// Gets the list holding the document's text fragments.
/// </summary>
public MixedCodeDocumentFragmentList TextFragments
{
    get { return _textfragments; }
}
620 /// Gets the list of all fragments in the document.
622 public MixedCodeDocumentFragmentList Fragments
/// <summary>
/// Saves the mixed document to the specified stream, using the encoding
/// returned by GetOutEncoding.
/// </summary>
/// <param name="outStream">The stream to which you want to save.</param>
public void Save(Stream outStream)
{
    // The writer is deliberately not disposed here: that would close the caller's stream.
    Save(new StreamWriter(outStream, GetOutEncoding()));
}
/// <summary>
/// Saves the mixed document to the specified stream using the given encoding.
/// </summary>
/// <param name="outStream">The stream to which you want to save.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Save(Stream outStream, System.Text.Encoding encoding)
{
    // The writer is deliberately not disposed here: that would close the caller's stream.
    Save(new StreamWriter(outStream, encoding));
}
/// <summary>
/// Saves the mixed document to the specified file, overwriting it, using the
/// encoding returned by GetOutEncoding.
/// </summary>
/// <param name="filename">The location of the file where you want to save the document.</param>
public void Save(string filename)
{
    // The writer owns the file it opens: dispose it so buffered output is
    // written and the file handle is released, even when saving throws.
    using (StreamWriter sw = new StreamWriter(filename, false, GetOutEncoding()))
    {
        Save(sw);
    }
}
/// <summary>
/// Saves the mixed document to the specified file, overwriting it, using the given encoding.
/// </summary>
/// <param name="filename">The location of the file where you want to save the document.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Save(string filename, System.Text.Encoding encoding)
{
    // The writer owns the file it opens: dispose it so buffered output is
    // written and the file handle is released, even when saving throws.
    using (StreamWriter sw = new StreamWriter(filename, false, encoding))
    {
        Save(sw);
    }
}
/// <summary>
/// Saves the mixed document to the specified StreamWriter by forwarding to the TextWriter overload.
/// </summary>
/// <param name="writer">The StreamWriter to which you want to save.</param>
public void Save(StreamWriter writer)
{
    // Widen the static type so the TextWriter overload is selected instead of recursing.
    TextWriter target = writer;
    Save(target);
}
682 /// Saves the mixed document to the specified TextWriter.
684 /// <param name="writer">The TextWriter to which you want to save.</param>
685 public void Save(TextWriter writer
)
692 /// Gets the code represented by the mixed code document seen as a template.
700 foreach(MixedCodeDocumentFragment frag
in _fragments
)
704 case MixedCodeDocumentFragmentType
.Text
:
705 s
+= TokenResponseWrite
+ string.Format(TokenTextBlock
, i
) + "\n";
709 case MixedCodeDocumentFragmentType
.Code
:
710 s
+= ((MixedCodeDocumentCodeFragment
)frag
).Code
+ "\n";
/// <summary>
/// Creates a text fragment instance owned by this document.
/// </summary>
/// <returns>The newly created text fragment instance.</returns>
public MixedCodeDocumentTextFragment CreateTextFragment()
{
    MixedCodeDocumentFragment fragment = CreateFragment(MixedCodeDocumentFragmentType.Text);
    return (MixedCodeDocumentTextFragment)fragment;
}
/// <summary>
/// Creates a code fragment instance owned by this document.
/// </summary>
/// <returns>The newly created code fragment instance.</returns>
public MixedCodeDocumentCodeFragment CreateCodeFragment()
{
    MixedCodeDocumentFragment fragment = CreateFragment(MixedCodeDocumentFragmentType.Code);
    return (MixedCodeDocumentCodeFragment)fragment;
}
/// <summary>
/// Creates a fragment of the requested type, owned by this document.
/// </summary>
/// <param name="type">The kind of fragment to create.</param>
/// <returns>A new text or code fragment.</returns>
/// <exception cref="NotSupportedException">The type is neither Text nor Code.</exception>
internal MixedCodeDocumentFragment CreateFragment(MixedCodeDocumentFragmentType type)
{
    if (type == MixedCodeDocumentFragmentType.Text)
    {
        return new MixedCodeDocumentTextFragment(this);
    }

    if (type == MixedCodeDocumentFragmentType.Code)
    {
        return new MixedCodeDocumentCodeFragment(this);
    }

    // Any other value has no matching fragment class.
    throw new NotSupportedException();
}
/// <summary>
/// Stamps the current fragment with the parser's current line and line position,
/// sets its start index to one before the parser index, and resets its length to zero.
/// </summary>
private void SetPosition()
{
    MixedCodeDocumentFragment fragment = _currentfragment;

    fragment._line = _line;
    fragment._lineposition = _lineposition;
    fragment._index = _index - 1;
    fragment._length = 0;
}
759 private void IncrementPosition()
771 private enum ParseState
779 _state
= ParseState
.Text
;
781 _currentfragment
= CreateFragment(MixedCodeDocumentFragmentType
.Text
);
783 while (_index
<_text
.Length
)
790 case ParseState
.Text
:
791 if (_index
+TokenCodeStart
.Length
<_text
.Length
)
793 if (_text
.Substring(_index
-1, TokenCodeStart
.Length
) == TokenCodeStart
)
795 _state
= ParseState
.Code
;
796 _currentfragment
._length
= _index
-1 - _currentfragment
._index
;
797 _currentfragment
= CreateFragment(MixedCodeDocumentFragmentType
.Code
);
804 case ParseState
.Code
:
805 if (_index
+TokenCodeEnd
.Length
<_text
.Length
)
807 if (_text
.Substring(_index
-1, TokenCodeEnd
.Length
) == TokenCodeEnd
)
809 _state
= ParseState
.Text
;
810 _currentfragment
._length
= _index
+ TokenCodeEnd
.Length
- _currentfragment
._index
;
811 _index
+= TokenCodeEnd
.Length
;
812 _lineposition
+= TokenCodeEnd
.Length
;
813 _currentfragment
= CreateFragment(MixedCodeDocumentFragmentType
.Text
);
822 _currentfragment
._length
= _index
- _currentfragment
._index
;