Compute lucene-style scores for our hits.
[beagle.git] / Filters / HtmlAgilityPack / HtmlNode.cs
blob5ed2b3e25696283f43fe5d40c134576b4539f10e
1 /*
2 Copyright (C) 2003 Simon Mourier <simonm@microsoft.com>
3 All rights reserved.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 using System;
30 using System.Collections;
31 using System.IO;
32 using System.Xml;
33 using System.Xml.XPath;
35 namespace HtmlAgilityPack
/// <summary>
/// Flags that describe the behavior of an Element node.
/// Each member is a distinct bit, so members can be combined with the bitwise OR operator
/// (for example <c>Empty | Closed</c>) and tested with the bitwise AND operator.
/// </summary>
[Flags]
public enum HtmlElementFlag
{
    /// <summary>
    /// The node is a CDATA node.
    /// </summary>
    CData = 1,

    /// <summary>
    /// The node is empty. META or IMG are examples of such nodes.
    /// </summary>
    Empty = 2,

    /// <summary>
    /// The node will automatically be closed during parsing.
    /// </summary>
    Closed = 4,

    /// <summary>
    /// The node can overlap.
    /// </summary>
    CanOverlap = 8
}
/// <summary>
/// Enumerates the kinds of node an HTML document tree can contain.
/// </summary>
public enum HtmlNodeType
{
    /// <summary>
    /// The document root.
    /// </summary>
    Document = 0,

    /// <summary>
    /// An HTML element.
    /// </summary>
    Element = 1,

    /// <summary>
    /// An HTML comment.
    /// </summary>
    Comment = 2,

    /// <summary>
    /// A text node. A text node is always the child of an element or a document node.
    /// </summary>
    Text = 3,
}
89 /// <summary>
90 /// Represents an HTML node.
91 /// </summary>
92 public class HtmlNode: IXPathNavigable
/// <summary>
/// The name given to every comment node: '#comment'.
/// </summary>
public static readonly string HtmlNodeTypeNameComment = "#comment";

/// <summary>
/// The name given to the document node: '#document'.
/// </summary>
public static readonly string HtmlNodeTypeNameDocument = "#document";

/// <summary>
/// The name given to every text node: '#text'.
/// </summary>
public static readonly string HtmlNodeTypeNameText = "#text";

/// <summary>
/// Table of per-element behaviors. Each entry maps a lowercase tag name (the key)
/// to a combination of HtmlElementFlag values (the value).
/// </summary>
public static Hashtable ElementsFlags;

// tree linkage, exposed through NodeType, NextSibling, PreviousSibling, ParentNode,
// OwnerDocument, ChildNodes and Attributes
internal HtmlNodeType _nodetype;
internal HtmlNode _nextnode;
internal HtmlNode _prevnode;
internal HtmlNode _parentnode;
internal HtmlDocument _ownerdocument;
internal HtmlNodeCollection _childnodes;
internal HtmlAttributeCollection _attributes;

// position of the node, exposed through Line, LinePosition and StreamPosition
internal int _line;
internal int _lineposition;
internal int _streamposition;

// offsets and lengths into the owner document's text for the inner HTML, outer HTML and name
internal int _innerstartindex;
internal int _innerlength;
internal int _outerstartindex;
internal int _outerlength;
internal int _namestartindex;
internal int _namelength;

internal bool _starttag;
internal string _name;
internal HtmlNode _prevwithsamename;

// a node is considered closed once this is non-null (see Closed)
internal HtmlNode _endnode;

// when a *changed flag is set, the matching cached html string is regenerated on next read
internal bool _innerchanged;
internal bool _outerchanged;
internal string _innerhtml;
internal string _outerhtml;
/// <summary>
/// Fills ElementsFlags with the default behavior flags of the known HTML tags.
/// </summary>
static HtmlNode()
{
    ElementsFlags = new Hashtable();

    // tags whose content may be anything
    string[] cdataTags = new string[] { "script", "style", "noxhtml" };
    foreach (string tag in cdataTags)
    {
        ElementsFlags.Add(tag, HtmlElementFlag.CData);
    }

    // tags that can not contain other tags
    string[] emptyTags = new string[]
    {
        "base", "link", "meta", "isindex", "hr", "col", "img", "param", "embed",
        "frame", "wbr", "bgsound", "spacer", "keygen", "area", "input", "basefont"
    };
    foreach (string tag in emptyTags)
    {
        ElementsFlags.Add(tag, HtmlElementFlag.Empty);
    }

    // forms may overlap other elements; they are deliberately not flagged as Empty
    ElementsFlags.Add("form", HtmlElementFlag.CanOverlap);

    // options sometimes have content, and sometimes they don't...
    ElementsFlags.Add("option", HtmlElementFlag.Empty);

    // tags whose closing tag is equivalent to the open tag:
    // <p>bla</p>bla will be transformed into <p>bla</p>bla
    // <p>bla<p>bla will be transformed into <p>bla<p>bla and not <p>bla</p><p>bla</p> or <p>bla<p>bla</p></p>
    // the same applies to <br>
    HtmlElementFlag emptyAndClosed = HtmlElementFlag.Empty | HtmlElementFlag.Closed;
    ElementsFlags.Add("br", emptyAndClosed);
    ElementsFlags.Add("p", emptyAndClosed);
}
/// <summary>
/// Determines if an element node is closed.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is registered in ElementsFlags with the Closed flag, false otherwise.</returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
public static bool IsClosedElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // ElementsFlags keys are lowercase tag names; lowercase with the invariant culture so the
    // lookup does not depend on the current thread culture (e.g. Turkish dotless i).
    string key = name.ToLower(System.Globalization.CultureInfo.InvariantCulture);
    object flag = ElementsFlags[key];
    if (flag == null)
    {
        return false;
    }
    return (((HtmlElementFlag)flag) & HtmlElementFlag.Closed) != 0;
}
/// <summary>
/// Determines if an element node can be kept overlapped.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is registered in ElementsFlags with the CanOverlap flag, false otherwise.</returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
public static bool CanOverlapElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // ElementsFlags keys are lowercase tag names; lowercase with the invariant culture so the
    // lookup does not depend on the current thread culture (e.g. Turkish dotless i).
    string key = name.ToLower(System.Globalization.CultureInfo.InvariantCulture);
    object flag = ElementsFlags[key];
    if (flag == null)
    {
        return false;
    }
    return (((HtmlElementFlag)flag) & HtmlElementFlag.CanOverlap) != 0;
}
/// <summary>
/// Determines if a text corresponds to the closing tag of a node that can be kept overlapped.
/// </summary>
/// <param name="text">The text to check, expected in the form &lt;/name&gt;. May not be null.</param>
/// <returns>true if the text is a closing tag whose name satisfies CanOverlapElement, false otherwise.</returns>
/// <exception cref="ArgumentNullException">text is null.</exception>
public static bool IsOverlappedClosingElement(string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    // the shortest possible closing tag is </x>, i.e. 4 characters; anything shorter cannot match
    // (the previous test used <= 4 and therefore rejected one-letter tag names)
    if (text.Length < 4)
    {
        return false;
    }

    if ((text[0] != '<') ||
        (text[text.Length - 1] != '>') ||
        (text[1] != '/'))
    {
        return false;
    }

    // strip the leading "</" and the trailing ">"
    string name = text.Substring(2, text.Length - 3);
    return CanOverlapElement(name);
}
/// <summary>
/// Determines if an element node is a CDATA element node.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is registered in ElementsFlags with the CData flag, false otherwise.</returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
public static bool IsCDataElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // ElementsFlags keys are lowercase tag names; lowercase with the invariant culture so the
    // lookup does not depend on the current thread culture (e.g. Turkish dotless i).
    string key = name.ToLower(System.Globalization.CultureInfo.InvariantCulture);
    object flag = ElementsFlags[key];
    if (flag == null)
    {
        return false;
    }
    return (((HtmlElementFlag)flag) & HtmlElementFlag.CData) != 0;
}
/// <summary>
/// Determines if an element node is defined as empty.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>
/// true if the name is the empty string, starts with '!' or '?', or is registered in
/// ElementsFlags with the Empty flag; false otherwise.
/// </returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
public static bool IsEmptyElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    if (name.Length == 0)
    {
        return true;
    }

    // <!DOCTYPE ... and <?xml ... never have content
    char first = name[0];
    if (('!' == first) || ('?' == first))
    {
        return true;
    }

    // ElementsFlags keys are lowercase tag names; lowercase with the invariant culture so the
    // lookup does not depend on the current thread culture (e.g. Turkish dotless i).
    string key = name.ToLower(System.Globalization.CultureInfo.InvariantCulture);
    object flag = ElementsFlags[key];
    if (flag == null)
    {
        return false;
    }
    return (((HtmlElementFlag)flag) & HtmlElementFlag.Empty) != 0;
}
/// <summary>
/// Builds an HTML node out of a literal HTML string.
/// </summary>
/// <param name="html">The HTML text.</param>
/// <returns>The first child of the document node obtained by loading the text into a new document.</returns>
public static HtmlNode CreateNode(string html)
{
    // REVIEW: this is *not* optimum, a whole temporary document is parsed for a single node
    HtmlDocument scratch = new HtmlDocument();
    scratch.LoadHtml(html);

    HtmlNode root = scratch.DocumentNode;
    return root.FirstChild;
}
/// <summary>
/// Copies the specified node into this node by calling CopyFrom(node, true).
/// </summary>
/// <param name="node">The node to duplicate. May not be null.</param>
public void CopyFrom(HtmlNode node)
{
    const bool deep = true;
    CopyFrom(node, deep);
}
/// <summary>
/// Makes this node a duplicate of the specified node: this node's attributes are replaced by
/// copies of the source attributes and, for a deep copy, its children are replaced by clones
/// of the source children.
/// </summary>
/// <param name="node">The node to duplicate. May not be null.</param>
/// <param name="deep">true to recursively clone the subtree under the specified node, false to copy only the node's attributes.</param>
/// <exception cref="ArgumentNullException">node is null.</exception>
public void CopyFrom(HtmlNode node, bool deep)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    Attributes.RemoveAll();
    if (node.HasAttributes)
    {
        foreach (HtmlAttribute att in node.Attributes)
        {
            SetAttributeValue(att.Name, att.Value);
        }
    }

    // the subtree is only touched for a deep copy (the test used to be inverted, which replaced
    // the children for shallow copies and left them alone for deep ones)
    if (deep)
    {
        RemoveAllChildren();
        if (node.HasChildNodes)
        {
            foreach (HtmlNode child in node.ChildNodes)
            {
                AppendChild(child.CloneNode(true));
            }
        }
    }
}
/// <summary>
/// Initializes a node of the given type for the given document.
/// </summary>
/// <param name="type">The type of the node.</param>
/// <param name="ownerdocument">The document the node belongs to.</param>
/// <param name="index">The start index of the node, or -1 when the node does not come from parsed text.</param>
internal HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
{
    _nodetype = type;
    _ownerdocument = ownerdocument;
    _outerstartindex = index;

    // comment, document and text nodes have a fixed name and are their own end node,
    // which makes them closed from the start
    if (type == HtmlNodeType.Comment)
    {
        _name = HtmlNodeTypeNameComment;
        _endnode = this;
    }
    else if (type == HtmlNodeType.Document)
    {
        _name = HtmlNodeTypeNameDocument;
        _endnode = this;
    }
    else if (type == HtmlNodeType.Text)
    {
        _name = HtmlNodeTypeNameText;
        _endnode = this;
    }

    bool createdByCaller = (-1 == index); // -1 means the node comes from public

    // register still-open parsed nodes with the document, using the index as the key
    if ((_ownerdocument._openednodes != null) && !Closed && !createdByCaller)
    {
        _ownerdocument._openednodes.Add(index, this);
    }

    if (createdByCaller && (type != HtmlNodeType.Comment) && (type != HtmlNodeType.Text))
    {
        // innerhtml and outerhtml must be calculated
        _outerchanged = true;
        _innerchanged = true;
    }
}
/// <summary>
/// Closes this node with the specified end node and, unless the document option
/// OptionAutoCloseOnEnd is set, first closes every child that is still open.
/// </summary>
/// <param name="endnode">The node that closes this node.</param>
internal void CloseNode(HtmlNode endnode)
{
    if (!_ownerdocument.OptionAutoCloseOnEnd && (_childnodes != null))
    {
        // close all children
        foreach (HtmlNode child in _childnodes)
        {
            if (!child.Closed)
            {
                // create a fake closer node
                HtmlNode fakeCloser = new HtmlNode(NodeType, _ownerdocument, -1);
                fakeCloser._endnode = fakeCloser;
                child.CloseNode(fakeCloser);
            }
        }
    }

    if (Closed)
    {
        return;
    }

    _endnode = endnode;

    // the node is not open anymore
    if (_ownerdocument._openednodes != null)
    {
        _ownerdocument._openednodes.Remove(_outerstartindex);
    }

    // if this node is the one recorded under its name in _lastnodes, drop that record
    HtmlNode recorded = _ownerdocument._lastnodes[Name] as HtmlNode;
    if (recorded == this)
    {
        _ownerdocument._lastnodes.Remove(Name);
        _ownerdocument.UpdateLastParentNode();
    }

    if (endnode != this)
    {
        // create an inner section
        _innerstartindex = _outerstartindex + _outerlength;
        _innerlength = endnode._outerstartindex - _innerstartindex;

        // update full length
        _outerlength = (endnode._outerstartindex + endnode._outerlength) - _outerstartindex;
    }
}
/// <summary>
/// Gets the node that closes this node, or null if none has been set.
/// </summary>
internal HtmlNode EndNode
{
    get { return _endnode; }
}
/// <summary>
/// Reads the value of the 'id' attribute of this node.
/// </summary>
/// <returns>The attribute value, or null when the node has no 'id' attribute.</returns>
internal string GetId()
{
    HtmlAttribute idAttribute = Attributes["id"];
    return (idAttribute != null) ? idAttribute.Value : null;
}
/// <summary>
/// Sets the value of the 'id' attribute of this node, creating the attribute when the node
/// does not have one yet, and registers the node under that id with the owner document.
/// </summary>
/// <param name="id">The new id value.</param>
internal void SetId(string id)
{
    HtmlAttribute att = Attributes["id"];
    if (att == null)
    {
        // attach the new attribute to this node; previously it was created but never added,
        // so the id was registered with the document without appearing among the attributes
        att = _ownerdocument.CreateAttribute("id");
        Attributes.Append(att);
    }
    att.Value = id;
    _ownerdocument.SetIdForNode(this, att.Value);
    _outerchanged = true;
}
/// <summary>
/// Creates a new XPathNavigator object for navigating this HTML node.
/// </summary>
/// <returns>An HtmlNodeNavigator built from the owner document and this node.</returns>
public XPathNavigator CreateNavigator()
{
    HtmlNodeNavigator navigator = new HtmlNodeNavigator(_ownerdocument, this);
    return navigator;
}
/// <summary>
/// Selects the first node that matches the XPath expression, evaluated from this node.
/// </summary>
/// <param name="xpath">The XPath expression. May not be null.</param>
/// <returns>The first HtmlNode that matches the XPath query, or a null reference if no matching node was found.</returns>
/// <exception cref="ArgumentNullException">xpath is null.</exception>
public HtmlNode SelectSingleNode(string xpath)
{
    if (xpath == null)
    {
        throw new ArgumentNullException("xpath");
    }

    XPathNodeIterator matches = new HtmlNodeNavigator(_ownerdocument, this).Select(xpath);
    if (matches.MoveNext())
    {
        HtmlNodeNavigator first = (HtmlNodeNavigator)matches.Current;
        return first.CurrentNode;
    }
    return null;
}
/// <summary>
/// Selects every node that matches the XPath expression, evaluated from this node.
/// </summary>
/// <param name="xpath">The XPath expression.</param>
/// <returns>An HtmlNodeCollection holding the matching nodes, or null if no node matched the XPath expression.</returns>
public HtmlNodeCollection SelectNodes(string xpath)
{
    HtmlNodeCollection matches = new HtmlNodeCollection(null);

    HtmlNodeNavigator navigator = new HtmlNodeNavigator(_ownerdocument, this);
    for (XPathNodeIterator it = navigator.Select(xpath); it.MoveNext(); )
    {
        HtmlNodeNavigator current = (HtmlNodeNavigator)it.Current;
        matches.Add(current.CurrentNode);
    }

    // callers get null rather than an empty collection
    return (matches.Count == 0) ? null : matches;
}
/// <summary>
/// Gets or sets the value of the 'id' HTML attribute. The document must have been parsed using the OptionUseIdAttribute set to true.
/// An Exception is thrown when the owner document keeps no id table; setting a null value throws ArgumentNullException.
/// </summary>
public string Id
{
    get
    {
        bool idsTracked = (_ownerdocument._nodesid != null);
        if (!idsTracked)
        {
            throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
        }
        return GetId();
    }
    set
    {
        bool idsTracked = (_ownerdocument._nodesid != null);
        if (!idsTracked)
        {
            throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
        }

        if (value == null)
        {
            throw new ArgumentNullException("value");
        }
        SetId(value);
    }
}
/// <summary>
/// Gets the line number of this node in the document.
/// </summary>
public int Line
{
    get { return _line; }
}
/// <summary>
/// Gets the column number of this node in the document.
/// </summary>
public int LinePosition
{
    get { return _lineposition; }
}
/// <summary>
/// Gets the stream position of this node in the document, relative to the start of the document.
/// </summary>
public int StreamPosition
{
    get { return _streamposition; }
}
/// <summary>
/// Gets a value indicating if this node has been closed or not.
/// A node counts as closed as soon as it has an end node.
/// </summary>
public bool Closed
{
    get
    {
        if (_endnode == null)
        {
            return false;
        }
        return true;
    }
}
/// <summary>
/// Gets or sets this node's name. When no name has been assigned yet, the getter extracts it
/// from the owner document's text, lowercases it and caches the result.
/// </summary>
public string Name
{
    get
    {
        if (_name != null)
        {
            return _name;
        }

        string raw = _ownerdocument._text.Substring(_namestartindex, _namelength);
        _name = raw.ToLower();
        return _name;
    }
    set
    {
        _name = value;
    }
}
/// <summary>
/// Gets the text between the start and end tags of the object.
/// For a text node this is its text, for a comment node its comment, and for any other node
/// the concatenation of the InnerText of all its children (empty when it has no children).
/// </summary>
public virtual string InnerText
{
    get
    {
        if (_nodetype == HtmlNodeType.Text)
        {
            return ((HtmlTextNode)this).Text;
        }

        if (_nodetype == HtmlNodeType.Comment)
        {
            return ((HtmlCommentNode)this).Comment;
        }

        // note: right now, this method is *slow*, because we recompute everything.
        // it could be optimised like innerhtml
        if (!HasChildNodes)
        {
            return string.Empty;
        }

        // accumulate in a builder instead of repeated string concatenation, which copied
        // the whole partial result once per child
        System.Text.StringBuilder text = new System.Text.StringBuilder();
        foreach (HtmlNode node in ChildNodes)
        {
            text.Append(node.InnerText);
        }
        return text.ToString();
    }
}
/// <summary>
/// Gets or Sets the HTML between the start and end tags of the object.
/// The getter regenerates and caches the markup when the content has changed, otherwise it
/// returns the cached markup or the matching slice of the owner document's text.
/// The setter parses the value in a new document and replaces this node's children with the result.
/// </summary>
public virtual string InnerHtml
{
    get
    {
        if (_innerchanged)
        {
            // content was modified: rebuild the markup and cache it
            _innerhtml = WriteContentTo();
            _innerchanged = false;
            return _innerhtml;
        }
        else if (_innerhtml != null)
        {
            return _innerhtml;
        }
        else if (_innerstartindex < 0)
        {
            return string.Empty;
        }
        else
        {
            return _ownerdocument._text.Substring(_innerstartindex, _innerlength);
        }
    }
    set
    {
        HtmlDocument parsed = new HtmlDocument();
        parsed.LoadHtml(value);

        RemoveAllChildren();
        AppendChildren(parsed.DocumentNode.ChildNodes);
    }
}
/// <summary>
/// Gets the object and its content in HTML.
/// The markup is regenerated and cached when the node has changed; otherwise the cached markup
/// or the matching slice of the owner document's text is returned.
/// </summary>
public virtual string OuterHtml
{
    get
    {
        if (_outerchanged)
        {
            // node was modified: rebuild the markup and cache it
            _outerhtml = WriteTo();
            _outerchanged = false;
            return _outerhtml;
        }
        else if (_outerhtml != null)
        {
            return _outerhtml;
        }
        else if (_outerstartindex < 0)
        {
            return string.Empty;
        }
        else
        {
            return _ownerdocument._text.Substring(_outerstartindex, _outerlength);
        }
    }
}
/// <summary>
/// Creates a duplicate of the node by calling CloneNode(true).
/// </summary>
/// <returns>The cloned node.</returns>
public HtmlNode Clone()
{
    HtmlNode copy = CloneNode(true);
    return copy;
}
/// <summary>
/// Creates a duplicate of the node under a new name by calling CloneNode(newName, true).
/// </summary>
/// <param name="newName">The new name of the cloned node. May not be null.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(string newName)
{
    const bool deep = true;
    return CloneNode(newName, deep);
}
/// <summary>
/// Creates a duplicate of the node and gives the duplicate a new name.
/// </summary>
/// <param name="newName">The new name of the cloned node. May not be null.</param>
/// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
/// <returns>The cloned node.</returns>
/// <exception cref="ArgumentNullException">newName is null.</exception>
public HtmlNode CloneNode(string newName, bool deep)
{
    if (newName == null)
    {
        throw new ArgumentNullException("newName");
    }

    HtmlNode renamed = CloneNode(deep);
    renamed._name = newName;
    return renamed;
}
/// <summary>
/// Creates a duplicate of the node. Comment and text nodes only copy their content; other
/// nodes copy their attributes, their closing attributes and, for a deep clone, their children.
/// </summary>
/// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(bool deep)
{
    HtmlNode copy = _ownerdocument.CreateNode(_nodetype);
    copy._name = Name;

    // comment and text nodes carry nothing but their content
    if (_nodetype == HtmlNodeType.Comment)
    {
        ((HtmlCommentNode)copy).Comment = ((HtmlCommentNode)this).Comment;
        return copy;
    }
    if (_nodetype == HtmlNodeType.Text)
    {
        ((HtmlTextNode)copy).Text = ((HtmlTextNode)this).Text;
        return copy;
    }

    // attributes
    if (HasAttributes)
    {
        foreach (HtmlAttribute source in _attributes)
        {
            copy.Attributes.Append(source.Clone());
        }
    }

    // closing attributes
    // NOTE(review): the shallow clone of the end node already carries copies of these attributes,
    // so this loop appends each of them a second time - confirm whether that is intended
    if (HasClosingAttributes)
    {
        copy._endnode = _endnode.CloneNode(false);
        foreach (HtmlAttribute source in _endnode._attributes)
        {
            copy._endnode._attributes.Append(source.Clone());
        }
    }

    // child nodes
    if (deep && HasChildNodes)
    {
        foreach (HtmlNode child in _childnodes)
        {
            copy.AppendChild(child.Clone());
        }
    }
    return copy;
}
/// <summary>
/// Gets the HTML node immediately following this element.
/// </summary>
public HtmlNode NextSibling
{
    get { return _nextnode; }
}
/// <summary>
/// Gets the node immediately preceding this node.
/// </summary>
public HtmlNode PreviousSibling
{
    get { return _prevnode; }
}
/// <summary>
/// Removes all the children of the current node, clears its attributes and the attributes
/// of its end node, and marks the node as changed.
/// </summary>
public void RemoveAll()
{
    RemoveAllChildren();

    if (HasAttributes)
    {
        _attributes.Clear();
    }

    // the end node may carry attributes of its own, unless the node closes itself
    bool hasSeparateEndNode = (_endnode != null) && (_endnode != this);
    if (hasSeparateEndNode && (_endnode._attributes != null))
    {
        _endnode._attributes.Clear();
    }

    _outerchanged = true;
    _innerchanged = true;
}
/// <summary>
/// Removes all the children of the current node. Does nothing when the node has no children.
/// </summary>
public void RemoveAllChildren()
{
    if (HasChildNodes)
    {
        if (_ownerdocument.OptionUseIdAttribute)
        {
            // remove nodes from id list
            foreach (HtmlNode child in _childnodes)
            {
                _ownerdocument.SetIdForNode(null, child.GetId());
            }
        }

        _childnodes.Clear();
        _outerchanged = true;
        _innerchanged = true;
    }
}
/// <summary>
/// Removes the specified child node from this node's children.
/// </summary>
/// <param name="oldChild">The node being removed. May not be null.</param>
/// <returns>The node removed.</returns>
/// <exception cref="ArgumentNullException">oldChild is null.</exception>
/// <exception cref="ArgumentException">oldChild is not a child of this node.</exception>
public HtmlNode RemoveChild(HtmlNode oldChild)
{
    if (oldChild == null)
    {
        throw new ArgumentNullException("oldChild");
    }

    // -1 stands for "not one of our children"
    int position = (_childnodes != null) ? _childnodes[oldChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Remove(position);

    _ownerdocument.SetIdForNode(null, oldChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return oldChild;
}
/// <summary>
/// Removes the specified child node, optionally keeping its children: they are inserted,
/// in their original order, at the position the removed node occupied.
/// </summary>
/// <param name="oldChild">The node being removed. May not be null.</param>
/// <param name="keepGrandChildren">true to keep grand children of the node, false otherwise.</param>
/// <returns>The node removed.</returns>
/// <exception cref="ArgumentNullException">oldChild is null.</exception>
public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)
{
    if (oldChild == null)
    {
        throw new ArgumentNullException("oldChild");
    }

    if ((oldChild._childnodes != null) && keepGrandChildren)
    {
        // get prev sibling
        HtmlNode prev = oldChild.PreviousSibling;

        // reroute grand children to ourselves
        foreach (HtmlNode grandchild in oldChild._childnodes)
        {
            InsertAfter(grandchild, prev);

            // advance the anchor: inserting every grandchild after the same node
            // would leave them in reverse order
            prev = grandchild;
        }
    }
    RemoveChild(oldChild);
    _outerchanged = true;
    _innerchanged = true;
    return oldChild;
}
/// <summary>
/// Replaces the child node oldChild with newChild node.
/// A null newChild turns the call into RemoveChild(oldChild); a null oldChild turns it into AppendChild(newChild).
/// </summary>
/// <param name="newChild">The new node to put in the child list.</param>
/// <param name="oldChild">The node being replaced in the list.</param>
/// <returns>newChild after a replacement; otherwise the result of the RemoveChild or AppendChild call.</returns>
/// <exception cref="ArgumentException">oldChild is not a child of this node.</exception>
public HtmlNode ReplaceChild(HtmlNode newChild, HtmlNode oldChild)
{
    if (newChild == null)
    {
        return RemoveChild(oldChild);
    }
    if (oldChild == null)
    {
        return AppendChild(newChild);
    }

    // -1 stands for "not one of our children"
    int position = (_childnodes != null) ? _childnodes[oldChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Replace(position, newChild);

    // swap the id registrations as well
    _ownerdocument.SetIdForNode(null, oldChild.GetId());
    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Inserts the specified node immediately before the specified reference node.
/// A null reference node appends the new node; inserting a node before itself does nothing.
/// </summary>
/// <param name="newChild">The node to insert. May not be null.</param>
/// <param name="refChild">The node that is the reference node. The newChild is placed before this node.</param>
/// <returns>The node being inserted.</returns>
/// <exception cref="ArgumentNullException">newChild is null.</exception>
/// <exception cref="ArgumentException">refChild is not a child of this node.</exception>
public HtmlNode InsertBefore(HtmlNode newChild, HtmlNode refChild)
{
    if (newChild == null)
    {
        throw new ArgumentNullException("newChild");
    }
    if (refChild == null)
    {
        return AppendChild(newChild);
    }
    if (newChild == refChild)
    {
        return newChild;
    }

    // -1 stands for "not one of our children"
    int position = (_childnodes != null) ? _childnodes[refChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Insert(position, newChild);

    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Inserts the specified node immediately after the specified reference node.
/// A null reference node prepends the new node; inserting a node after itself does nothing.
/// </summary>
/// <param name="newChild">The node to insert. May not be null.</param>
/// <param name="refChild">The node that is the reference node. The newChild is placed after this node.</param>
/// <returns>The node being inserted.</returns>
/// <exception cref="ArgumentNullException">newChild is null.</exception>
/// <exception cref="ArgumentException">refChild is not a child of this node.</exception>
public HtmlNode InsertAfter(HtmlNode newChild, HtmlNode refChild)
{
    if (newChild == null)
    {
        throw new ArgumentNullException("newChild");
    }
    if (refChild == null)
    {
        return PrependChild(newChild);
    }
    if (newChild == refChild)
    {
        return newChild;
    }

    // -1 stands for "not one of our children"
    int position = (_childnodes != null) ? _childnodes[refChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Insert(position + 1, newChild);

    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Gets the first child of the node, or null when the node has no children.
/// </summary>
public HtmlNode FirstChild
{
    get
    {
        return HasChildNodes ? _childnodes[0] : null;
    }
}
/// <summary>
/// Gets the last child of the node, or null when the node has no children.
/// </summary>
public HtmlNode LastChild
{
    get
    {
        return HasChildNodes ? _childnodes[_childnodes.Count - 1] : null;
    }
}
/// <summary>
/// Gets the type of this node.
/// </summary>
public HtmlNodeType NodeType
{
    get { return _nodetype; }
}
/// <summary>
/// Gets the parent of this node (for nodes that can have parents).
/// </summary>
public HtmlNode ParentNode
{
    get { return _parentnode; }
}
/// <summary>
/// Gets the HtmlDocument to which this node belongs.
/// </summary>
public HtmlDocument OwnerDocument
{
    get { return _ownerdocument; }
}
/// <summary>
/// Gets all the children of the node. The collection is created on first access.
/// </summary>
public HtmlNodeCollection ChildNodes
{
    get
    {
        if (_childnodes != null)
        {
            return _childnodes;
        }

        _childnodes = new HtmlNodeCollection(this);
        return _childnodes;
    }
}
/// <summary>
/// Adds the specified node to the beginning of the list of children of this node.
/// </summary>
/// <param name="newChild">The node to add. May not be null.</param>
/// <returns>The node added.</returns>
/// <exception cref="ArgumentNullException">newChild is null.</exception>
public HtmlNode PrependChild(HtmlNode newChild)
{
    if (newChild == null)
    {
        throw new ArgumentNullException("newChild");
    }

    HtmlNodeCollection children = ChildNodes;
    children.Prepend(newChild);

    // register the id of the new child with the document and flag this node as modified
    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Adds the specified node list to the beginning of the list of children of this node,
/// keeping the nodes in the order they have in the list.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
/// <exception cref="ArgumentNullException">newChildren is null.</exception>
public void PrependChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        throw new ArgumentNullException("newChildren");
    }

    // walk the list backwards: each node is put in front of the previously prepended one,
    // so a forward walk would add the nodes in reverse order
    for (int i = newChildren.Count - 1; i >= 0; i--)
    {
        PrependChild(newChildren[i]);
    }
}
/// <summary>
/// Adds the specified node to the end of the list of children of this node.
/// </summary>
/// <param name="newChild">The node to add. May not be null.</param>
/// <returns>The node added.</returns>
/// <exception cref="ArgumentNullException">newChild is null.</exception>
public HtmlNode AppendChild(HtmlNode newChild)
{
    if (newChild == null)
    {
        throw new ArgumentNullException("newChild");
    }

    HtmlNodeCollection children = ChildNodes;
    children.Append(newChild);

    // register the id of the new child with the document and flag this node as modified
    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Adds the nodes of the specified node list to the end of the list of children of this node.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
/// <exception cref="ArgumentNullException">newChildren is null.</exception>
public void AppendChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        // the reported parameter name must match the real one (it used to be misspelled)
        throw new ArgumentNullException("newChildren");
    }

    foreach (HtmlNode newChild in newChildren)
    {
        AppendChild(newChild);
    }
}
/// <summary>
/// Gets a value indicating whether the current node has any attributes.
/// </summary>
public bool HasAttributes
{
    get
    {
        return (_attributes != null) && (_attributes.Count > 0);
    }
}
/// <summary>
/// Gets a value indicating whether the current node has any attributes on the closing tag.
/// Always false when the node has no end node or is its own end node.
/// </summary>
public bool HasClosingAttributes
{
    get
    {
        bool hasSeparateEndNode = (_endnode != null) && (_endnode != this);
        return hasSeparateEndNode
            && (_endnode._attributes != null)
            && (_endnode._attributes.Count > 0);
    }
}
/// <summary>
/// Gets a value indicating whether this node has any child nodes.
/// </summary>
public bool HasChildNodes
{
    get
    {
        return (_childnodes != null) && (_childnodes.Count > 0);
    }
}
/// <summary>
/// Helper method to read the string value of an attribute of this node, falling back to a default value.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be null.</param>
/// <param name="def">The default value to return if not found.</param>
/// <returns>The value of the attribute if found, the default value if not found.</returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
public string GetAttributeValue(string name, string def)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // only look the attribute up when the node has attributes at all
    HtmlAttribute found = HasAttributes ? Attributes[name] : null;
    return (found == null) ? def : found.Value;
}
/// <summary>
/// Helper method to read the value of an attribute of this node as an integer, falling back to a default value.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be null.</param>
/// <param name="def">The default value to return if the attribute is not found or cannot be converted.</param>
/// <returns>The converted value of the attribute if found and convertible, the default value otherwise.</returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
public int GetAttributeValue(string name, int def)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // only look the attribute up when the node has attributes at all
    HtmlAttribute found = HasAttributes ? Attributes[name] : null;
    if (found == null)
    {
        return def;
    }

    try
    {
        return Convert.ToInt32(found.Value);
    }
    catch
    {
        // best effort: any conversion failure yields the default
        return def;
    }
}
/// <summary>
/// Helper method to read the value of an attribute of this node as a boolean, falling back to a default value.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be null.</param>
/// <param name="def">The default value to return if the attribute is not found or cannot be converted.</param>
/// <returns>The converted value of the attribute if found and convertible, the default value otherwise.</returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
public bool GetAttributeValue(string name, bool def)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // only look the attribute up when the node has attributes at all
    HtmlAttribute found = HasAttributes ? Attributes[name] : null;
    if (found == null)
    {
        return def;
    }

    try
    {
        return Convert.ToBoolean(found.Value);
    }
    catch
    {
        // best effort: any conversion failure yields the default
        return def;
    }
}
/// <summary>
/// Helper method to set the value of an attribute of this node. If the attribute is not found, it is created and appended.
/// </summary>
/// <param name="name">The name of the attribute to set. May not be null.</param>
/// <param name="value">The value for the attribute.</param>
/// <returns>The corresponding attribute instance.</returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
public HtmlAttribute SetAttributeValue(string name, string value)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    HtmlAttribute existing = Attributes[name];
    if (existing != null)
    {
        existing.Value = value;
        return existing;
    }

    HtmlAttribute created = _ownerdocument.CreateAttribute(name, value);
    return Attributes.Append(created);
}
/// <summary>
/// Gets the collection of HTML attributes for this node. When HasAttributes reports false,
/// a new collection bound to this node is created and stored before being returned.
/// </summary>
public HtmlAttributeCollection Attributes
{
    get
    {
        if (HasAttributes)
        {
            return _attributes;
        }

        _attributes = new HtmlAttributeCollection(this);
        return _attributes;
    }
}
/// <summary>
/// Gets the collection of HTML attributes for the closing tag. When HasClosingAttributes reports false,
/// a new collection bound to this node is returned (it is not stored); otherwise the attributes of the
/// end node are returned.
/// </summary>
public HtmlAttributeCollection ClosingAttributes
{
    get
    {
        return HasClosingAttributes
            ? _endnode.Attributes
            : new HtmlAttributeCollection(this);
    }
}
// Writes a single attribute, preceded by a space, to the given writer.
// In XML output mode the XML name and the HTML-encoded XML value are written, always double-quoted.
// Otherwise the raw name and value are written; a name of the form <%...%> is written alone, without a value.
internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
{
    if (_ownerdocument.OptionOutputAsXml)
    {
        string xmlName = _ownerdocument.OptionOutputUpperCase ? att.XmlName.ToUpper() : att.XmlName;
        outText.Write(" " + xmlName + "=\"" + HtmlDocument.HtmlEncode(att.XmlValue) + "\"");
        return;
    }

    string rawName = att.Name;
    string shownName = _ownerdocument.OptionOutputUpperCase ? rawName.ToUpper() : rawName;

    // A name that starts with "<%" and ends with "%>" is emitted verbatim with no "=value" part
    // (presumably an embedded server-side block captured as an attribute -- confirm against the parser).
    int nameLength = rawName.Length;
    bool isPercentBlock = (nameLength >= 4)
        && (rawName[0] == '<') && (rawName[1] == '%')
        && (rawName[nameLength - 1] == '>') && (rawName[nameLength - 2] == '%');
    if (isPercentBlock)
    {
        outText.Write(" " + shownName);
        return;
    }

    string attValue = att.Value;

    // Quotes are dropped only when the optimization option is on and the value contains
    // no line feed, carriage return, tab or space.
    bool writeBare = _ownerdocument.OptionOutputOptimizeAttributeValues
        && (attValue.IndexOfAny(new Char[] { (char)10, (char)13, (char)9, ' ' }) < 0);
    if (writeBare)
    {
        outText.Write(" " + shownName + "=" + attValue);
    }
    else
    {
        outText.Write(" " + shownName + "=\"" + attValue + "\"");
    }
}
// Writes every attribute of the given node to the XmlWriter as name/value attribute strings.
// Does nothing when the node reports having no attributes.
internal static void WriteAttributes(XmlWriter writer, HtmlNode node)
{
    if (node.HasAttributes)
    {
        // _hashitems is enumerated (rather than the collection itself) so that
        // each attribute is written only once.
        foreach (HtmlAttribute current in node.Attributes._hashitems.Values)
        {
            writer.WriteAttributeString(current.XmlName, current.Value);
        }
    }
}
// Writes the attributes of this node's opening tag (closing == false) or of its end node
// (closing == true) to the given writer. In XML output mode the closing flag is ignored and
// this node's own attributes are written once each.
internal void WriteAttributes(TextWriter outText, bool closing)
{
    if (_ownerdocument.OptionOutputAsXml)
    {
        if (_attributes != null)
        {
            // _hashitems is enumerated so that each attribute is written only once.
            foreach (HtmlAttribute xmlAtt in _attributes._hashitems.Values)
            {
                WriteAttribute(outText, xmlAtt);
            }
        }
        return;
    }

    if (closing)
    {
        // Nothing to write without an end node, without end-node attributes,
        // or when the node is its own end node.
        if ((_endnode == null) || (_endnode._attributes == null) || (_endnode == this))
        {
            return;
        }

        foreach (HtmlAttribute endAtt in _endnode._attributes)
        {
            WriteAttribute(outText, endAtt);
        }

        if (_ownerdocument.OptionAddDebuggingAttributes)
        {
            WriteAttribute(outText, _ownerdocument.CreateAttribute("_closed", Closed.ToString()));
            WriteAttribute(outText, _ownerdocument.CreateAttribute("_children", ChildNodes.Count.ToString()));
        }
        return;
    }

    if (_attributes != null)
    {
        foreach (HtmlAttribute startAtt in _attributes)
        {
            WriteAttribute(outText, startAtt);
        }
    }

    if (_ownerdocument.OptionAddDebuggingAttributes)
    {
        WriteAttribute(outText, _ownerdocument.CreateAttribute("_closed", Closed.ToString()));
        WriteAttribute(outText, _ownerdocument.CreateAttribute("_children", ChildNodes.Count.ToString()));

        // One extra debugging attribute per child, carrying the child's name.
        int position = 0;
        foreach (HtmlNode child in ChildNodes)
        {
            WriteAttribute(outText, _ownerdocument.CreateAttribute("_child_" + position, child.Name));
            position++;
        }
    }
}
// Returns the text of a comment node in a form that can be placed inside an XML comment.
// The first four and the last three characters of the node's Comment string are dropped
// (the callers treat them as the "<!--" and "-->" delimiters), and every "--" sequence in
// the remainder is broken up, because "--" is not allowed inside an XML comment.
// A null comment, or one too short to hold both delimiters, yields an empty string instead
// of an out-of-range Substring call.
internal static string GetXmlComment(HtmlCommentNode comment)
{
    string s = comment.Comment;

    // 4 leading + 3 trailing delimiter characters are removed below; anything shorter has no body.
    if ((s == null) || (s.Length < 7))
    {
        return string.Empty;
    }

    string inner = s.Substring(4, s.Length - 7);

    // A single Replace pass leaves "--" behind for odd runs of dashes ("---" becomes " - --"),
    // so repeat until a pass changes nothing. Each replacement lengthens the string, which makes
    // the length comparison a reliable (and purely ordinal) "did anything change" test.
    string previous;
    do
    {
        previous = inner;
        inner = inner.Replace("--", " - -");
    }
    while (inner.Length != previous.Length);

    return inner;
}
/// <summary>
/// Saves the current node to the specified TextWriter. The output depends on the node type and on
/// the owner document's output options (XML output, upper-case names, empty-node writing).
/// </summary>
/// <param name="outText">The TextWriter to which you want to save.</param>
public void WriteTo(TextWriter outText)
{
    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
        {
            string rawComment = ((HtmlCommentNode)this).Comment;
            if (!_ownerdocument.OptionOutputAsXml)
            {
                outText.Write(rawComment);
            }
            else
            {
                outText.Write("<!--" + GetXmlComment((HtmlCommentNode)this) + " -->");
            }
            break;
        }

        case HtmlNodeType.Document:
        {
            // In XML mode the content is wrapped in a span when more than one root node remains
            // after discounting the XML declaration node.
            bool wrapInSpan = false;
            if (_ownerdocument.OptionOutputAsXml)
            {
                outText.Write("<?xml version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName + "\"?>");

                // check there is a root element
                if (_ownerdocument.DocumentNode.HasChildNodes)
                {
                    int rootCount = _ownerdocument.DocumentNode._childnodes.Count;
                    if (rootCount > 0)
                    {
                        if (_ownerdocument.GetXmlDeclaration() != null)
                        {
                            rootCount--;
                        }
                        wrapInSpan = (rootCount > 1);
                    }
                }
            }

            if (wrapInSpan)
            {
                bool upperSpan = _ownerdocument.OptionOutputUpperCase;
                outText.Write(upperSpan ? "<SPAN>" : "<span>");
                WriteContentTo(outText);
                outText.Write(upperSpan ? "</SPAN>" : "</span>");
            }
            else
            {
                WriteContentTo(outText);
            }
            break;
        }

        case HtmlNodeType.Text:
        {
            string rawText = ((HtmlTextNode)this).Text;
            if (!_ownerdocument.OptionOutputAsXml)
            {
                outText.Write(rawText);
            }
            else
            {
                outText.Write(HtmlDocument.HtmlEncode(rawText));
            }
            break;
        }

        case HtmlNodeType.Element:
        {
            string tagName = _ownerdocument.OptionOutputUpperCase ? Name.ToUpper() : Name;
            bool asXml = _ownerdocument.OptionOutputAsXml;

            if (asXml)
            {
                // Nothing is written for unnamed or blank-named elements, nor for '?'-prefixed ones:
                // those have been handled at the document level.
                if ((tagName.Length == 0) || (tagName[0] == '?') || (tagName.Trim().Length == 0))
                {
                    break;
                }
                tagName = HtmlAttribute.GetXmlName(tagName);
            }

            outText.Write("<" + tagName);
            WriteAttributes(outText, false);

            if (!HasChildNodes)
            {
                if (!HtmlNode.IsEmptyElement(Name))
                {
                    outText.Write("></" + tagName + ">");
                }
                else if (_ownerdocument.OptionWriteEmptyNodes || asXml)
                {
                    outText.Write(" />");
                }
                else
                {
                    // A '?'-prefixed name gets a closing '?' before the '>'.
                    if ((Name.Length > 0) && (Name[0] == '?'))
                    {
                        outText.Write("?");
                    }
                    outText.Write(">");
                }
                break;
            }

            outText.Write(">");

            // this code and the following tries to output things as nicely as possible for old browsers.
            bool cdata = asXml && HtmlNode.IsCDataElement(Name);
            if (cdata)
            {
                outText.Write("\r\n//<![CDATA[\r\n");
                if (HasChildNodes)
                {
                    // child must be a text
                    ChildNodes[0].WriteTo(outText);
                }
                outText.Write("\r\n//]]>//\r\n");
            }
            else
            {
                WriteContentTo(outText);
            }

            outText.Write("</" + tagName);
            if (!asXml)
            {
                WriteAttributes(outText, true);
            }
            outText.Write(">");
            break;
        }
    }
}
/// <summary>
/// Saves the current node to the specified XmlWriter.
/// A document node writes an XML declaration followed by its children; a comment node writes an XML
/// comment; a text node writes its text as a string; an element writes a start element, its attributes,
/// its children and an end element.
/// Elements whose name is empty, blank or starts with '?' are not written (the declaration has already
/// been emitted at the document level), and element names are converted with HtmlAttribute.GetXmlName,
/// matching what the TextWriter overload does in XML output mode.
/// </summary>
/// <param name="writer">The XmlWriter to which you want to save.</param>
public void WriteTo(XmlWriter writer)
{
    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
            writer.WriteComment(GetXmlComment((HtmlCommentNode)this));
            break;

        case HtmlNodeType.Document:
            writer.WriteProcessingInstruction("xml", "version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName + "\"");
            if (HasChildNodes)
            {
                foreach (HtmlNode subnode in ChildNodes)
                {
                    subnode.WriteTo(writer);
                }
            }
            break;

        case HtmlNodeType.Text:
            writer.WriteString(((HtmlTextNode)this).Text);
            break;

        case HtmlNodeType.Element:
            string name;
            if (_ownerdocument.OptionOutputUpperCase)
            {
                name = Name.ToUpper();
            }
            else
            {
                name = Name;
            }

            // A '?'-prefixed element is the declaration that the document case has already written as a
            // processing instruction; writing it again as an element would produce a second, bogus one.
            // Empty or blank names cannot be written as an element at all.
            if ((name.Length == 0) || (name[0] == '?') || (name.Trim().Length == 0))
            {
                break;
            }

            // Same name conversion as the TextWriter overload applies in XML output mode.
            name = HtmlAttribute.GetXmlName(name);

            writer.WriteStartElement(name);
            WriteAttributes(writer, this);

            if (HasChildNodes)
            {
                foreach (HtmlNode subnode in ChildNodes)
                {
                    subnode.WriteTo(writer);
                }
            }
            writer.WriteEndElement();
            break;
    }
}
/// <summary>
/// Saves every child of this node, in order, to the specified TextWriter.
/// Nothing is written when the node has no child collection.
/// </summary>
/// <param name="outText">The TextWriter to which you want to save.</param>
public void WriteContentTo(TextWriter outText)
{
    if (_childnodes != null)
    {
        foreach (HtmlNode child in _childnodes)
        {
            child.WriteTo(outText);
        }
    }
}
/// <summary>
/// Saves the current node to a string by writing it into an in-memory StringWriter.
/// </summary>
/// <returns>The text produced by writing this node.</returns>
public string WriteTo()
{
    StringWriter buffer = new StringWriter();

    WriteTo(buffer);
    buffer.Flush();

    string saved = buffer.ToString();
    return saved;
}
/// <summary>
/// Saves all the children of the node to a string by writing them into an in-memory StringWriter.
/// </summary>
/// <returns>The text produced by writing the children of this node.</returns>
public string WriteContentTo()
{
    StringWriter buffer = new StringWriter();

    WriteContentTo(buffer);
    buffer.Flush();

    string saved = buffer.ToString();
    return saved;
}
/// <summary>
/// Represents a combined list and collection of HTML nodes.
/// The list-modifying members also maintain the previous/next/parent links of the nodes they touch.
/// </summary>
public class HtmlNodeCollection: IEnumerable
{
    private ArrayList _items = new ArrayList();
    private HtmlNode _parentnode;

    internal HtmlNodeCollection(HtmlNode parentnode)
    {
        _parentnode = parentnode; // may be null
    }

    /// <summary>
    /// Gets the number of elements actually contained in the list.
    /// </summary>
    public int Count
    {
        get
        {
            return _items.Count;
        }
    }

    // Empties the list after clearing the parent and sibling links of every node in it.
    internal void Clear()
    {
        foreach(HtmlNode node in _items)
        {
            node._parentnode = null;
            node._nextnode = null;
            node._prevnode = null;
        }
        _items.Clear();
    }

    // Removes the node at the given index, links its former neighbours to each other
    // and clears the removed node's own links.
    internal void Remove(int index)
    {
        HtmlNode next = null;
        HtmlNode prev = null;
        HtmlNode oldnode = (HtmlNode)_items[index];

        if (index > 0)
        {
            prev = (HtmlNode)_items[index-1];
        }

        if (index < (_items.Count-1))
        {
            next = (HtmlNode)_items[index+1];
        }

        _items.RemoveAt(index);

        if (prev != null)
        {
            if (next == prev)
            {
                throw new InvalidProgramException("Unexpected error.");
            }
            prev._nextnode = next;
        }

        if (next != null)
        {
            next._prevnode = prev;
        }

        oldnode._prevnode = null;
        oldnode._nextnode = null;
        oldnode._parentnode = null;
    }

    // Puts the given node at the given index in place of the node currently there,
    // links it to its neighbours and to the collection's parent, and detaches the replaced node.
    internal void Replace(int index, HtmlNode node)
    {
        HtmlNode next = null;
        HtmlNode prev = null;
        HtmlNode oldnode = (HtmlNode)_items[index];

        if (index>0)
        {
            prev = (HtmlNode)_items[index-1];
        }

        if (index<(_items.Count-1))
        {
            next = (HtmlNode)_items[index+1];
        }

        _items[index] = node;

        if (prev != null)
        {
            if (node == prev)
            {
                throw new InvalidProgramException("Unexpected error.");
            }
            prev._nextnode = node;
        }

        if (next!=null)
        {
            next._prevnode = node;
        }

        node._prevnode = prev;
        if (next == node)
        {
            throw new InvalidProgramException("Unexpected error.");
        }
        node._nextnode = next;
        node._parentnode = _parentnode;

        // When a node is replaced by itself it is still in the list, so the links that were
        // just set must be kept; only a genuinely different outgoing node is detached.
        if (oldnode != node)
        {
            oldnode._prevnode = null;
            oldnode._nextnode = null;
            oldnode._parentnode = null;
        }
    }

    // Inserts the given node at the given index and links it to its new neighbours
    // and to the collection's parent.
    internal void Insert(int index, HtmlNode node)
    {
        HtmlNode next = null;
        HtmlNode prev = null;

        if (index>0)
        {
            prev = (HtmlNode)_items[index-1];
        }

        if (index<_items.Count)
        {
            next = (HtmlNode)_items[index];
        }

        _items.Insert(index, node);

        if (prev != null)
        {
            if (node == prev)
            {
                throw new InvalidProgramException("Unexpected error.");
            }
            prev._nextnode = node;
        }

        if (next != null)
        {
            next._prevnode = node;
        }

        node._prevnode = prev;

        if (next == node)
        {
            throw new InvalidProgramException("Unexpected error.");
        }

        node._nextnode = next;
        node._parentnode = _parentnode;
    }

    // Adds the given node at the end of the list and links it to the former last node
    // and to the collection's parent.
    internal void Append(HtmlNode node)
    {
        HtmlNode last = null;
        if (_items.Count > 0)
        {
            last = (HtmlNode)_items[_items.Count-1];
        }

        _items.Add(node);
        node._prevnode = last;
        node._nextnode = null;
        node._parentnode = _parentnode;
        if (last != null)
        {
            if (last == node)
            {
                throw new InvalidProgramException("Unexpected error.");
            }
            last._nextnode = node;
        }
    }

    // Adds the given node at the start of the list and links it to the former first node
    // and to the collection's parent.
    internal void Prepend(HtmlNode node)
    {
        HtmlNode first = null;
        if (_items.Count > 0)
        {
            first = (HtmlNode)_items[0];
        }

        _items.Insert(0, node);

        if (node == first)
        {
            throw new InvalidProgramException("Unexpected error.");
        }
        node._nextnode = first;
        node._prevnode = null;
        node._parentnode = _parentnode;
        if (first != null)
        {
            first._prevnode = node;
        }
    }

    // Adds the given node to the list without touching any parent or sibling links.
    internal void Add(HtmlNode node)
    {
        _items.Add(node);
    }

    /// <summary>
    /// Gets the node at the specified index.
    /// </summary>
    public HtmlNode this[int index]
    {
        get
        {
            return _items[index] as HtmlNode;
        }
    }

    // Returns the zero-based position of the given node in the list, or -1 when it is not present.
    internal int GetNodeIndex(HtmlNode node)
    {
        // TODO: should we rewrite this? what would be the key of a node?
        for(int i=0;i<_items.Count;i++)
        {
            if (node == ((HtmlNode)_items[i]))
            {
                return i;
            }
        }
        return -1;
    }

    /// <summary>
    /// Gets the zero-based index of a given node in the list.
    /// An ArgumentNullException is thrown when the node is null, and an
    /// ArgumentOutOfRangeException when the node is not in the collection.
    /// </summary>
    public int this[HtmlNode node]
    {
        get
        {
            // Without this guard a null node would fail with a NullReferenceException
            // while the "not found" message below is being built.
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            int index = GetNodeIndex(node);
            if (index == -1)
            {
                throw new ArgumentOutOfRangeException("node", "Node \"" + node.CloneNode(false).OuterHtml + "\" was not found in the collection");
            }
            return index;
        }
    }

    /// <summary>
    /// Returns an enumerator that can iterate through the list.
    /// </summary>
    /// <returns>An HtmlNodeEnumerator for the entire list.</returns>
    public HtmlNodeEnumerator GetEnumerator()
    {
        return new HtmlNodeEnumerator(_items);
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Represents an enumerator that can iterate through the list.
    /// </summary>
    public class HtmlNodeEnumerator: IEnumerator
    {
        int _index;
        ArrayList _items;

        internal HtmlNodeEnumerator(ArrayList items)
        {
            _items = items;
            _index = -1;
        }

        /// <summary>
        /// Sets the enumerator to its initial position, which is before the first element in the collection.
        /// </summary>
        public void Reset()
        {
            _index = -1;
        }

        /// <summary>
        /// Advances the enumerator to the next element of the collection.
        /// </summary>
        /// <returns>true if the enumerator was successfully advanced to the next element, false if the enumerator has passed the end of the collection.</returns>
        public bool MoveNext()
        {
            _index++;
            return (_index<_items.Count);
        }

        /// <summary>
        /// Gets the current element in the collection.
        /// </summary>
        public HtmlNode Current
        {
            get
            {
                return (HtmlNode)(_items[_index]);
            }
        }

        /// <summary>
        /// Gets the current element in the collection.
        /// </summary>
        object IEnumerator.Current
        {
            get
            {
                return (Current);
            }
        }
    }
}
/// <summary>
/// Represents an HTML text node. The node keeps an optional text override; while no override
/// has been assigned, its text comes from the base class implementation of OuterHtml.
/// </summary>
public class HtmlTextNode: HtmlNode
{
    private string _text;

    internal HtmlTextNode(HtmlDocument ownerdocument, int index):
        base(HtmlNodeType.Text, ownerdocument, index)
    {
    }

    /// <summary>
    /// Gets or sets the HTML of the node. Reading returns the same value as OuterHtml;
    /// writing stores the given text as the node's text.
    /// </summary>
    public override string InnerHtml
    {
        get { return OuterHtml; }
        set { _text = value; }
    }

    /// <summary>
    /// Gets the node and its content in HTML: the assigned text when there is one,
    /// otherwise the value provided by the base class.
    /// </summary>
    public override string OuterHtml
    {
        get { return (_text != null) ? _text : base.OuterHtml; }
    }

    /// <summary>
    /// Gets or sets the text of the node. Reading returns the assigned text when there is one,
    /// otherwise the value of the base class OuterHtml.
    /// </summary>
    public string Text
    {
        get { return (_text != null) ? _text : base.OuterHtml; }
        set { _text = value; }
    }
}
2221 /// <summary>
2222 /// Represents an HTML comment.
2223 /// </summary>
2224 public class HtmlCommentNode: HtmlNode
2226 private string _comment;
2228 internal HtmlCommentNode(HtmlDocument ownerdocument, int index):
2229 base(HtmlNodeType.Comment, ownerdocument, index)
/// <summary>
/// Gets or sets the inner HTML of the comment node. Reading returns the assigned comment
/// when there is one, otherwise the value provided by the base class; writing stores the
/// given string as the comment.
/// </summary>
public override string InnerHtml
{
    get { return (_comment != null) ? _comment : base.InnerHtml; }
    set { _comment = value; }
}
/// <summary>
/// Gets the node and its content in HTML. When a comment has been assigned, it is returned
/// wrapped between "&lt;!--" and "--&gt;"; otherwise the value provided by the base class is returned.
/// </summary>
public override string OuterHtml
{
    get
    {
        if (_comment != null)
        {
            return "<!--" + _comment + "-->";
        }
        return base.OuterHtml;
    }
}
2267 /// <summary>
2268 /// Gets or Sets the comment text of the node.
2269 /// </summary>
2270 public string Comment
2274 if (_comment == null)
2276 return base.InnerHtml;
2278 return _comment;
2282 _comment = value;