Don't throw EncodingFoundException unless asked to. Should remove the occasional...
[beagle.git] / Filters / HtmlAgilityPack / HtmlNode.cs
blobf952c52a9367752256ab2b0093f9240c00d173c4
1 /*
2 Copyright (C) 2003 Simon Mourier <simonm@microsoft.com>
3 All rights reserved.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 using System;
30 using System.Collections;
31 using System.IO;
32 using System.Xml;
33 using System.Xml.XPath;
35 namespace HtmlAgilityPack
/// <summary>
/// Flags that describe the behavior of an Element node.
/// Each value is a distinct bit, so values may be combined with the bitwise OR operator.
/// </summary>
[Flags]
public enum HtmlElementFlag
{
    /// <summary>
    /// The node is a CDATA node.
    /// </summary>
    CData = 1,

    /// <summary>
    /// The node is empty. META or IMG are example of such nodes.
    /// </summary>
    Empty = 2,

    /// <summary>
    /// The node will automatically be closed during parsing.
    /// </summary>
    Closed = 4,

    /// <summary>
    /// The node can overlap.
    /// </summary>
    CanOverlap = 8
}
/// <summary>
/// Identifies the kind of a node in the document tree.
/// </summary>
public enum HtmlNodeType
{
    /// <summary>The root of a document.</summary>
    Document = 0,

    /// <summary>An HTML element.</summary>
    Element = 1,

    /// <summary>An HTML comment.</summary>
    Comment = 2,

    /// <summary>A text node is always the child of an element or a document node.</summary>
    Text = 3,
}
89 /// <summary>
90 /// Represents an HTML node.
91 /// </summary>
92 public class HtmlNode: IXPathNavigable
94 /// <summary>
95 /// Gets the name of a comment node. It is actually defined as '#comment'.
96 /// </summary>
97 public static readonly string HtmlNodeTypeNameComment = "#comment";
99 /// <summary>
100 /// Gets the name of the document node. It is actually defined as '#document'.
101 /// </summary>
102 public static readonly string HtmlNodeTypeNameDocument = "#document";
104 /// <summary>
105 /// Gets the name of a text node. It is actually defined as '#text'.
106 /// </summary>
107 public static readonly string HtmlNodeTypeNameText = "#text";
109 /// <summary>
110 /// Gets a collection of flags that define specific behaviors for specific element nodes.
111 /// The table contains a DictionaryEntry list with the lowercase tag name as the Key, and a combination of HtmlElementFlags as the Value.
112 /// </summary>
113 public static Hashtable ElementsFlags;
// Node kind, sibling/parent links, owning document and lazily created collections.
internal HtmlNodeType _nodetype;
internal HtmlNode _nextnode;
internal HtmlNode _prevnode;
internal HtmlNode _parentnode;
internal HtmlDocument _ownerdocument;
internal HtmlNodeCollection _childnodes;
internal HtmlAttributeCollection _attributes;

// Position information; all start at their default value of zero.
internal int _line;
internal int _lineposition;
internal int _streamposition;

// Offsets and lengths of the inner, outer and name sections of this node.
internal int _innerstartindex;
internal int _innerlength;
internal int _outerstartindex;
internal int _outerlength;
internal int _namestartindex;
internal int _namelength;

internal bool _starttag;
internal string _name;
internal HtmlNode _prevwithsamename;
internal HtmlNode _endnode;

// Cached markup and the flags telling the getters that the cache must be rebuilt.
internal bool _innerchanged;
internal bool _outerchanged;
internal string _innerhtml;
internal string _outerhtml;
/// <summary>
/// Fills <see cref="ElementsFlags"/> with the default flags for well-known tag names.
/// </summary>
static HtmlNode()
{
    ElementsFlags = new Hashtable();

    // tags whose content may be anything
    string[] cdataTags = new string[] { "script", "style", "noxhtml" };
    foreach (string tag in cdataTags)
    {
        ElementsFlags.Add(tag, HtmlElementFlag.CData);
    }

    // tags that can not contain other tags
    string[] emptyTags = new string[]
    {
        "base", "link", "meta", "isindex", "hr", "col", "img", "param", "embed",
        "frame", "wbr", "bgsound", "spacer", "keygen", "area", "input", "basefont"
    };
    foreach (string tag in emptyTags)
    {
        ElementsFlags.Add(tag, HtmlElementFlag.Empty);
    }

    // form is registered as CanOverlap only (not combined with Empty)
    ElementsFlags.Add("form", HtmlElementFlag.CanOverlap);

    // they sometimes contain, and sometimes they don't...
    ElementsFlags.Add("option", HtmlElementFlag.Empty);

    // tags whose closing tag is equivalent to the open tag:
    // <p>bla</p>bla stays <p>bla</p>bla
    // <p>bla<p>bla stays <p>bla<p>bla and is not turned into
    // <p>bla</p><p>bla</p> or <p>bla<p>bla</p></p>; <br> behaves the same way
    HtmlElementFlag emptyAndClosed = HtmlElementFlag.Empty | HtmlElementFlag.Closed;
    ElementsFlags.Add("br", emptyAndClosed);
    ElementsFlags.Add("p", emptyAndClosed);
}
/// <summary>
/// Determines if an element node is closed.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is the name of a closed element node, false otherwise.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
public static bool IsClosedElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // ElementsFlags is keyed by lowercase tag names. Lowercase with the invariant
    // culture so the lookup does not vary with the current culture (under Turkish
    // cultures "I" would otherwise become a dotless i and miss the key).
    object flag = ElementsFlags[name.ToLower(System.Globalization.CultureInfo.InvariantCulture)];
    if (flag == null)
    {
        return false;
    }

    return (((HtmlElementFlag)flag) & HtmlElementFlag.Closed) != 0;
}
/// <summary>
/// Determines if an element node can be kept overlapped.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is the name of an element node that can be kept overlapped, false otherwise.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
public static bool CanOverlapElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // ElementsFlags is keyed by lowercase tag names. Lowercase with the invariant
    // culture so the lookup does not vary with the current culture.
    object flag = ElementsFlags[name.ToLower(System.Globalization.CultureInfo.InvariantCulture)];
    if (flag == null)
    {
        return false;
    }

    return (((HtmlElementFlag)flag) & HtmlElementFlag.CanOverlap) != 0;
}
/// <summary>
/// Determines if a text corresponds to the closing tag of a node that can be kept overlapped.
/// </summary>
/// <param name="text">The text to check, expected in the form "&lt;/name&gt;". May not be null.</param>
/// <returns>true if the text is a closing tag whose name passes <see cref="CanOverlapElement"/>, false otherwise.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public static bool IsOverlappedClosingElement(string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    // The shortest closing tag is "</x>", which is 4 characters long, so only
    // strictly shorter texts can be rejected on length alone.
    if (text.Length < 4)
    {
        return false;
    }

    if ((text[0] != '<') ||
        (text[text.Length - 1] != '>') ||
        (text[1] != '/'))
    {
        return false;
    }

    // strip the leading "</" and the trailing ">"
    string name = text.Substring(2, text.Length - 3);
    return CanOverlapElement(name);
}
/// <summary>
/// Determines if an element node is a CDATA element node.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is the name of a CDATA element node, false otherwise.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
public static bool IsCDataElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // ElementsFlags is keyed by lowercase tag names. Lowercase with the invariant
    // culture so the lookup does not vary with the current culture.
    object flag = ElementsFlags[name.ToLower(System.Globalization.CultureInfo.InvariantCulture)];
    if (flag == null)
    {
        return false;
    }

    return (((HtmlElementFlag)flag) & HtmlElementFlag.CData) != 0;
}
/// <summary>
/// Determines if an element node is defined as empty.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>
/// true if the name is empty, starts with '!' or '?', or is registered with the
/// Empty flag in <see cref="ElementsFlags"/>; false otherwise.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
public static bool IsEmptyElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    if (name.Length == 0)
    {
        return true;
    }

    // <!DOCTYPE ...
    if ('!' == name[0])
    {
        return true;
    }

    // <?xml ...
    if ('?' == name[0])
    {
        return true;
    }

    // ElementsFlags is keyed by lowercase tag names. Lowercase with the invariant
    // culture so the lookup does not vary with the current culture (under Turkish
    // cultures "INPUT" would otherwise not match the "input" key).
    object flag = ElementsFlags[name.ToLower(System.Globalization.CultureInfo.InvariantCulture)];
    if (flag == null)
    {
        return false;
    }

    return (((HtmlElementFlag)flag) & HtmlElementFlag.Empty) != 0;
}
/// <summary>
/// Builds a node by loading the given literal HTML into a temporary document.
/// </summary>
/// <param name="html">The HTML text.</param>
/// <returns>The first child of the temporary document's root node.</returns>
public static HtmlNode CreateNode(string html)
{
    // REVIEW: this is *not* optimum, a whole document is created per call
    HtmlDocument scratch = new HtmlDocument();
    scratch.LoadHtml(html);

    HtmlNode root = scratch.DocumentNode;
    return root.FirstChild;
}
/// <summary>
/// Copies the specified node onto this node; shorthand for <c>CopyFrom(node, true)</c>.
/// </summary>
/// <param name="node">The node to copy from. May not be null.</param>
public void CopyFrom(HtmlNode node)
{
    const bool deep = true;
    CopyFrom(node, deep);
}
/// <summary>
/// Makes this node a duplicate of the specified node.
/// The attributes of this node are always replaced by copies of the source attributes.
/// </summary>
/// <param name="node">The node to duplicate. May not be null.</param>
/// <param name="deep">
/// true to also replace the children of this node with deep clones of the source's
/// children; false to copy only the attributes and leave this node's children untouched.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
public void CopyFrom(HtmlNode node, bool deep)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    Attributes.RemoveAll();
    if (node.HasAttributes)
    {
        foreach (HtmlAttribute att in node.Attributes)
        {
            SetAttributeValue(att.Name, att.Value);
        }
    }

    // The subtree is copied only for a deep copy. The previous test was inverted
    // and copied the children exactly when deep was false.
    if (deep)
    {
        RemoveAllChildren();
        if (node.HasChildNodes)
        {
            foreach (HtmlNode child in node.ChildNodes)
            {
                AppendChild(child.CloneNode(true));
            }
        }
    }
}
/// <summary>
/// Initializes a node of the given type for the given document.
/// </summary>
/// <param name="type">The type of the node.</param>
/// <param name="ownerdocument">The document that owns the node.</param>
/// <param name="index">The outer start index of the node; -1 means the node comes from public code.</param>
internal HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
{
    _nodetype = type;
    _ownerdocument = ownerdocument;
    _outerstartindex = index;

    // comment, document and text nodes get a fixed name and are their own end node
    if (type == HtmlNodeType.Comment)
    {
        _name = HtmlNodeTypeNameComment;
        _endnode = this;
    }
    else if (type == HtmlNodeType.Document)
    {
        _name = HtmlNodeTypeNameDocument;
        _endnode = this;
    }
    else if (type == HtmlNodeType.Text)
    {
        _name = HtmlNodeTypeNameText;
        _endnode = this;
    }

    // register still-open nodes, using the index as the key
    // (-1 means the node comes from public code and is not registered)
    if ((_ownerdocument._openednodes != null) && !Closed && (index != -1))
    {
        _ownerdocument._openednodes.Add(index, this);
    }

    bool isCommentOrText = (type == HtmlNodeType.Comment) || (type == HtmlNodeType.Text);
    if ((index == -1) && !isCommentOrText)
    {
        // innerhtml and outerhtml must be calculated
        _outerchanged = true;
        _innerchanged = true;
    }
}
/// <summary>
/// Closes this node with the given end node and updates its inner and outer sections.
/// </summary>
/// <param name="endnode">The node that closes this node.</param>
internal void CloseNode(HtmlNode endnode)
{
    // close all children that are still open, unless the document auto-closes on end
    if (!_ownerdocument.OptionAutoCloseOnEnd && (_childnodes != null))
    {
        foreach (HtmlNode child in _childnodes)
        {
            if (!child.Closed)
            {
                // create a fake closer node
                HtmlNode fakeCloser = new HtmlNode(NodeType, _ownerdocument, -1);
                fakeCloser._endnode = fakeCloser;
                child.CloseNode(fakeCloser);
            }
        }
    }

    if (Closed)
    {
        return;
    }

    _endnode = endnode;

    if (_ownerdocument._openednodes != null)
    {
        _ownerdocument._openednodes.Remove(_outerstartindex);
    }

    HtmlNode lastWithSameName = _ownerdocument._lastnodes[Name] as HtmlNode;
    if (lastWithSameName == this)
    {
        _ownerdocument._lastnodes.Remove(Name);
        _ownerdocument.UpdateLastParentNode();
    }

    // a node closed by itself has no inner section
    if (endnode != this)
    {
        // create an inner section
        _innerstartindex = _outerstartindex + _outerlength;
        _innerlength = endnode._outerstartindex - _innerstartindex;

        // update full length
        int endOfCloser = endnode._outerstartindex + endnode._outerlength;
        _outerlength = endOfCloser - _outerstartindex;
    }
}
/// <summary>
/// Gets the node stored as this node's end node, or null when none has been set.
/// </summary>
internal HtmlNode EndNode
{
    get { return _endnode; }
}
/// <summary>
/// Reads the value of the 'id' attribute.
/// </summary>
/// <returns>The attribute value, or null when the node has no 'id' attribute.</returns>
internal string GetId()
{
    HtmlAttribute idAttribute = Attributes["id"];
    return (idAttribute == null) ? null : idAttribute.Value;
}
/// <summary>
/// Sets the value of the 'id' attribute, creating the attribute when it does not exist,
/// and registers this node under that id in the owner document.
/// </summary>
/// <param name="id">The new id value.</param>
internal void SetId(string id)
{
    HtmlAttribute att = Attributes["id"];
    if (att == null)
    {
        att = _ownerdocument.CreateAttribute("id");

        // Attach the new attribute to this node; without this the attribute
        // stays detached and the id never shows up on the node.
        Attributes.Append(att);
    }

    att.Value = id;
    _ownerdocument.SetIdForNode(this, att.Value);
    _outerchanged = true;
}
/// <summary>
/// Creates a new XPathNavigator object for navigating this HTML node.
/// </summary>
/// <returns>
/// An XPathNavigator created for this node and its owner document. It is positioned on
/// the node from which the method was called, not on the root of the document.
/// </returns>
public XPathNavigator CreateNavigator()
{
    HtmlNodeNavigator navigator = new HtmlNodeNavigator(_ownerdocument, this);
    return navigator;
}
/// <summary>
/// Selects the first node that matches the XPath expression, evaluated from this node.
/// </summary>
/// <param name="xpath">The XPath expression. May not be null.</param>
/// <returns>The first HtmlNode that matches the XPath query or a null reference if no matching node was found.</returns>
public HtmlNode SelectSingleNode(string xpath)
{
    if (xpath == null)
    {
        throw new ArgumentNullException("xpath");
    }

    HtmlNodeNavigator start = new HtmlNodeNavigator(_ownerdocument, this);
    XPathNodeIterator matches = start.Select(xpath);
    if (matches.MoveNext())
    {
        HtmlNodeNavigator first = (HtmlNodeNavigator)matches.Current;
        return first.CurrentNode;
    }

    return null;
}
/// <summary>
/// Selects all nodes matching the XPath expression, evaluated from this node.
/// </summary>
/// <param name="xpath">The XPath expression.</param>
/// <returns>An HtmlNodeCollection containing the matching nodes, or null if no node matched the XPath expression.</returns>
public HtmlNodeCollection SelectNodes(string xpath)
{
    HtmlNodeCollection matches = new HtmlNodeCollection(null);

    HtmlNodeNavigator start = new HtmlNodeNavigator(_ownerdocument, this);
    XPathNodeIterator iterator = start.Select(xpath);
    while (iterator.MoveNext())
    {
        HtmlNodeNavigator current = (HtmlNodeNavigator)iterator.Current;
        matches.Add(current.CurrentNode);
    }

    // callers get null, not an empty collection, when nothing matched
    return (matches.Count == 0) ? null : matches;
}
/// <summary>
/// Gets or sets the value of the 'id' HTML attribute. The document must have been parsed using the OptionUseIdAttribute set to true.
/// Both accessors throw when the owner document has no id table; the setter rejects null.
/// </summary>
public string Id
{
    get
    {
        bool idsTracked = (_ownerdocument._nodesid != null);
        if (!idsTracked)
        {
            throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
        }

        return GetId();
    }
    set
    {
        bool idsTracked = (_ownerdocument._nodesid != null);
        if (!idsTracked)
        {
            throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
        }

        if (value == null)
        {
            throw new ArgumentNullException("value");
        }

        SetId(value);
    }
}
/// <summary>
/// Gets the value of the internal start-tag flag of this node.
/// </summary>
public bool StartTag
{
    get { return _starttag; }
}
/// <summary>
/// Gets the line number of this node in the document.
/// </summary>
public int Line
{
    get { return _line; }
}
/// <summary>
/// Gets the column number of this node in the document.
/// </summary>
public int LinePosition
{
    get { return _lineposition; }
}
/// <summary>
/// Gets the stream position of this node in the document, relative to the start of the document.
/// </summary>
public int StreamPosition
{
    get { return _streamposition; }
}
/// <summary>
/// Gets a value indicating if this node has been closed or not.
/// A node counts as closed as soon as an end node has been assigned to it.
/// </summary>
public bool Closed
{
    get
    {
        bool hasEndNode = (_endnode != null);
        return hasEndNode;
    }
}
/// <summary>
/// Gets or sets this node's name.
/// When no name has been assigned yet, the getter extracts it from the owner document's
/// text, lowercases it and caches the result. The setter stores the value unchanged.
/// </summary>
public string Name
{
    get
    {
        if (_name == null)
        {
            // Lowercase with the invariant culture so the result does not vary with
            // the current culture (under Turkish cultures "I" would otherwise become
            // a dotless i).
            _name = _ownerdocument._text
                .Substring(_namestartindex, _namelength)
                .ToLower(System.Globalization.CultureInfo.InvariantCulture);
        }

        return _name;
    }
    set
    {
        _name = value;
    }
}
/// <summary>
/// Gets the text between the start and end tags of the object.
/// Text and comment nodes return their own text; any other node returns the
/// concatenated InnerText of its children, or an empty string when it has none.
/// </summary>
public virtual string InnerText
{
    get
    {
        if (_nodetype == HtmlNodeType.Text)
        {
            return ((HtmlTextNode)this).Text;
        }

        if (_nodetype == HtmlNodeType.Comment)
        {
            return ((HtmlCommentNode)this).Comment;
        }

        // note: right now, this method is *slow*, because we recompute everything.
        // it could be optimised like innerhtml
        if (!HasChildNodes)
        {
            return string.Empty;
        }

        // Accumulate in a StringBuilder instead of repeated string concatenation,
        // which copied the whole partial result once per child.
        System.Text.StringBuilder text = new System.Text.StringBuilder();
        foreach (HtmlNode node in ChildNodes)
        {
            text.Append(node.InnerText);
        }

        return text.ToString();
    }
}
/// <summary>
/// Gets or Sets the HTML between the start and end tags of the object.
/// The getter regenerates and caches the markup when the inner content was flagged as
/// changed, otherwise it returns the cached markup or the matching slice of the document
/// text. The setter parses the value and replaces all children with the parsed nodes.
/// </summary>
public virtual string InnerHtml
{
    get
    {
        if (!_innerchanged)
        {
            if (_innerhtml != null)
            {
                return _innerhtml;
            }

            return (_innerstartindex < 0)
                ? string.Empty
                : _ownerdocument._text.Substring(_innerstartindex, _innerlength);
        }

        // content changed since the last read: rebuild the cache
        _innerhtml = WriteContentTo();
        _innerchanged = false;
        return _innerhtml;
    }
    set
    {
        HtmlDocument parsed = new HtmlDocument();
        parsed.LoadHtml(value);

        RemoveAllChildren();
        AppendChildren(parsed.DocumentNode.ChildNodes);
    }
}
/// <summary>
/// Gets the object and its content in HTML.
/// The markup is regenerated and cached when the node was flagged as changed; otherwise
/// the cached markup or the matching slice of the document text is returned.
/// </summary>
public virtual string OuterHtml
{
    get
    {
        if (!_outerchanged)
        {
            if (_outerhtml != null)
            {
                return _outerhtml;
            }

            return (_outerstartindex < 0)
                ? string.Empty
                : _ownerdocument._text.Substring(_outerstartindex, _outerlength);
        }

        // node changed since the last read: rebuild the cache
        _outerhtml = WriteTo();
        _outerchanged = false;
        return _outerhtml;
    }
}
/// <summary>
/// Creates a duplicate of the node; shorthand for <c>CloneNode(true)</c>.
/// </summary>
/// <returns>The cloned node.</returns>
public HtmlNode Clone()
{
    const bool deep = true;
    return CloneNode(deep);
}
/// <summary>
/// Creates a duplicate of the node and changes its name at the same time;
/// shorthand for <c>CloneNode(newName, true)</c>.
/// </summary>
/// <param name="newName">The new name of the cloned node. May not be null.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(string newName)
{
    const bool deep = true;
    return CloneNode(newName, deep);
}
/// <summary>
/// Clones the node through <c>CloneNode(deep)</c> and gives the clone a new name.
/// </summary>
/// <param name="newName">The new name of the cloned node. May not be null.</param>
/// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(string newName, bool deep)
{
    if (newName == null)
    {
        throw new ArgumentNullException("newName");
    }

    HtmlNode renamedClone = CloneNode(deep);
    renamedClone._name = newName;
    return renamedClone;
}
/// <summary>
/// Creates a duplicate of the node.
/// Comment and text nodes copy only their content; other nodes copy their attributes,
/// their closing-tag attributes and, for a deep clone, their children.
/// </summary>
/// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(bool deep)
{
    HtmlNode copy = _ownerdocument.CreateNode(_nodetype);
    copy._name = Name;

    if (_nodetype == HtmlNodeType.Comment)
    {
        ((HtmlCommentNode)copy).Comment = ((HtmlCommentNode)this).Comment;
        return copy;
    }

    if (_nodetype == HtmlNodeType.Text)
    {
        ((HtmlTextNode)copy).Text = ((HtmlTextNode)this).Text;
        return copy;
    }

    // attributes
    if (HasAttributes)
    {
        foreach (HtmlAttribute source in _attributes)
        {
            copy.Attributes.Append(source.Clone());
        }
    }

    // closing attributes
    if (HasClosingAttributes)
    {
        copy._endnode = _endnode.CloneNode(false);
        foreach (HtmlAttribute source in _endnode._attributes)
        {
            copy._endnode._attributes.Append(source.Clone());
        }
    }

    // child nodes, only for a deep clone of a node that has children
    if (deep && HasChildNodes)
    {
        foreach (HtmlNode child in _childnodes)
        {
            copy.AppendChild(child.Clone());
        }
    }

    return copy;
}
/// <summary>
/// Gets the HTML node immediately following this element.
/// </summary>
public HtmlNode NextSibling
{
    get { return _nextnode; }
}
/// <summary>
/// Gets the node immediately preceding this node.
/// </summary>
public HtmlNode PreviousSibling
{
    get { return _prevnode; }
}
/// <summary>
/// Removes all the children of the current node, clears its attributes and the
/// attributes of its end node, and flags the node as changed.
/// </summary>
public void RemoveAll()
{
    RemoveAllChildren();

    if (HasAttributes)
    {
        _attributes.Clear();
    }

    // the end node is a separate node only when it is not this node itself
    bool hasSeparateEndNode = (_endnode != null) && (_endnode != this);
    if (hasSeparateEndNode && (_endnode._attributes != null))
    {
        _endnode._attributes.Clear();
    }

    _outerchanged = true;
    _innerchanged = true;
}
/// <summary>
/// Removes all the children of the current node. Does nothing when there are none.
/// </summary>
public void RemoveAllChildren()
{
    if (HasChildNodes)
    {
        if (_ownerdocument.OptionUseIdAttribute)
        {
            // remove nodes from id list
            foreach (HtmlNode child in _childnodes)
            {
                _ownerdocument.SetIdForNode(null, child.GetId());
            }
        }

        _childnodes.Clear();
        _outerchanged = true;
        _innerchanged = true;
    }
}
/// <summary>
/// Removes the specified child node.
/// </summary>
/// <param name="oldChild">The node being removed. May not be null, and must be a child of this node.</param>
/// <returns>The node removed.</returns>
public HtmlNode RemoveChild(HtmlNode oldChild)
{
    if (oldChild == null)
    {
        throw new ArgumentNullException("oldChild");
    }

    int position = (_childnodes != null) ? _childnodes[oldChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Remove(position);

    // unregister the id of the removed node
    _ownerdocument.SetIdForNode(null, oldChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return oldChild;
}
/// <summary>
/// Removes the specified child node, optionally keeping its children.
/// </summary>
/// <param name="oldChild">The node being removed. May not be null.</param>
/// <param name="keepGrandChildren">
/// true to insert the children of <paramref name="oldChild"/> into this node, in their
/// original order, after the previous sibling of the removed node; false otherwise.
/// </param>
/// <returns>The node removed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="oldChild"/> is null.</exception>
public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)
{
    if (oldChild == null)
    {
        throw new ArgumentNullException("oldChild");
    }

    if ((oldChild._childnodes != null) && keepGrandChildren)
    {
        // get prev sibling
        HtmlNode prev = oldChild.PreviousSibling;

        // Reroute grand children to ourselves. Each grandchild is inserted after the
        // one inserted before it; inserting them all after the same sibling would
        // reverse their order.
        foreach (HtmlNode grandchild in oldChild._childnodes)
        {
            prev = InsertAfter(grandchild, prev);
        }
    }

    RemoveChild(oldChild);
    _outerchanged = true;
    _innerchanged = true;
    return oldChild;
}
/// <summary>
/// Replaces the child node oldChild with newChild node.
/// A null newChild removes oldChild; a null oldChild appends newChild.
/// </summary>
/// <param name="newChild">The new node to put in the child list.</param>
/// <param name="oldChild">The node being replaced in the list.</param>
/// <returns>The result of the removal or append for the null cases, otherwise newChild.</returns>
public HtmlNode ReplaceChild(HtmlNode newChild, HtmlNode oldChild)
{
    if (newChild == null)
    {
        return RemoveChild(oldChild);
    }

    if (oldChild == null)
    {
        return AppendChild(newChild);
    }

    int position = (_childnodes != null) ? _childnodes[oldChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Replace(position, newChild);

    // swap the id registrations of the two nodes
    _ownerdocument.SetIdForNode(null, oldChild.GetId());
    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Inserts the specified node immediately before the specified reference node.
/// A null reference node appends the new node; passing the same node twice is a no-op.
/// </summary>
/// <param name="newChild">The node to insert. May not be null.</param>
/// <param name="refChild">The node that is the reference node. The newChild is placed before this node.</param>
/// <returns>The node being inserted.</returns>
public HtmlNode InsertBefore(HtmlNode newChild, HtmlNode refChild)
{
    if (newChild == null)
    {
        throw new ArgumentNullException("newChild");
    }

    if (refChild == null)
    {
        return AppendChild(newChild);
    }

    if (newChild == refChild)
    {
        return newChild;
    }

    int position = (_childnodes != null) ? _childnodes[refChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Insert(position, newChild);

    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Inserts the specified node immediately after the specified reference node.
/// A null reference node prepends the new node; passing the same node twice is a no-op.
/// </summary>
/// <param name="newChild">The node to insert. May not be null.</param>
/// <param name="refChild">The node that is the reference node. The newNode is placed after the refNode.</param>
/// <returns>The node being inserted.</returns>
public HtmlNode InsertAfter(HtmlNode newChild, HtmlNode refChild)
{
    if (newChild == null)
    {
        throw new ArgumentNullException("newChild");
    }

    if (refChild == null)
    {
        return PrependChild(newChild);
    }

    if (newChild == refChild)
    {
        return newChild;
    }

    int position = (_childnodes != null) ? _childnodes[refChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Insert(position + 1, newChild);

    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Gets the first child of the node, or null when the node has no children.
/// </summary>
public HtmlNode FirstChild
{
    get
    {
        return HasChildNodes ? _childnodes[0] : null;
    }
}
/// <summary>
/// Gets the last child of the node, or null when the node has no children.
/// </summary>
public HtmlNode LastChild
{
    get
    {
        return HasChildNodes ? _childnodes[_childnodes.Count - 1] : null;
    }
}
/// <summary>
/// Gets the type of this node.
/// </summary>
public HtmlNodeType NodeType
{
    get { return _nodetype; }
}
/// <summary>
/// Gets the parent of this node (for nodes that can have parents).
/// </summary>
public HtmlNode ParentNode
{
    get { return _parentnode; }
}
/// <summary>
/// Gets the HtmlDocument to which this node belongs.
/// </summary>
public HtmlDocument OwnerDocument
{
    get { return _ownerdocument; }
}
/// <summary>
/// Gets all the children of the node. The collection is created on first access.
/// </summary>
public HtmlNodeCollection ChildNodes
{
    get
    {
        if (_childnodes != null)
        {
            return _childnodes;
        }

        _childnodes = new HtmlNodeCollection(this);
        return _childnodes;
    }
}
/// <summary>
/// Adds the specified node to the beginning of the list of children of this node,
/// registers its id with the owner document and flags this node as changed.
/// </summary>
/// <param name="newChild">The node to add. May not be null.</param>
/// <returns>The node added.</returns>
public HtmlNode PrependChild(HtmlNode newChild)
{
    if (newChild == null)
    {
        throw new ArgumentNullException("newChild");
    }

    HtmlNodeCollection children = ChildNodes;
    children.Prepend(newChild);

    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _innerchanged = true;
    _outerchanged = true;
    return newChild;
}
/// <summary>
/// Adds the specified node list to the beginning of the list of children of this node.
/// The nodes keep the relative order they have in <paramref name="newChildren"/>.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is null.</exception>
public void PrependChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        throw new ArgumentNullException("newChildren");
    }

    // Prepend from last to first: each prepend puts the node in front of those
    // already added, so walking the list forwards would reverse its order.
    for (int i = newChildren.Count - 1; i >= 0; i--)
    {
        PrependChild(newChildren[i]);
    }
}
/// <summary>
/// Adds the specified node to the end of the list of children of this node,
/// registers its id with the owner document and flags this node as changed.
/// </summary>
/// <param name="newChild">The node to add. May not be null.</param>
/// <returns>The node added.</returns>
public HtmlNode AppendChild(HtmlNode newChild)
{
    if (newChild == null)
    {
        throw new ArgumentNullException("newChild");
    }

    HtmlNodeCollection children = ChildNodes;
    children.Append(newChild);

    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _innerchanged = true;
    _outerchanged = true;
    return newChild;
}
/// <summary>
/// Adds each node of the specified list to the end of the list of children of this node.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is null.</exception>
public void AppendChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        // the reported parameter name must match the real one (was misspelled "newChildrend")
        throw new ArgumentNullException("newChildren");
    }

    foreach (HtmlNode newChild in newChildren)
    {
        AppendChild(newChild);
    }
}
/// <summary>
/// Gets a value indicating whether the current node has any attributes.
/// </summary>
public bool HasAttributes
{
    get
    {
        return (_attributes != null) && (_attributes.Count > 0);
    }
}
/// <summary>
/// Gets a value indicating whether the current node has any attributes on the closing tag.
/// A node without an end node, or one that is its own end node, has none.
/// </summary>
public bool HasClosingAttributes
{
    get
    {
        bool hasSeparateEndNode = (_endnode != null) && (_endnode != this);
        return hasSeparateEndNode
            && (_endnode._attributes != null)
            && (_endnode._attributes.Count > 0);
    }
}
/// <summary>
/// Gets a value indicating whether this node has any child nodes.
/// </summary>
public bool HasChildNodes
{
    get
    {
        return (_childnodes != null) && (_childnodes.Count > 0);
    }
}
/// <summary>
/// Helper method to get the string value of an attribute of this node.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be null.</param>
/// <param name="def">The default value to return if not found.</param>
/// <returns>The value of the attribute if found, the default value if not found.</returns>
public string GetAttributeValue(string name, string def)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    if (HasAttributes)
    {
        HtmlAttribute found = Attributes[name];
        if (found != null)
        {
            return found.Value;
        }
    }

    return def;
}
/// <summary>
/// Helper method to get the value of an attribute of this node as an integer.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be null.</param>
/// <param name="def">The default value to return if the attribute is not found or cannot be converted.</param>
/// <returns>The converted value of the attribute, or the default value.</returns>
public int GetAttributeValue(string name, int def)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    if (HasAttributes)
    {
        HtmlAttribute found = Attributes[name];
        if (found != null)
        {
            try
            {
                return Convert.ToInt32(found.Value);
            }
            catch
            {
                // any conversion failure falls back to the default
                return def;
            }
        }
    }

    return def;
}
/// <summary>
/// Helper method to get the value of an attribute of this node as a boolean.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be null.</param>
/// <param name="def">The default value to return if the attribute is not found or cannot be converted.</param>
/// <returns>The converted value of the attribute, or the default value.</returns>
public bool GetAttributeValue(string name, bool def)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    if (HasAttributes)
    {
        HtmlAttribute found = Attributes[name];
        if (found != null)
        {
            try
            {
                return Convert.ToBoolean(found.Value);
            }
            catch
            {
                // any conversion failure falls back to the default
                return def;
            }
        }
    }

    return def;
}
/// <summary>
/// Assigns a value to the named attribute of this node, creating and appending the attribute when it does not exist yet.
/// </summary>
/// <param name="name">The name of the attribute to set. May not be null.</param>
/// <param name="value">The value to store in the attribute.</param>
/// <returns>The attribute instance that now carries the value (either the existing one or the one just appended).</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
public HtmlAttribute SetAttributeValue(string name, string value)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    HtmlAttribute existing = Attributes[name];
    if (existing != null)
    {
        // Attribute already present: update it in place.
        existing.Value = value;
        return existing;
    }

    // Not present: let the owner document build it, then append it to this node.
    HtmlAttribute created = _ownerdocument.CreateAttribute(name, value);
    return Attributes.Append(created);
}
/// <summary>
/// Gets the collection of HTML attributes for this node.
/// When HasAttributes reports false, a new empty collection bound to this node is created, stored and returned.
/// </summary>
public HtmlAttributeCollection Attributes
{
    get
    {
        if (HasAttributes)
        {
            return _attributes;
        }

        // Lazily (re)create the backing collection.
        _attributes = new HtmlAttributeCollection(this);
        return _attributes;
    }
}
/// <summary>
/// Gets the collection of HTML attributes carried by the closing tag.
/// When HasClosingAttributes reports false, a fresh empty collection is returned; it is not stored on the node.
/// </summary>
public HtmlAttributeCollection ClosingAttributes
{
    get
    {
        return HasClosingAttributes
            ? _endnode.Attributes
            : new HtmlAttributeCollection(this);
    }
}
// Characters that make an HTML attribute value unsafe to emit without quotes:
// LF, CR, TAB, FF, space, single quote, '=', '<', '>' and backtick.
// A double quote is deliberately absent: the value is written verbatim (not
// entity-encoded) in HTML mode, so wrapping it in double quotes would not help.
private static readonly char[] _quoteRequiredChars = new char[] { (char)10, (char)13, (char)9, (char)12, ' ', '\'', '=', '<', '>', '`' };

/// <summary>
/// Writes a single attribute, preceded by a space, to the specified TextWriter.
/// </summary>
/// <remarks>
/// In XML output mode the XML-safe name is used and the XML value is HTML-encoded and always double-quoted.
/// In HTML mode a name of the form &lt;% ... %&gt; is written alone (no value); otherwise the raw value is
/// written, double-quoted unless OptionOutputOptimizeAttributeValues is set and the value can safely stand unquoted.
/// </remarks>
/// <param name="outText">The TextWriter that receives the attribute text.</param>
/// <param name="att">The attribute to write.</param>
internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
{
    string name;

    if (_ownerdocument.OptionOutputAsXml)
    {
        name = _ownerdocument.OptionOutputUpperCase ? att.XmlName.ToUpper() : att.XmlName;
        outText.Write(" " + name + "=\"" + HtmlDocument.HtmlEncode(att.XmlValue) + "\"");
        return;
    }

    name = _ownerdocument.OptionOutputUpperCase ? att.Name.ToUpper() : att.Name;

    // A name wrapped in <% ... %> is emitted as-is, without any value.
    string rawName = att.Name;
    if ((rawName.Length >= 4) &&
        (rawName[0] == '<') && (rawName[1] == '%') &&
        (rawName[rawName.Length - 1] == '>') && (rawName[rawName.Length - 2] == '%'))
    {
        outText.Write(" " + name);
        return;
    }

    string value = att.Value;
    bool quoted = true;
    if (_ownerdocument.OptionOutputOptimizeAttributeValues)
    {
        // Quotes may only be dropped for a non-empty value free of whitespace and of the
        // characters that would end or corrupt an unquoted value. An empty unquoted value
        // ("a=") would make a parser take the following attribute as the value.
        quoted = (value == null) || (value.Length == 0) || (value.IndexOfAny(_quoteRequiredChars) >= 0);
    }

    if (quoted)
    {
        outText.Write(" " + name + "=\"" + value + "\"");
    }
    else
    {
        outText.Write(" " + name + "=" + value);
    }
}
/// <summary>
/// Writes the attributes of a node to an XmlWriter, one WriteAttributeString call per attribute.
/// Does nothing when the node reports no attributes.
/// </summary>
/// <param name="writer">The XmlWriter that receives the attributes.</param>
/// <param name="node">The node whose attributes are written.</param>
internal static void WriteAttributes(XmlWriter writer, HtmlNode node)
{
    if (node.HasAttributes)
    {
        // Enumerating _hashitems (rather than the list) ensures each attribute is written only once.
        foreach (HtmlAttribute attribute in node.Attributes._hashitems.Values)
        {
            writer.WriteAttributeString(attribute.XmlName, attribute.Value);
        }
    }
}
/// <summary>
/// Writes the attributes of this node's opening tag, or of its closing tag, to the specified TextWriter.
/// </summary>
/// <remarks>
/// In XML output mode the <paramref name="closing"/> flag is ignored and the node's own attributes are
/// written once each. When OptionAddDebuggingAttributes is set, extra "_closed", "_children" and (opening
/// tag only) "_child_N" attributes are appended.
/// </remarks>
/// <param name="outText">The TextWriter that receives the attributes.</param>
/// <param name="closing">true to write the closing tag's attributes; false for the opening tag's.</param>
internal void WriteAttributes(TextWriter outText, bool closing)
{
    if (_ownerdocument.OptionOutputAsXml)
    {
        if (_attributes != null)
        {
            // Enumerating _hashitems ensures each attribute is written only once.
            foreach (HtmlAttribute unique in _attributes._hashitems.Values)
            {
                WriteAttribute(outText, unique);
            }
        }
        return;
    }

    if (closing)
    {
        // Nothing to write without an end node, without end-node attributes,
        // or when the node is its own end node.
        if ((_endnode == null) || (_endnode._attributes == null) || (_endnode == this))
        {
            return;
        }

        foreach (HtmlAttribute endAttribute in _endnode._attributes)
        {
            WriteAttribute(outText, endAttribute);
        }

        if (_ownerdocument.OptionAddDebuggingAttributes)
        {
            WriteAttribute(outText, _ownerdocument.CreateAttribute("_closed", Closed.ToString()));
            WriteAttribute(outText, _ownerdocument.CreateAttribute("_children", ChildNodes.Count.ToString()));
        }
        return;
    }

    if (_attributes != null)
    {
        foreach (HtmlAttribute startAttribute in _attributes)
        {
            WriteAttribute(outText, startAttribute);
        }
    }

    if (_ownerdocument.OptionAddDebuggingAttributes)
    {
        WriteAttribute(outText, _ownerdocument.CreateAttribute("_closed", Closed.ToString()));
        WriteAttribute(outText, _ownerdocument.CreateAttribute("_children", ChildNodes.Count.ToString()));

        // One "_child_N" attribute per child, carrying the child's name.
        int position = 0;
        foreach (HtmlNode child in ChildNodes)
        {
            WriteAttribute(outText, _ownerdocument.CreateAttribute("_child_" + position, child.Name));
            position++;
        }
    }
}
/// <summary>
/// Extracts the inner text of a comment node in a form that is legal inside an XML comment.
/// </summary>
/// <remarks>
/// The first 4 and last 3 characters of the comment text are dropped (presumably the opening and
/// closing comment markers - inferred from the offsets used), and every "--" sequence is broken up
/// because XML forbids "--" inside a comment.
/// </remarks>
/// <param name="comment">The comment node to convert.</param>
/// <returns>The sanitized comment body, or an empty string when the comment text is null or shorter than 7 characters.</returns>
internal static string GetXmlComment(HtmlCommentNode comment)
{
    string s = comment.Comment;

    // Too short to hold both markers: there is no body to extract
    // (Substring would otherwise throw ArgumentOutOfRangeException).
    if ((s == null) || (s.Length < 7))
    {
        return string.Empty;
    }

    string body = s.Substring(4, s.Length - 7);

    // A single Replace pass can leave "--" behind for odd runs of dashes
    // ("---" becomes " - --"), so repeat until none remains.
    while (body.IndexOf("--") >= 0)
    {
        body = body.Replace("--", " - -");
    }
    return body;
}
/// <summary>
/// Serializes this node (and, where applicable, its subtree) to the specified TextWriter,
/// honoring the owner document's output options (XML output, upper-case names, empty-node style).
/// </summary>
/// <param name="outText">The TextWriter that receives the output.</param>
public void WriteTo(TextWriter outText)
{
    string html;
    string name;
    bool wrapRoots;

    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
            html = ((HtmlCommentNode)this).Comment;
            if (!_ownerdocument.OptionOutputAsXml)
            {
                outText.Write(html);
                break;
            }
            outText.Write("<!--" + GetXmlComment((HtmlCommentNode)this) + " -->");
            break;

        case HtmlNodeType.Document:
            wrapRoots = false;
            if (_ownerdocument.OptionOutputAsXml)
            {
                outText.Write("<?xml version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName + "\"?>");

                // Count the root-level nodes, not counting an xml declaration node if there is one.
                if (_ownerdocument.DocumentNode.HasChildNodes)
                {
                    int rootnodes = _ownerdocument.DocumentNode._childnodes.Count;
                    if ((rootnodes > 0) && (_ownerdocument.GetXmlDeclaration() != null))
                    {
                        rootnodes--;
                    }
                    wrapRoots = (rootnodes > 1);
                }
            }

            if (wrapRoots)
            {
                // More than one root: wrap everything in a single span element.
                bool upperCase = _ownerdocument.OptionOutputUpperCase;
                outText.Write(upperCase ? "<SPAN>" : "<span>");
                WriteContentTo(outText);
                outText.Write(upperCase ? "</SPAN>" : "</span>");
            }
            else
            {
                WriteContentTo(outText);
            }
            break;

        case HtmlNodeType.Text:
            html = ((HtmlTextNode)this).Text;
            outText.Write(_ownerdocument.OptionOutputAsXml ? HtmlDocument.HtmlEncode(html) : html);
            break;

        case HtmlNodeType.Element:
            name = _ownerdocument.OptionOutputUpperCase ? Name.ToUpper() : Name;

            if (_ownerdocument.OptionOutputAsXml)
            {
                // Skip elements with an empty or blank name, and '?' elements
                // (those have been handled at the document level).
                if ((name.Length == 0) || (name[0] == '?') || (name.Trim().Length == 0))
                {
                    break;
                }
                name = HtmlAttribute.GetXmlName(name);
            }

            outText.Write("<" + name);
            WriteAttributes(outText, false);

            if (!HasChildNodes)
            {
                if (!HtmlNode.IsEmptyElement(Name))
                {
                    outText.Write("></" + name + ">");
                }
                else if (_ownerdocument.OptionWriteEmptyNodes || _ownerdocument.OptionOutputAsXml)
                {
                    outText.Write(" />");
                }
                else
                {
                    if ((Name.Length > 0) && (Name[0] == '?'))
                    {
                        outText.Write("?");
                    }
                    outText.Write(">");
                }
                break;
            }

            outText.Write(">");

            // In XML mode, CDATA elements get their content wrapped in a commented-out
            // CDATA section, to output things as nicely as possible for old browsers.
            bool cdata = _ownerdocument.OptionOutputAsXml && HtmlNode.IsCDataElement(Name);
            if (cdata)
            {
                outText.Write("\r\n//<![CDATA[\r\n");
                if (HasChildNodes)
                {
                    // child must be a text
                    ChildNodes[0].WriteTo(outText);
                }
                outText.Write("\r\n//]]>//\r\n");
            }
            else
            {
                WriteContentTo(outText);
            }

            outText.Write("</" + name);
            if (!_ownerdocument.OptionOutputAsXml)
            {
                WriteAttributes(outText, true);
            }
            outText.Write(">");
            break;
    }
}
/// <summary>
/// Serializes this node (and, for documents and elements, its children) to the specified XmlWriter.
/// </summary>
/// <param name="writer">The XmlWriter that receives the output.</param>
public void WriteTo(XmlWriter writer)
{
    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
            writer.WriteComment(GetXmlComment((HtmlCommentNode)this));
            break;

        case HtmlNodeType.Document:
            // The document itself is represented by an xml processing instruction.
            string declaration = "version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName + "\"";
            writer.WriteProcessingInstruction("xml", declaration);
            if (HasChildNodes)
            {
                foreach (HtmlNode rootChild in ChildNodes)
                {
                    rootChild.WriteTo(writer);
                }
            }
            break;

        case HtmlNodeType.Text:
            writer.WriteString(((HtmlTextNode)this).Text);
            break;

        case HtmlNodeType.Element:
            string elementName = _ownerdocument.OptionOutputUpperCase ? Name.ToUpper() : Name;
            writer.WriteStartElement(elementName);
            WriteAttributes(writer, this);
            if (HasChildNodes)
            {
                foreach (HtmlNode elementChild in ChildNodes)
                {
                    elementChild.WriteTo(writer);
                }
            }
            writer.WriteEndElement();
            break;
    }
}
/// <summary>
/// Writes every child of this node, in order, to the specified TextWriter.
/// Nothing is written when the node has no child collection.
/// </summary>
/// <param name="outText">The TextWriter that receives the children's output.</param>
public void WriteContentTo(TextWriter outText)
{
    if (_childnodes != null)
    {
        foreach (HtmlNode child in _childnodes)
        {
            child.WriteTo(outText);
        }
    }
}
/// <summary>
/// Serializes this node to a string by writing it into an in-memory StringWriter.
/// </summary>
/// <returns>The text produced by WriteTo(TextWriter) for this node.</returns>
public string WriteTo()
{
    StringWriter buffer = new StringWriter();

    WriteTo(buffer);
    buffer.Flush();

    string result = buffer.ToString();
    return result;
}
/// <summary>
/// Serializes all the children of this node to a string by writing them into an in-memory StringWriter.
/// </summary>
/// <returns>The text produced by WriteContentTo(TextWriter) for this node.</returns>
public string WriteContentTo()
{
    StringWriter buffer = new StringWriter();

    WriteContentTo(buffer);
    buffer.Flush();

    string result = buffer.ToString();
    return result;
}
/// <summary>
/// Represents a combined list and collection of HTML nodes.
/// The linking methods (Remove, Replace, Insert, Append, Prepend, Clear) also maintain the
/// previous/next/parent links of the nodes they touch.
/// </summary>
public class HtmlNodeCollection: IEnumerable
{
    private ArrayList _items = new ArrayList();
    private HtmlNode _parentnode;

    internal HtmlNodeCollection(HtmlNode parentnode)
    {
        _parentnode = parentnode; // may be null
    }

    /// <summary>
    /// Gets the number of elements actually contained in the list.
    /// </summary>
    public int Count
    {
        get
        {
            return _items.Count;
        }
    }

    // Empties the list and clears the parent/sibling links of every node it held.
    internal void Clear()
    {
        foreach(HtmlNode node in _items)
        {
            node._parentnode = null;
            node._nextnode = null;
            node._prevnode = null;
        }
        _items.Clear();
    }

    // Removes the node at the given index and links its former neighbours to each other.
    internal void Remove(int index)
    {
        HtmlNode next = null;
        HtmlNode prev = null;
        HtmlNode oldnode = (HtmlNode)_items[index];

        if (index > 0)
        {
            prev = (HtmlNode)_items[index-1];
        }

        if (index < (_items.Count-1))
        {
            next = (HtmlNode)_items[index+1];
        }

        // Sanity check performed before mutating, so a failure leaves the list untouched.
        if ((prev != null) && (next == prev))
        {
            throw new InvalidProgramException("Unexpected error.");
        }

        _items.RemoveAt(index);

        if (prev != null)
        {
            prev._nextnode = next;
        }

        if (next != null)
        {
            next._prevnode = prev;
        }

        oldnode._prevnode = null;
        oldnode._nextnode = null;
        oldnode._parentnode = null;
    }

    // Puts 'node' at the given index in place of the current occupant and rewires the links.
    internal void Replace(int index, HtmlNode node)
    {
        HtmlNode next = null;
        HtmlNode prev = null;
        HtmlNode oldnode = (HtmlNode)_items[index];

        if (index > 0)
        {
            prev = (HtmlNode)_items[index-1];
        }

        if (index < (_items.Count-1))
        {
            next = (HtmlNode)_items[index+1];
        }

        // Sanity checks performed before mutating, so a failure leaves the list untouched.
        if ((prev != null) && (node == prev))
        {
            throw new InvalidProgramException("Unexpected error.");
        }
        if ((next != null) && (next == node))
        {
            throw new InvalidProgramException("Unexpected error.");
        }

        _items[index] = node;

        if (prev != null)
        {
            prev._nextnode = node;
        }

        if (next != null)
        {
            next._prevnode = node;
        }

        node._prevnode = prev;
        node._nextnode = next;
        node._parentnode = _parentnode;

        // Only detach the outgoing node when it really is a different node; replacing a
        // node with itself must not wipe the links that were just established.
        if (oldnode != node)
        {
            oldnode._prevnode = null;
            oldnode._nextnode = null;
            oldnode._parentnode = null;
        }
    }

    // Inserts 'node' before the item currently at 'index' and wires it to its neighbours.
    internal void Insert(int index, HtmlNode node)
    {
        HtmlNode next = null;
        HtmlNode prev = null;

        if (index > 0)
        {
            prev = (HtmlNode)_items[index-1];
        }

        if (index < _items.Count)
        {
            next = (HtmlNode)_items[index];
        }

        // Sanity checks performed before mutating, so a failure leaves the list untouched.
        if ((prev != null) && (node == prev))
        {
            throw new InvalidProgramException("Unexpected error.");
        }
        if ((next != null) && (next == node))
        {
            throw new InvalidProgramException("Unexpected error.");
        }

        _items.Insert(index, node);

        if (prev != null)
        {
            prev._nextnode = node;
        }

        if (next != null)
        {
            next._prevnode = node;
        }

        node._prevnode = prev;
        node._nextnode = next;
        node._parentnode = _parentnode;
    }

    // Adds 'node' at the end of the list and links it after the former last node.
    internal void Append(HtmlNode node)
    {
        HtmlNode last = null;
        if (_items.Count > 0)
        {
            last = (HtmlNode)_items[_items.Count-1];
        }

        // Sanity check performed before mutating, so a failure leaves the list untouched.
        if ((last != null) && (last == node))
        {
            throw new InvalidProgramException("Unexpected error.");
        }

        _items.Add(node);
        node._prevnode = last;
        node._nextnode = null;
        node._parentnode = _parentnode;
        if (last != null)
        {
            last._nextnode = node;
        }
    }

    // Adds 'node' at the start of the list and links it before the former first node.
    internal void Prepend(HtmlNode node)
    {
        HtmlNode first = null;
        if (_items.Count > 0)
        {
            first = (HtmlNode)_items[0];
        }

        // Sanity check performed before mutating, so a failure leaves the list untouched.
        if (node == first)
        {
            throw new InvalidProgramException("Unexpected error.");
        }

        _items.Insert(0, node);

        node._nextnode = first;
        node._prevnode = null;
        node._parentnode = _parentnode;
        if (first != null)
        {
            first._prevnode = node;
        }
    }

    // Adds 'node' to the list only; unlike Append, no parent or sibling link is modified.
    internal void Add(HtmlNode node)
    {
        _items.Add(node);
    }

    /// <summary>
    /// Gets the node at the specified index.
    /// </summary>
    public HtmlNode this[int index]
    {
        get
        {
            return _items[index] as HtmlNode;
        }
    }

    // Returns the position of 'node' in the list (reference comparison), or -1 when absent.
    internal int GetNodeIndex(HtmlNode node)
    {
        // TODO: should we rewrite this? what would be the key of a node?
        for(int i=0;i<_items.Count;i++)
        {
            if (node == ((HtmlNode)_items[i]))
            {
                return i;
            }
        }
        return -1;
    }

    /// <summary>
    /// Gets the index of a given node in the list.
    /// </summary>
    /// <exception cref="ArgumentNullException">The node is null and was not found in the collection.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The node was not found in the collection.</exception>
    public int this[HtmlNode node]
    {
        get
        {
            int index = GetNodeIndex(node);
            if (index == -1)
            {
                // Building the error message dereferences the node, so a null
                // argument needs its own, explicit exception.
                if (node == null)
                {
                    throw new ArgumentNullException("node");
                }
                throw new ArgumentOutOfRangeException("node", "Node \"" + node.CloneNode(false).OuterHtml + "\" was not found in the collection");
            }
            return index;
        }
    }

    /// <summary>
    /// Returns an enumerator that can iterate through the list.
    /// </summary>
    /// <returns>An IEnumerator for the entire list.</returns>
    public HtmlNodeEnumerator GetEnumerator()
    {
        return new HtmlNodeEnumerator(_items);
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}
/// <summary>
/// Enumerates the HtmlNode items of a list, exposing a strongly typed Current.
/// </summary>
public class HtmlNodeEnumerator: IEnumerator
{
    // Position of the current item; -1 means "before the first item".
    private int _position;
    // The list being walked; it is referenced, not copied.
    private ArrayList _nodes;

    internal HtmlNodeEnumerator(ArrayList items)
    {
        _position = -1;
        _nodes = items;
    }

    /// <summary>
    /// Moves the enumerator back to its initial position, before the first element of the collection.
    /// </summary>
    public void Reset()
    {
        _position = -1;
    }

    /// <summary>
    /// Steps the enumerator forward to the next element of the collection.
    /// </summary>
    /// <returns>true if the enumerator now points at an element; false if it has passed the end of the collection.</returns>
    public bool MoveNext()
    {
        return (++_position < _nodes.Count);
    }

    /// <summary>
    /// Gets the element at the enumerator's current position, typed as HtmlNode.
    /// </summary>
    public HtmlNode Current
    {
        get
        {
            object item = _nodes[_position];
            return (HtmlNode)item;
        }
    }

    /// <summary>
    /// Gets the element at the enumerator's current position.
    /// </summary>
    object IEnumerator.Current
    {
        get
        {
            return Current;
        }
    }
}
/// <summary>
/// An HTML text node. Text assigned through InnerHtml or Text is stored on the node and
/// takes precedence over the value supplied by the base class.
/// </summary>
public class HtmlTextNode: HtmlNode
{
    // Explicitly assigned text; null until InnerHtml or Text has been set.
    private string _text;

    internal HtmlTextNode(HtmlDocument ownerdocument, int index)
        : base(HtmlNodeType.Text, ownerdocument, index)
    {
    }

    /// <summary>
    /// Gets or sets the inner HTML. For a text node, getting returns OuterHtml and setting replaces the stored text.
    /// </summary>
    public override string InnerHtml
    {
        get
        {
            return OuterHtml;
        }
        set
        {
            _text = value;
        }
    }

    /// <summary>
    /// Gets the node as HTML: the stored text when one has been assigned, otherwise the base class value.
    /// </summary>
    public override string OuterHtml
    {
        get
        {
            return (_text != null) ? _text : base.OuterHtml;
        }
    }

    /// <summary>
    /// Gets or sets the text of the node. Getting returns the stored text when one has been assigned,
    /// otherwise the base class OuterHtml.
    /// </summary>
    public string Text
    {
        get
        {
            return (_text != null) ? _text : base.OuterHtml;
        }
        set
        {
            _text = value;
        }
    }
}
2229 /// <summary>
2230 /// Represents an HTML comment.
2231 /// </summary>
2232 public class HtmlCommentNode: HtmlNode
2234 private string _comment;
// Forwards to the base constructor, tagging the node as HtmlNodeType.Comment.
internal HtmlCommentNode(HtmlDocument ownerdocument, int index)
    : base(HtmlNodeType.Comment, ownerdocument, index)
{
}
/// <summary>
/// Gets or sets the inner HTML. For a comment node, getting returns the stored comment text when one
/// has been assigned, otherwise the base class InnerHtml; setting replaces the stored comment text.
/// </summary>
public override string InnerHtml
{
    get
    {
        return (_comment != null) ? _comment : base.InnerHtml;
    }
    set
    {
        _comment = value;
    }
}
/// <summary>
/// Gets the node as HTML: the stored comment text wrapped in "&lt;!--" and "--&gt;" when one has been
/// assigned, otherwise the base class OuterHtml.
/// </summary>
public override string OuterHtml
{
    get
    {
        if (_comment != null)
        {
            return "<!--" + _comment + "-->";
        }
        return base.OuterHtml;
    }
}
/// <summary>
/// Gets or sets the comment text of the node. Getting returns the stored comment text when one has
/// been assigned, otherwise the base class InnerHtml.
/// </summary>
public string Comment
{
    get
    {
        return (_comment != null) ? _comment : base.InnerHtml;
    }
    set
    {
        _comment = value;
    }
}