2 Copyright (C) 2003 Simon Mourier <simonm@microsoft.com>
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 using System
.Collections
;
33 using System
.Xml
.XPath
;
35 namespace HtmlAgilityPack
38 /// Flags that describe the behavior of an Element node.
40 public enum HtmlElementFlag
43 /// The node is a CDATA node.
48 /// The node is empty. META and IMG are examples of such nodes.
53 /// The node will automatically be closed during parsing.
58 /// The node can overlap.
64 /// Represents the type of a node.
66 public enum HtmlNodeType
69 /// The root of a document.
84 /// A text node is always the child of an element or a document node.
90 /// Represents an HTML node.
92 public class HtmlNode
: IXPathNavigable
95 /// Gets the name of a comment node. It is actually defined as '#comment'.
97 public static readonly string HtmlNodeTypeNameComment
= "#comment";
100 /// Gets the name of the document node. It is actually defined as '#document'.
102 public static readonly string HtmlNodeTypeNameDocument
= "#document";
105 /// Gets the name of a text node. It is actually defined as '#text'.
107 public static readonly string HtmlNodeTypeNameText
= "#text";
110 /// Gets a collection of flags that define specific behaviors for specific element nodes.
111 /// The table contains a DictionaryEntry list with the lowercase tag name as the Key, and a combination of HtmlElementFlags as the Value.
113 public static Hashtable ElementsFlags
;
115 internal HtmlNodeType _nodetype
;
116 internal HtmlNode _nextnode
;
117 internal HtmlNode _prevnode
;
118 internal HtmlNode _parentnode
;
119 internal HtmlDocument _ownerdocument
;
120 internal HtmlNodeCollection _childnodes
;
121 internal HtmlAttributeCollection _attributes
;
122 internal int _line
= 0;
123 internal int _lineposition
= 0;
124 internal int _streamposition
= 0;
125 internal int _innerstartindex
= 0;
126 internal int _innerlength
= 0;
127 internal int _outerstartindex
= 0;
128 internal int _outerlength
= 0;
129 internal int _namestartindex
= 0;
130 internal int _namelength
= 0;
131 internal bool _starttag
= false;
132 internal string _name
;
133 internal HtmlNode _prevwithsamename
= null;
134 internal HtmlNode _endnode
;
136 internal bool _innerchanged
= false;
137 internal bool _outerchanged
= false;
138 internal string _innerhtml
;
139 internal string _outerhtml
;
143 // tags whose content may be anything
144 ElementsFlags
= new Hashtable();
145 ElementsFlags
.Add("script", HtmlElementFlag
.CData
);
146 ElementsFlags
.Add("style", HtmlElementFlag
.CData
);
147 ElementsFlags
.Add("noxhtml", HtmlElementFlag
.CData
);
149 // tags that can not contain other tags
150 ElementsFlags
.Add("base", HtmlElementFlag
.Empty
);
151 ElementsFlags
.Add("link", HtmlElementFlag
.Empty
);
152 ElementsFlags
.Add("meta", HtmlElementFlag
.Empty
);
153 ElementsFlags
.Add("isindex", HtmlElementFlag
.Empty
);
154 ElementsFlags
.Add("hr", HtmlElementFlag
.Empty
);
155 ElementsFlags
.Add("col", HtmlElementFlag
.Empty
);
156 ElementsFlags
.Add("img", HtmlElementFlag
.Empty
);
157 ElementsFlags
.Add("param", HtmlElementFlag
.Empty
);
158 ElementsFlags
.Add("embed", HtmlElementFlag
.Empty
);
159 ElementsFlags
.Add("frame", HtmlElementFlag
.Empty
);
160 ElementsFlags
.Add("wbr", HtmlElementFlag
.Empty
);
161 ElementsFlags
.Add("bgsound", HtmlElementFlag
.Empty
);
162 ElementsFlags
.Add("spacer", HtmlElementFlag
.Empty
);
163 ElementsFlags
.Add("keygen", HtmlElementFlag
.Empty
);
164 ElementsFlags
.Add("area", HtmlElementFlag
.Empty
);
165 ElementsFlags
.Add("input", HtmlElementFlag
.Empty
);
166 ElementsFlags
.Add("basefont", HtmlElementFlag
.Empty
);
168 //ElementsFlags.Add("form", HtmlElementFlag.CanOverlap | HtmlElementFlag.Empty);
169 ElementsFlags
.Add("form", HtmlElementFlag
.CanOverlap
);
171 // they sometimes contain, and sometimes they don't...
172 ElementsFlags
.Add("option", HtmlElementFlag
.Empty
);
174 // tag whose closing tag is equivalent to open tag:
175 // <p>bla</p>bla will be transformed into <p>bla</p>bla
176 // <p>bla<p>bla will be transformed into <p>bla<p>bla and not <p>bla</p><p>bla</p> or <p>bla<p>bla</p></p>
178 ElementsFlags
.Add("br", HtmlElementFlag
.Empty
| HtmlElementFlag
.Closed
);
179 ElementsFlags
.Add("p", HtmlElementFlag
.Empty
| HtmlElementFlag
.Closed
);
183 /// Determines if an element node is closed.
185 /// <param name="name">The name of the element node to check. May not be null.</param>
186 /// <returns>true if the name is the name of a closed element node, false otherwise.</returns>
187 public static bool IsClosedElement(string name
)
191 throw new ArgumentNullException("name");
194 object flag
= ElementsFlags
[name
.ToLower()];
199 return (((HtmlElementFlag
)flag
)&HtmlElementFlag
.Closed
) != 0;
203 /// Determines if an element node can be kept overlapped.
205 /// <param name="name">The name of the element node to check. May not be null.</param>
206 /// <returns>true if the name is the name of an element node that can be kept overlapped, false otherwise.</returns>
207 public static bool CanOverlapElement(string name
)
211 throw new ArgumentNullException("name");
214 object flag
= ElementsFlags
[name
.ToLower()];
219 return (((HtmlElementFlag
)flag
)&HtmlElementFlag
.CanOverlap
) != 0;
223 /// Determines if a text corresponds to the closing tag of a node that can be kept overlapped.
225 /// <param name="text">The text to check. May not be null.</param>
226 /// <returns>true or false.</returns>
227 public static bool IsOverlappedClosingElement(string text
)
231 throw new ArgumentNullException("text");
234 if (text
.Length
<= 4)
237 if ((text
[0] != '<') ||
238 (text
[text
.Length
- 1] != '>') ||
242 string name
= text
.Substring(2, text
.Length
- 3);
243 return CanOverlapElement(name
);
247 /// Determines if an element node is a CDATA element node.
249 /// <param name="name">The name of the element node to check. May not be null.</param>
250 /// <returns>true if the name is the name of a CDATA element node, false otherwise.</returns>
251 public static bool IsCDataElement(string name
)
255 throw new ArgumentNullException("name");
258 object flag
= ElementsFlags
[name
.ToLower()];
263 return (((HtmlElementFlag
)flag
)&HtmlElementFlag
.CData
) != 0;
267 /// Determines if an element node is defined as empty.
269 /// <param name="name">The name of the element node to check. May not be null.</param>
270 /// <returns>true if the name is the name of an empty element node, false otherwise.</returns>
271 public static bool IsEmptyElement(string name
)
275 throw new ArgumentNullException("name");
278 if (name
.Length
== 0)
295 object flag
= ElementsFlags
[name
.ToLower()];
300 return (((HtmlElementFlag
)flag
)&HtmlElementFlag
.Empty
) != 0;
304 /// Creates an HTML node from a string representing literal HTML.
306 /// <param name="html">The HTML text.</param>
307 /// <returns>The newly created node instance.</returns>
308 public static HtmlNode
CreateNode(string html
)
310 // REVIEW: this is *not* optimum...
311 HtmlDocument doc
= new HtmlDocument();
313 return doc
.DocumentNode
.FirstChild
;
317 /// Creates a duplicate of the node and the subtree under it.
319 /// <param name="node">The node to duplicate. May not be null.</param>
320 public void CopyFrom(HtmlNode node
)
322 CopyFrom(node
, true);
326 /// Creates a duplicate of the node.
328 /// <param name="node">The node to duplicate. May not be null.</param>
329 /// <param name="deep">true to recursively clone the subtree under the specified node, false to clone only the node itself.</param>
330 public void CopyFrom(HtmlNode node
, bool deep
)
334 throw new ArgumentNullException("node");
337 Attributes
.RemoveAll();
338 if (node
.HasAttributes
)
340 foreach(HtmlAttribute att
in node
.Attributes
)
342 SetAttributeValue(att
.Name
, att
.Value
);
349 if (node
.HasChildNodes
)
351 foreach(HtmlNode child
in node
.ChildNodes
)
353 AppendChild(child
.CloneNode(true));
359 internal HtmlNode(HtmlNodeType type
, HtmlDocument ownerdocument
, int index
)
362 _ownerdocument
= ownerdocument
;
363 _outerstartindex
= index
;
367 case HtmlNodeType
.Comment
:
368 _name
= HtmlNodeTypeNameComment
;
372 case HtmlNodeType
.Document
:
373 _name
= HtmlNodeTypeNameDocument
;
377 case HtmlNodeType
.Text
:
378 _name
= HtmlNodeTypeNameText
;
383 if (_ownerdocument
._openednodes
!= null)
387 // we use the index as the key
389 // -1 means the node comes from public
392 _ownerdocument
._openednodes
.Add(index
, this);
397 if ((-1 == index
) && (type
!= HtmlNodeType
.Comment
) && (type
!= HtmlNodeType
.Text
))
399 // innerhtml and outerhtml must be calculated
400 _outerchanged
= true;
401 _innerchanged
= true;
405 internal void CloseNode(HtmlNode endnode
)
407 if (!_ownerdocument
.OptionAutoCloseOnEnd
)
409 // close all children
410 if (_childnodes
!= null)
412 foreach(HtmlNode child
in _childnodes
)
417 // create a fake closer node
418 HtmlNode close
= new HtmlNode(NodeType
, _ownerdocument
, -1);
419 close
._endnode
= close
;
420 child
.CloseNode(close
);
429 if (_ownerdocument
._openednodes
!= null)
431 _ownerdocument
._openednodes
.Remove(_outerstartindex
);
434 HtmlNode self
= _ownerdocument
._lastnodes
[Name
] as HtmlNode
;
437 _ownerdocument
._lastnodes
.Remove(Name
);
438 _ownerdocument
.UpdateLastParentNode();
444 // create an inner section
445 _innerstartindex
= _outerstartindex
+ _outerlength
;
446 _innerlength
= endnode
._outerstartindex
- _innerstartindex
;
448 // update full length
449 _outerlength
= (endnode
._outerstartindex
+ endnode
._outerlength
) - _outerstartindex
;
453 internal HtmlNode EndNode
461 internal string GetId()
463 HtmlAttribute att
= Attributes
["id"];
471 internal void SetId(string id
)
473 HtmlAttribute att
= Attributes
["id"];
476 att
= _ownerdocument
.CreateAttribute("id");
479 _ownerdocument
.SetIdForNode(this, att
.Value
);
480 _outerchanged
= true;
484 /// Creates a new XPathNavigator object for navigating this HTML node.
486 /// <returns>An XPathNavigator object. The XPathNavigator is positioned on the node from which the method was called. It is not positioned on the root of the document.</returns>
487 public XPathNavigator
CreateNavigator()
489 return new HtmlNodeNavigator(_ownerdocument
, this);
493 /// Selects the first HtmlNode that matches the XPath expression.
495 /// <param name="xpath">The XPath expression. May not be null.</param>
496 /// <returns>The first HtmlNode that matches the XPath query or a null reference if no matching node was found.</returns>
497 public HtmlNode
SelectSingleNode(string xpath
)
501 throw new ArgumentNullException("xpath");
504 HtmlNodeNavigator nav
= new HtmlNodeNavigator(_ownerdocument
, this);
505 XPathNodeIterator it
= nav
.Select(xpath
);
511 HtmlNodeNavigator node
= (HtmlNodeNavigator
)it
.Current
;
512 return node
.CurrentNode
;
516 /// Selects a list of nodes matching the XPath expression.
518 /// <param name="xpath">The XPath expression.</param>
519 /// <returns>An HtmlNodeCollection containing a collection of nodes matching the XPath query, or null if no node matched the XPath expression.</returns>
520 public HtmlNodeCollection
SelectNodes(string xpath
)
522 HtmlNodeCollection list
= new HtmlNodeCollection(null);
524 HtmlNodeNavigator nav
= new HtmlNodeNavigator(_ownerdocument
, this);
525 XPathNodeIterator it
= nav
.Select(xpath
);
526 while (it
.MoveNext())
528 HtmlNodeNavigator n
= (HtmlNodeNavigator
)it
.Current
;
529 list
.Add(n
.CurrentNode
);
539 /// Gets or sets the value of the 'id' HTML attribute. The document must have been parsed using the OptionUseIdAttribute set to true.
545 if (_ownerdocument
._nodesid
== null)
547 throw new Exception(HtmlDocument
.HtmlExceptionUseIdAttributeFalse
);
553 if (_ownerdocument
._nodesid
== null)
555 throw new Exception(HtmlDocument
.HtmlExceptionUseIdAttributeFalse
);
560 throw new ArgumentNullException("value");
575 /// Gets the line number of this node in the document.
586 /// Gets the column number of this node in the document.
588 public int LinePosition
592 return _lineposition
;
597 /// Gets the stream position of this node in the document, relative to the start of the document.
599 public int StreamPosition
603 return _streamposition
;
608 /// Gets a value indicating if this node has been closed or not.
614 return (_endnode
!= null);
619 /// Gets or sets this node's name.
627 _name
= _ownerdocument
._text
.Substring(_namestartindex
, _namelength
).ToLower();
638 /// Gets or Sets the text between the start and end tags of the object.
640 public virtual string InnerText
644 if (_nodetype
== HtmlNodeType
.Text
)
646 return ((HtmlTextNode
)this).Text
;
649 if (_nodetype
== HtmlNodeType
.Comment
)
651 return ((HtmlCommentNode
)this).Comment
;
654 // note: right now, this method is *slow*, because we recompute everything.
655 // it could be optimised like innerhtml
662 foreach(HtmlNode node
in ChildNodes
)
671 /// Gets or Sets the HTML between the start and end tags of the object.
673 public virtual string InnerHtml
679 _innerhtml
= WriteContentTo();
680 _innerchanged
= false;
683 if (_innerhtml
!= null)
688 if (_innerstartindex
< 0)
693 return _ownerdocument
._text
.Substring(_innerstartindex
, _innerlength
);
697 HtmlDocument doc
= new HtmlDocument();
701 AppendChildren(doc
.DocumentNode
.ChildNodes
);
706 /// Gets or Sets the object and its content in HTML.
708 public virtual string OuterHtml
714 _outerhtml
= WriteTo();
715 _outerchanged
= false;
719 if (_outerhtml
!= null)
724 if (_outerstartindex
< 0)
729 return _ownerdocument
._text
.Substring(_outerstartindex
, _outerlength
);
734 /// Creates a duplicate of the node
736 /// <returns></returns>
737 public HtmlNode
Clone()
739 return CloneNode(true);
743 /// Creates a duplicate of the node and changes its name at the same time.
745 /// <param name="newName">The new name of the cloned node. May not be null.</param>
746 /// <returns>The cloned node.</returns>
747 public HtmlNode
CloneNode(string newName
)
749 return CloneNode(newName
, true);
753 /// Creates a duplicate of the node and changes its name at the same time.
755 /// <param name="newName">The new name of the cloned node. May not be null.</param>
756 /// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
757 /// <returns>The cloned node.</returns>
758 public HtmlNode
CloneNode(string newName
, bool deep
)
762 throw new ArgumentNullException("newName");
765 HtmlNode node
= CloneNode(deep
);
766 node
._name
= newName
;
771 /// Creates a duplicate of the node.
773 /// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
774 /// <returns>The cloned node.</returns>
775 public HtmlNode
CloneNode(bool deep
)
777 HtmlNode node
= _ownerdocument
.CreateNode(_nodetype
);
782 case HtmlNodeType
.Comment
:
783 ((HtmlCommentNode
)node
).Comment
= ((HtmlCommentNode
)this).Comment
;
786 case HtmlNodeType
.Text
:
787 ((HtmlTextNode
)node
).Text
= ((HtmlTextNode
)this).Text
;
794 foreach(HtmlAttribute att
in _attributes
)
796 HtmlAttribute newatt
= att
.Clone();
797 node
.Attributes
.Append(newatt
);
801 // closing attributes
802 if (HasClosingAttributes
)
804 node
._endnode
= _endnode
.CloneNode(false);
805 foreach(HtmlAttribute att
in _endnode
._attributes
)
807 HtmlAttribute newatt
= att
.Clone();
808 node
._endnode
._attributes
.Append(newatt
);
822 foreach(HtmlNode child
in _childnodes
)
824 HtmlNode newchild
= child
.Clone();
825 node
.AppendChild(newchild
);
831 /// Gets the HTML node immediately following this element.
833 public HtmlNode NextSibling
842 /// Gets the node immediately preceding this node.
844 public HtmlNode PreviousSibling
853 /// Removes all the children and/or attributes of the current node.
855 public void RemoveAll()
864 if ((_endnode
!= null) && (_endnode
!= this))
866 if (_endnode
._attributes
!= null)
868 _endnode
._attributes
.Clear();
871 _outerchanged
= true;
872 _innerchanged
= true;
876 /// Removes all the children of the current node.
878 public void RemoveAllChildren()
885 if (_ownerdocument
.OptionUseIdAttribute
)
887 // remove nodes from id list
888 foreach(HtmlNode node
in _childnodes
)
890 _ownerdocument
.SetIdForNode(null, node
.GetId());
894 _outerchanged
= true;
895 _innerchanged
= true;
899 /// Removes the specified child node.
901 /// <param name="oldChild">The node being removed. May not be null.</param>
902 /// <returns>The node removed.</returns>
903 public HtmlNode
RemoveChild(HtmlNode oldChild
)
905 if (oldChild
== null)
907 throw new ArgumentNullException("oldChild");
912 if (_childnodes
!= null)
914 index
= _childnodes
[oldChild
];
919 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
922 _childnodes
.Remove(index
);
924 _ownerdocument
.SetIdForNode(null, oldChild
.GetId());
925 _outerchanged
= true;
926 _innerchanged
= true;
931 /// Removes the specified child node.
933 /// <param name="oldChild">The node being removed. May not be null.</param>
934 /// <param name="keepGrandChildren">true to keep grand children of the node, false otherwise.</param>
935 /// <returns>The node removed.</returns>
936 public HtmlNode
RemoveChild(HtmlNode oldChild
, bool keepGrandChildren
)
938 if (oldChild
== null)
940 throw new ArgumentNullException("oldChild");
943 if ((oldChild
._childnodes
!= null) && keepGrandChildren
)
946 HtmlNode prev
= oldChild
.PreviousSibling
;
948 // reroute grand children to ourselves
949 foreach(HtmlNode grandchild
in oldChild
._childnodes
)
951 InsertAfter(grandchild
, prev
);
954 RemoveChild(oldChild
);
955 _outerchanged
= true;
956 _innerchanged
= true;
961 /// Replaces the child node oldChild with newChild node.
963 /// <param name="newChild">The new node to put in the child list.</param>
964 /// <param name="oldChild">The node being replaced in the list.</param>
965 /// <returns>The node replaced.</returns>
966 public HtmlNode
ReplaceChild(HtmlNode newChild
, HtmlNode oldChild
)
968 if (newChild
== null)
970 return RemoveChild(oldChild
);
973 if (oldChild
== null)
975 return AppendChild(newChild
);
980 if (_childnodes
!= null)
982 index
= _childnodes
[oldChild
];
987 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
990 _childnodes
.Replace(index
, newChild
);
992 _ownerdocument
.SetIdForNode(null, oldChild
.GetId());
993 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
994 _outerchanged
= true;
995 _innerchanged
= true;
1000 /// Inserts the specified node immediately before the specified reference node.
1002 /// <param name="newChild">The node to insert. May not be null.</param>
1003 /// <param name="refChild">The node that is the reference node. The newChild is placed before this node.</param>
1004 /// <returns>The node being inserted.</returns>
1005 public HtmlNode
InsertBefore(HtmlNode newChild
, HtmlNode refChild
)
1007 if (newChild
== null)
1009 throw new ArgumentNullException("newChild");
1012 if (refChild
== null)
1014 return AppendChild(newChild
);
1017 if (newChild
== refChild
)
1024 if (_childnodes
!= null)
1026 index
= _childnodes
[refChild
];
1031 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
1034 _childnodes
.Insert(index
, newChild
);
1036 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1037 _outerchanged
= true;
1038 _innerchanged
= true;
1043 /// Inserts the specified node immediately after the specified reference node.
1045 /// <param name="newChild">The node to insert. May not be null.</param>
1046 /// <param name="refChild">The node that is the reference node. The newNode is placed after the refNode.</param>
1047 /// <returns>The node being inserted.</returns>
1048 public HtmlNode
InsertAfter(HtmlNode newChild
, HtmlNode refChild
)
1050 if (newChild
== null)
1052 throw new ArgumentNullException("newChild");
1055 if (refChild
== null)
1057 return PrependChild(newChild
);
1060 if (newChild
== refChild
)
1067 if (_childnodes
!= null)
1069 index
= _childnodes
[refChild
];
1073 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
1076 _childnodes
.Insert(index
+ 1, newChild
);
1078 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1079 _outerchanged
= true;
1080 _innerchanged
= true;
1085 /// Gets the first child of the node.
1087 public HtmlNode FirstChild
1095 return _childnodes
[0];
1100 /// Gets the last child of the node.
1102 public HtmlNode LastChild
1110 return _childnodes
[_childnodes
.Count
-1];
1115 /// Gets the type of this node.
1117 public HtmlNodeType NodeType
1126 /// Gets the parent of this node (for nodes that can have parents).
1128 public HtmlNode ParentNode
1137 /// Gets the HtmlDocument to which this node belongs.
1139 public HtmlDocument OwnerDocument
1143 return _ownerdocument
;
1148 /// Gets all the children of the node.
1150 public HtmlNodeCollection ChildNodes
1154 if (_childnodes
== null)
1156 _childnodes
= new HtmlNodeCollection(this);
1163 /// Adds the specified node to the beginning of the list of children of this node.
1165 /// <param name="newChild">The node to add. May not be null.</param>
1166 /// <returns>The node added.</returns>
1167 public HtmlNode
PrependChild(HtmlNode newChild
)
1169 if (newChild
== null)
1171 throw new ArgumentNullException("newChild");
1173 ChildNodes
.Prepend(newChild
);
1174 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1175 _outerchanged
= true;
1176 _innerchanged
= true;
/// <summary>
/// Adds the specified node list to the beginning of the list of children of this node.
/// The nodes keep the relative order they have in <paramref name="newChildren"/>.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is null.</exception>
public void PrependChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        throw new ArgumentNullException("newChildren");
    }

    // Every PrependChild call places its node in front of the children already present.
    // Walking the list forwards would therefore insert it in reverse order, so walk it
    // backwards: the first node of newChildren is prepended last and ends up first.
    for (int i = newChildren.Count - 1; i >= 0; i--)
    {
        PrependChild(newChildren[i]);
    }
}
1198 /// Adds the specified node to the end of the list of children of this node.
1200 /// <param name="newChild">The node to add. May not be null.</param>
1201 /// <returns>The node added.</returns>
1202 public HtmlNode
AppendChild(HtmlNode newChild
)
1204 if (newChild
== null)
1206 throw new ArgumentNullException("newChild");
1209 ChildNodes
.Append(newChild
);
1210 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1211 _outerchanged
= true;
1212 _innerchanged
= true;
/// <summary>
/// Adds the specified node list to the end of the list of children of this node.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is null.</exception>
public void AppendChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        // The reported parameter name must match the real parameter ("newChildren").
        throw new ArgumentNullException("newChildren");
    }

    // Append one node at a time, in list order, through the single-node entry point.
    foreach (HtmlNode newChild in newChildren)
    {
        AppendChild(newChild);
    }
}
1232 /// Gets a value indicating whether the current node has any attributes.
1234 public bool HasAttributes
1238 if (_attributes
== null)
1243 if (_attributes
.Count
<= 0)
1252 /// Gets a value indicating whether the current node has any attributes on the closing tag.
1254 public bool HasClosingAttributes
1258 if ((_endnode
== null) || (_endnode
== this))
1263 if (_endnode
._attributes
== null)
1268 if (_endnode
._attributes
.Count
<= 0)
1277 /// Gets a value indicating whether this node has any child nodes.
1279 public bool HasChildNodes
1283 if (_childnodes
== null)
1288 if (_childnodes
.Count
<= 0)
1297 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
1299 /// <param name="name">The name of the attribute to get. May not be null.</param>
1300 /// <param name="def">The default value to return if not found.</param>
1301 /// <returns>The value of the attribute if found, the default value if not found.</returns>
1302 public string GetAttributeValue(string name
, string def
)
1306 throw new ArgumentNullException("name");
1313 HtmlAttribute att
= Attributes
[name
];
1322 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
1324 /// <param name="name">The name of the attribute to get. May not be null.</param>
1325 /// <param name="def">The default value to return if not found.</param>
1326 /// <returns>The value of the attribute if found, the default value if not found.</returns>
1327 public int GetAttributeValue(string name
, int def
)
1331 throw new ArgumentNullException("name");
1338 HtmlAttribute att
= Attributes
[name
];
1345 return Convert
.ToInt32(att
.Value
);
1354 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
1356 /// <param name="name">The name of the attribute to get. May not be null.</param>
1357 /// <param name="def">The default value to return if not found.</param>
1358 /// <returns>The value of the attribute if found, the default value if not found.</returns>
1359 public bool GetAttributeValue(string name
, bool def
)
1363 throw new ArgumentNullException("name");
1370 HtmlAttribute att
= Attributes
[name
];
1377 return Convert
.ToBoolean(att
.Value
);
1386 /// Helper method to set the value of an attribute of this node. If the attribute is not found, it will be created automatically.
1388 /// <param name="name">The name of the attribute to set. May not be null.</param>
1389 /// <param name="value">The value for the attribute.</param>
1390 /// <returns>The corresponding attribute instance.</returns>
1391 public HtmlAttribute
SetAttributeValue(string name
, string value)
1395 throw new ArgumentNullException("name");
1397 HtmlAttribute att
= Attributes
[name
];
1400 return Attributes
.Append(_ownerdocument
.CreateAttribute(name
, value));
1407 /// Gets the collection of HTML attributes for this node. May not be null.
1409 public HtmlAttributeCollection Attributes
1415 _attributes
= new HtmlAttributeCollection(this);
1422 /// Gets the collection of HTML attributes for the closing tag. May not be null.
1424 public HtmlAttributeCollection ClosingAttributes
1428 if (!HasClosingAttributes
)
1430 return new HtmlAttributeCollection(this);
1432 return _endnode
.Attributes
;
1436 internal void WriteAttribute(TextWriter outText
, HtmlAttribute att
)
1440 if (_ownerdocument
.OptionOutputAsXml
)
1442 if (_ownerdocument
.OptionOutputUpperCase
)
1444 name
= att
.XmlName
.ToUpper();
1451 outText
.Write(" " + name
+ "=\"" + HtmlDocument
.HtmlEncode(att
.XmlValue
) + "\"");
1455 if (_ownerdocument
.OptionOutputUpperCase
)
1457 name
= att
.Name
.ToUpper();
1464 if (att
.Name
.Length
>= 4)
1466 if ((att
.Name
[0] == '<') && (att
.Name
[1] == '%') &&
1467 (att
.Name
[att
.Name
.Length
-1] == '>') && (att
.Name
[att
.Name
.Length
-2] == '%'))
1469 outText
.Write(" " + name
);
1473 if (_ownerdocument
.OptionOutputOptimizeAttributeValues
)
1475 if (att
.Value
.IndexOfAny(new Char
[]{(char)10, (char)13, (char)9, ' '}
) < 0)
1477 outText
.Write(" " + name
+ "=" + att
.Value
);
1481 outText
.Write(" " + name
+ "=\"" + att
.Value
+ "\"");
1486 outText
.Write(" " + name
+ "=\"" + att
.Value
+ "\"");
1491 internal static void WriteAttributes(XmlWriter writer
, HtmlNode node
)
1493 if (!node
.HasAttributes
)
1497 // we use _hashitems to make sure attributes are written only once
1498 foreach(HtmlAttribute att
in node
.Attributes
._hashitems
.Values
)
1500 writer
.WriteAttributeString(att
.XmlName
, att
.Value
);
1504 internal void WriteAttributes(TextWriter outText
, bool closing
)
1506 if (_ownerdocument
.OptionOutputAsXml
)
1508 if (_attributes
== null)
1512 // we use _hashitems to make sure attributes are written only once
1513 foreach(HtmlAttribute att
in _attributes
._hashitems
.Values
)
1515 WriteAttribute(outText
, att
);
1522 if (_attributes
!= null)
1525 foreach(HtmlAttribute att
in _attributes
)
1527 WriteAttribute(outText
, att
);
1530 if (_ownerdocument
.OptionAddDebuggingAttributes
)
1532 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_closed", Closed
.ToString()));
1533 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_children", ChildNodes
.Count
.ToString()));
1536 foreach(HtmlNode n
in ChildNodes
)
1538 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_child_" + i
,
1546 if (_endnode
== null)
1551 if (_endnode
._attributes
== null)
1556 if (_endnode
== this)
1561 foreach(HtmlAttribute att
in _endnode
._attributes
)
1563 WriteAttribute(outText
, att
);
1565 if (_ownerdocument
.OptionAddDebuggingAttributes
)
1567 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_closed", Closed
.ToString()));
1568 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_children", ChildNodes
.Count
.ToString()));
1573 internal static string GetXmlComment(HtmlCommentNode comment
)
1575 string s
= comment
.Comment
;
1576 return s
.Substring(4, s
.Length
-7).Replace("--", " - -");
/// <summary>
/// Saves the current node to the specified TextWriter.
/// </summary>
/// <remarks>
/// The output is driven by the owner document's options:
/// OptionOutputAsXml switches to XML-compatible output,
/// OptionOutputUpperCase upper-cases element names and
/// OptionWriteEmptyNodes closes empty elements with " /&gt;".
/// Element names are upper-cased with the invariant culture so that the
/// result does not depend on the current thread culture.
/// </remarks>
/// <param name="outText">The TextWriter to which you want to save.</param>
public void WriteTo(TextWriter outText)
{
    string html;
    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
            html = ((HtmlCommentNode)this).Comment;
            if (_ownerdocument.OptionOutputAsXml)
            {
                // In XML mode the comment delimiters are rebuilt around the text
                // returned by GetXmlComment instead of echoing the original markup.
                outText.Write("<!--" + GetXmlComment((HtmlCommentNode)this) + " -->");
            }
            else
            {
                outText.Write(html);
            }
            break;

        case HtmlNodeType.Document:
            if (_ownerdocument.OptionOutputAsXml)
            {
                outText.Write("<?xml version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName + "\"?>");

                // check there is a root element
                if (_ownerdocument.DocumentNode.HasChildNodes)
                {
                    int rootnodes = _ownerdocument.DocumentNode._childnodes.Count;
                    if (rootnodes > 0)
                    {
                        // an xml declaration node does not count as a root element
                        HtmlNode xml = _ownerdocument.GetXmlDeclaration();
                        if (xml != null)
                        {
                            rootnodes--;
                        }

                        if (rootnodes > 1)
                        {
                            // XML allows a single root element only: wrap everything in a span
                            if (_ownerdocument.OptionOutputUpperCase)
                            {
                                outText.Write("<SPAN>");
                                WriteContentTo(outText);
                                outText.Write("</SPAN>");
                            }
                            else
                            {
                                outText.Write("<span>");
                                WriteContentTo(outText);
                                outText.Write("</span>");
                            }
                            break;
                        }
                    }
                }
            }
            WriteContentTo(outText);
            break;

        case HtmlNodeType.Text:
            html = ((HtmlTextNode)this).Text;
            if (_ownerdocument.OptionOutputAsXml)
            {
                outText.Write(HtmlDocument.HtmlEncode(html));
            }
            else
            {
                outText.Write(html);
            }
            break;

        case HtmlNodeType.Element:
            string name;
            if (_ownerdocument.OptionOutputUpperCase)
            {
                // Invariant culture: a culture-sensitive ToUpper() would turn "img"
                // into "İMG" under Turkish/Azeri cultures and corrupt the tag name.
                name = Name.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
            }
            else
            {
                name = Name;
            }

            if (_ownerdocument.OptionOutputAsXml)
            {
                if (name.Length > 0)
                {
                    if (name[0] == '?')
                    {
                        // forget this one, it's been done at the document level
                        break;
                    }

                    if (name.Trim().Length == 0)
                    {
                        break;
                    }
                    name = HtmlAttribute.GetXmlName(name);
                }
                else
                {
                    break;
                }
            }

            outText.Write("<" + name);
            WriteAttributes(outText, false);

            if (!HasChildNodes)
            {
                if (HtmlNode.IsEmptyElement(Name))
                {
                    if ((_ownerdocument.OptionWriteEmptyNodes) || (_ownerdocument.OptionOutputAsXml))
                    {
                        outText.Write(" />");
                    }
                    else
                    {
                        if (Name.Length > 0)
                        {
                            // names starting with '?' are closed with "?>"
                            if (Name[0] == '?')
                            {
                                outText.Write("?");
                            }
                        }

                        outText.Write(">");
                    }
                }
                else
                {
                    outText.Write("></" + name + ">");
                }
            }
            else
            {
                outText.Write(">");
                bool cdata = false;
                if (_ownerdocument.OptionOutputAsXml)
                {
                    if (HtmlNode.IsCDataElement(Name))
                    {
                        // this code and the following tries to output things as nicely as possible for old browsers.
                        cdata = true;
                        outText.Write("\r\n//<![CDATA[\r\n");
                    }
                }

                if (cdata)
                {
                    if (HasChildNodes)
                    {
                        // child must be a text
                        ChildNodes[0].WriteTo(outText);
                    }
                    outText.Write("\r\n//]]>//\r\n");
                }
                else
                {
                    WriteContentTo(outText);
                }

                outText.Write("</" + name);
                if (!_ownerdocument.OptionOutputAsXml)
                {
                    // attributes found on the closing tag are only echoed in HTML mode
                    WriteAttributes(outText, true);
                }
                outText.Write(">");
            }
            break;
    }
}
/// <summary>
/// Saves the current node to the specified XmlWriter.
/// </summary>
/// <remarks>
/// Comments, text and elements are written through the corresponding XmlWriter
/// calls; a document node first emits an "xml" processing instruction and then
/// its children. When OptionOutputUpperCase is set, element names are
/// upper-cased with the invariant culture so the output does not depend on the
/// current thread culture.
/// </remarks>
/// <param name="writer">The XmlWriter to which you want to save.</param>
public void WriteTo(XmlWriter writer)
{
    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
            writer.WriteComment(GetXmlComment((HtmlCommentNode)this));
            break;

        case HtmlNodeType.Document:
            writer.WriteProcessingInstruction("xml", "version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName + "\"");
            if (HasChildNodes)
            {
                foreach (HtmlNode subnode in ChildNodes)
                {
                    subnode.WriteTo(writer);
                }
            }
            break;

        case HtmlNodeType.Text:
            writer.WriteString(((HtmlTextNode)this).Text);
            break;

        case HtmlNodeType.Element:
            string name;
            if (_ownerdocument.OptionOutputUpperCase)
            {
                // Invariant culture: a culture-sensitive ToUpper() would turn "img"
                // into "İMG" under Turkish/Azeri cultures and corrupt the element name.
                name = Name.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
            }
            else
            {
                name = Name;
            }
            writer.WriteStartElement(name);
            WriteAttributes(writer, this);

            if (HasChildNodes)
            {
                foreach (HtmlNode subnode in ChildNodes)
                {
                    subnode.WriteTo(writer);
                }
            }
            writer.WriteEndElement();
            break;
    }
}
/// <summary>
/// Writes every child of this node, in order, to the specified TextWriter.
/// Nothing is written when the node has no child collection.
/// </summary>
/// <param name="outText">The TextWriter to which you want to save.</param>
public void WriteContentTo(TextWriter outText)
{
    if (_childnodes != null)
    {
        foreach (HtmlNode child in _childnodes)
        {
            child.WriteTo(outText);
        }
    }
}
/// <summary>
/// Serializes the current node into a string by writing it to an in-memory writer.
/// </summary>
/// <returns>The saved string.</returns>
public string WriteTo()
{
    StringWriter buffer = new StringWriter();

    WriteTo(buffer);
    buffer.Flush();

    string saved = buffer.ToString();
    return saved;
}
/// <summary>
/// Serializes all the children of the node into a string by writing them to an in-memory writer.
/// </summary>
/// <returns>The saved string.</returns>
public string WriteContentTo()
{
    StringWriter buffer = new StringWriter();

    WriteContentTo(buffer);
    buffer.Flush();

    string saved = buffer.ToString();
    return saved;
}
/// <summary>
/// Represents a combined list and collection of HTML nodes.
/// </summary>
/// <remarks>
/// Besides storing the nodes, the mutating methods (except <c>Add</c>) keep the
/// previous/next sibling links and the parent link of the affected nodes in sync
/// with the list order. These methods validate their arguments before touching the
/// list, so a failed call leaves the collection unchanged.
/// </remarks>
public class HtmlNodeCollection : IEnumerable
{
    private ArrayList _items = new ArrayList();
    private HtmlNode _parentnode;

    internal HtmlNodeCollection(HtmlNode parentnode)
    {
        _parentnode = parentnode; // may be null
    }

    /// <summary>
    /// Gets the number of elements actually contained in the list.
    /// </summary>
    public int Count
    {
        get
        {
            return _items.Count;
        }
    }

    // Detaches every node (parent and sibling links) and empties the list.
    internal void Clear()
    {
        foreach (HtmlNode node in _items)
        {
            node._parentnode = null;
            node._nextnode = null;
            node._prevnode = null;
        }
        _items.Clear();
    }

    // Removes the node at 'index', links its former neighbours together and detaches it.
    internal void Remove(int index)
    {
        HtmlNode next = null;
        HtmlNode prev = null;
        HtmlNode oldnode = (HtmlNode)_items[index];

        if (index > 0)
        {
            prev = (HtmlNode)_items[index - 1];
        }

        if (index < (_items.Count - 1))
        {
            next = (HtmlNode)_items[index + 1];
        }

        // sanity check done before mutating, so a failure leaves the list intact
        if ((prev != null) && (next == prev))
        {
            throw new InvalidProgramException("Unexpected error.");
        }

        _items.RemoveAt(index);

        if (prev != null)
        {
            prev._nextnode = next;
        }

        if (next != null)
        {
            next._prevnode = prev;
        }

        oldnode._prevnode = null;
        oldnode._nextnode = null;
        oldnode._parentnode = null;
    }

    // Puts 'node' at 'index' in place of the existing node, which gets detached.
    internal void Replace(int index, HtmlNode node)
    {
        if (node == null)
        {
            throw new ArgumentNullException("node");
        }

        HtmlNode next = null;
        HtmlNode prev = null;
        HtmlNode oldnode = (HtmlNode)_items[index];

        if (index > 0)
        {
            prev = (HtmlNode)_items[index - 1];
        }

        if (index < (_items.Count - 1))
        {
            next = (HtmlNode)_items[index + 1];
        }

        // a node cannot be its own sibling; checked before mutating the list
        if (((prev != null) && (node == prev)) || (next == node))
        {
            throw new InvalidProgramException("Unexpected error.");
        }

        _items[index] = node;

        if (prev != null)
        {
            prev._nextnode = node;
        }

        if (next != null)
        {
            next._prevnode = node;
        }

        node._prevnode = prev;
        node._nextnode = next;
        node._parentnode = _parentnode;

        // Replacing a node with itself must not wipe the links that were just set.
        if (oldnode != node)
        {
            oldnode._prevnode = null;
            oldnode._nextnode = null;
            oldnode._parentnode = null;
        }
    }

    // Inserts 'node' before the node currently at 'index' (or at the end when index == Count).
    internal void Insert(int index, HtmlNode node)
    {
        if (node == null)
        {
            throw new ArgumentNullException("node");
        }

        HtmlNode next = null;
        HtmlNode prev = null;

        if (index > 0)
        {
            prev = (HtmlNode)_items[index - 1];
        }

        if (index < _items.Count)
        {
            next = (HtmlNode)_items[index];
        }

        // a node cannot be its own sibling; checked before mutating the list
        if (((prev != null) && (node == prev)) || (next == node))
        {
            throw new InvalidProgramException("Unexpected error.");
        }

        _items.Insert(index, node);

        if (prev != null)
        {
            prev._nextnode = node;
        }

        if (next != null)
        {
            next._prevnode = node;
        }

        node._prevnode = prev;
        node._nextnode = next;
        node._parentnode = _parentnode;
    }

    // Adds 'node' after the current last node and links the two together.
    internal void Append(HtmlNode node)
    {
        if (node == null)
        {
            throw new ArgumentNullException("node");
        }

        HtmlNode last = null;
        if (_items.Count > 0)
        {
            last = (HtmlNode)_items[_items.Count - 1];
        }

        // a node cannot be its own sibling; checked before mutating the list
        if ((last != null) && (last == node))
        {
            throw new InvalidProgramException("Unexpected error.");
        }

        _items.Add(node);
        node._prevnode = last;
        node._nextnode = null;
        node._parentnode = _parentnode;
        if (last != null)
        {
            last._nextnode = node;
        }
    }

    // Adds 'node' before the current first node and links the two together.
    internal void Prepend(HtmlNode node)
    {
        if (node == null)
        {
            throw new ArgumentNullException("node");
        }

        HtmlNode first = null;
        if (_items.Count > 0)
        {
            first = (HtmlNode)_items[0];
        }

        // a node cannot be its own sibling; checked before mutating the list
        if (node == first)
        {
            throw new InvalidProgramException("Unexpected error.");
        }

        _items.Insert(0, node);

        node._nextnode = first;
        node._prevnode = null;
        node._parentnode = _parentnode;
        if (first != null)
        {
            first._prevnode = node;
        }
    }

    // Raw add: stores the node without touching any parent or sibling link.
    internal void Add(HtmlNode node)
    {
        _items.Add(node);
    }

    /// <summary>
    /// Gets the node at the specified index.
    /// </summary>
    public HtmlNode this[int index]
    {
        get
        {
            return (HtmlNode)_items[index];
        }
    }

    // Returns the position of 'node' (reference comparison), or -1 when it is not in the list.
    internal int GetNodeIndex(HtmlNode node)
    {
        // TODO: should we rewrite this? what would be the key of a node?
        for (int i = 0; i < _items.Count; i++)
        {
            if (node == ((HtmlNode)_items[i]))
            {
                return i;
            }
        }
        return -1;
    }

    /// <summary>
    /// Gets the index of a given node in the list.
    /// </summary>
    /// <exception cref="ArgumentNullException">The node is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The node is not part of the collection.</exception>
    public int this[HtmlNode node]
    {
        get
        {
            if (node == null)
            {
                // the error message below needs to dereference the node
                throw new ArgumentNullException("node");
            }

            int index = GetNodeIndex(node);
            if (index == -1)
            {
                throw new ArgumentOutOfRangeException("node", "Node \"" + node.CloneNode(false).OuterHtml + "\" was not found in the collection");
            }
            return index;
        }
    }

    /// <summary>
    /// Returns an enumerator that can iterate through the list.
    /// </summary>
    /// <returns>An IEnumerator for the entire list.</returns>
    public HtmlNodeEnumerator GetEnumerator()
    {
        return new HtmlNodeEnumerator(_items);
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Represents an enumerator that can iterate through the list.
    /// </summary>
    public class HtmlNodeEnumerator : IEnumerator
    {
        private int _index;
        private ArrayList _items;

        internal HtmlNodeEnumerator(ArrayList items)
        {
            _items = items;
            _index = -1;
        }

        /// <summary>
        /// Sets the enumerator to its initial position, which is before the first element in the collection.
        /// </summary>
        public void Reset()
        {
            _index = -1;
        }

        /// <summary>
        /// Advances the enumerator to the next element of the collection.
        /// </summary>
        /// <returns>true if the enumerator was successfully advanced to the next element, false if the enumerator has passed the end of the collection.</returns>
        public bool MoveNext()
        {
            _index++;
            return (_index < _items.Count);
        }

        /// <summary>
        /// Gets the current element in the collection.
        /// </summary>
        public HtmlNode Current
        {
            get
            {
                return (HtmlNode)(_items[_index]);
            }
        }

        /// <summary>
        /// Gets the current element in the collection.
        /// </summary>
        object IEnumerator.Current
        {
            get
            {
                return (Current);
            }
        }
    }
}
/// <summary>
/// Represents an HTML text node.
/// </summary>
public class HtmlTextNode : HtmlNode
{
    // Explicitly assigned text; while it is null the getters fall back to base.OuterHtml.
    private string _text;

    internal HtmlTextNode(HtmlDocument ownerdocument, int index)
        : base(HtmlNodeType.Text, ownerdocument, index)
    {
    }

    /// <summary>
    /// Gets or sets the HTML between the start and end tags of the object. For a text node, this is equal to OuterHtml.
    /// </summary>
    public override string InnerHtml
    {
        get { return OuterHtml; }
        set { _text = value; }
    }

    /// <summary>
    /// Gets the object and its content in HTML.
    /// </summary>
    public override string OuterHtml
    {
        get { return (_text != null) ? _text : base.OuterHtml; }
    }

    /// <summary>
    /// Gets or sets the text of the node.
    /// </summary>
    public string Text
    {
        get { return (_text != null) ? _text : base.OuterHtml; }
        set { _text = value; }
    }
}
2230 /// Represents an HTML comment.
2232 public class HtmlCommentNode
: HtmlNode
2234 private string _comment
;
// Creates a comment node; all arguments are forwarded to the HtmlNode constructor
// together with the fixed node type HtmlNodeType.Comment.
internal HtmlCommentNode(HtmlDocument ownerdocument, int index)
    : base(HtmlNodeType.Comment, ownerdocument, index)
{
}
/// <summary>
/// Gets or sets the inner HTML of the comment node. The getter returns the explicitly
/// assigned comment text when there is one, and the base InnerHtml otherwise.
/// </summary>
public override string InnerHtml
{
    get { return (_comment != null) ? _comment : base.InnerHtml; }
    set { _comment = value; }
}
/// <summary>
/// Gets the object and its content in HTML. When comment text has been explicitly
/// assigned it is wrapped in comment delimiters; otherwise the base OuterHtml is returned.
/// </summary>
public override string OuterHtml
{
    get
    {
        if (_comment != null)
        {
            return "<!--" + _comment + "-->";
        }
        return base.OuterHtml;
    }
}
2276 /// Gets or Sets the comment text of the node.
2278 public string Comment
2282 if (_comment
== null)
2284 return base.InnerHtml
;