2 Copyright (C) 2003 Simon Mourier <simonm@microsoft.com>
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 using System
.Collections
;
33 using System
.Xml
.XPath
;
35 namespace HtmlAgilityPack
38 /// Flags that describe the behavior of an Element node.
40 public enum HtmlElementFlag
43 /// The node is a CDATA node.
48 /// The node is empty. META and IMG are examples of such nodes.
53 /// The node will automatically be closed during parsing.
58 /// The node can overlap.
64 /// Represents the type of a node.
66 public enum HtmlNodeType
69 /// The root of a document.
84 /// A text node is always the child of an element or a document node.
90 /// Represents an HTML node.
92 public class HtmlNode
: IXPathNavigable
95 /// Gets the name of a comment node. It is actually defined as '#comment'.
97 public static readonly string HtmlNodeTypeNameComment
= "#comment";
100 /// Gets the name of the document node. It is actually defined as '#document'.
102 public static readonly string HtmlNodeTypeNameDocument
= "#document";
105 /// Gets the name of a text node. It is actually defined as '#text'.
107 public static readonly string HtmlNodeTypeNameText
= "#text";
110 /// Gets a collection of flags that define specific behaviors for specific element nodes.
111 /// The table contains a DictionaryEntry list with the lowercase tag name as the Key, and a combination of HtmlElementFlags as the Value.
113 public static Hashtable ElementsFlags
;
115 internal HtmlNodeType _nodetype
;
116 internal HtmlNode _nextnode
;
117 internal HtmlNode _prevnode
;
118 internal HtmlNode _parentnode
;
119 internal HtmlDocument _ownerdocument
;
120 internal HtmlNodeCollection _childnodes
;
121 internal HtmlAttributeCollection _attributes
;
122 internal int _line
= 0;
123 internal int _lineposition
= 0;
124 internal int _streamposition
= 0;
125 internal int _innerstartindex
= 0;
126 internal int _innerlength
= 0;
127 internal int _outerstartindex
= 0;
128 internal int _outerlength
= 0;
129 internal int _namestartindex
= 0;
130 internal int _namelength
= 0;
131 internal bool _starttag
= false;
132 internal string _name
;
133 internal HtmlNode _prevwithsamename
= null;
134 internal HtmlNode _endnode
;
136 internal bool _innerchanged
= false;
137 internal bool _outerchanged
= false;
138 internal string _innerhtml
;
139 internal string _outerhtml
;
143 // tags whose content may be anything
144 ElementsFlags
= new Hashtable();
145 ElementsFlags
.Add("script", HtmlElementFlag
.CData
);
146 ElementsFlags
.Add("style", HtmlElementFlag
.CData
);
147 ElementsFlags
.Add("noxhtml", HtmlElementFlag
.CData
);
149 // tags that can not contain other tags
150 ElementsFlags
.Add("base", HtmlElementFlag
.Empty
);
151 ElementsFlags
.Add("link", HtmlElementFlag
.Empty
);
152 ElementsFlags
.Add("meta", HtmlElementFlag
.Empty
);
153 ElementsFlags
.Add("isindex", HtmlElementFlag
.Empty
);
154 ElementsFlags
.Add("hr", HtmlElementFlag
.Empty
);
155 ElementsFlags
.Add("col", HtmlElementFlag
.Empty
);
156 ElementsFlags
.Add("img", HtmlElementFlag
.Empty
);
157 ElementsFlags
.Add("param", HtmlElementFlag
.Empty
);
158 ElementsFlags
.Add("embed", HtmlElementFlag
.Empty
);
159 ElementsFlags
.Add("frame", HtmlElementFlag
.Empty
);
160 ElementsFlags
.Add("wbr", HtmlElementFlag
.Empty
);
161 ElementsFlags
.Add("bgsound", HtmlElementFlag
.Empty
);
162 ElementsFlags
.Add("spacer", HtmlElementFlag
.Empty
);
163 ElementsFlags
.Add("keygen", HtmlElementFlag
.Empty
);
164 ElementsFlags
.Add("area", HtmlElementFlag
.Empty
);
165 ElementsFlags
.Add("input", HtmlElementFlag
.Empty
);
166 ElementsFlags
.Add("basefont", HtmlElementFlag
.Empty
);
168 //ElementsFlags.Add("form", HtmlElementFlag.CanOverlap | HtmlElementFlag.Empty);
169 ElementsFlags
.Add("form", HtmlElementFlag
.CanOverlap
);
171 // they sometimes contain, and sometimes they don't...
172 ElementsFlags
.Add("option", HtmlElementFlag
.Empty
);
174 // tag whose closing tag is equivalent to open tag:
175 // <p>bla</p>bla will be transformed into <p>bla</p>bla
176 // <p>bla<p>bla will be transformed into <p>bla<p>bla and not <p>bla</p><p>bla</p> or <p>bla<p>bla</p></p>
178 ElementsFlags
.Add("br", HtmlElementFlag
.Empty
| HtmlElementFlag
.Closed
);
179 ElementsFlags
.Add("p", HtmlElementFlag
.Empty
| HtmlElementFlag
.Closed
);
183 /// Determines if an element node is closed.
185 /// <param name="name">The name of the element node to check. May not be null.</param>
186 /// <returns>true if the name is the name of a closed element node, false otherwise.</returns>
187 public static bool IsClosedElement(string name
)
191 throw new ArgumentNullException("name");
194 object flag
= ElementsFlags
[name
.ToLower()];
199 return (((HtmlElementFlag
)flag
)&HtmlElementFlag
.Closed
) != 0;
203 /// Determines if an element node can be kept overlapped.
205 /// <param name="name">The name of the element node to check. May not be null.</param>
206 /// <returns>true if the name is the name of an element node that can be kept overlapped, false otherwise.</returns>
207 public static bool CanOverlapElement(string name
)
211 throw new ArgumentNullException("name");
214 object flag
= ElementsFlags
[name
.ToLower()];
219 return (((HtmlElementFlag
)flag
)&HtmlElementFlag
.CanOverlap
) != 0;
223 /// Determines if a text corresponds to the closing tag of a node that can be kept overlapped.
225 /// <param name="text">The text to check. May not be null.</param>
226 /// <returns>true or false.</returns>
227 public static bool IsOverlappedClosingElement(string text
)
231 throw new ArgumentNullException("text");
234 if (text
.Length
<= 4)
237 if ((text
[0] != '<') ||
238 (text
[text
.Length
- 1] != '>') ||
242 string name
= text
.Substring(2, text
.Length
- 3);
243 return CanOverlapElement(name
);
247 /// Determines if an element node is a CDATA element node.
249 /// <param name="name">The name of the element node to check. May not be null.</param>
250 /// <returns>true if the name is the name of a CDATA element node, false otherwise.</returns>
251 public static bool IsCDataElement(string name
)
255 throw new ArgumentNullException("name");
258 object flag
= ElementsFlags
[name
.ToLower()];
263 return (((HtmlElementFlag
)flag
)&HtmlElementFlag
.CData
) != 0;
267 /// Determines if an element node is defined as empty.
269 /// <param name="name">The name of the element node to check. May not be null.</param>
270 /// <returns>true if the name is the name of an empty element node, false otherwise.</returns>
271 public static bool IsEmptyElement(string name
)
275 throw new ArgumentNullException("name");
278 if (name
.Length
== 0)
295 object flag
= ElementsFlags
[name
.ToLower()];
300 return (((HtmlElementFlag
)flag
)&HtmlElementFlag
.Empty
) != 0;
304 /// Creates an HTML node from a string representing literal HTML.
306 /// <param name="html">The HTML text.</param>
307 /// <returns>The newly created node instance.</returns>
308 public static HtmlNode
CreateNode(string html
)
310 // REVIEW: this is *not* optimum...
311 HtmlDocument doc
= new HtmlDocument();
313 return doc
.DocumentNode
.FirstChild
;
/// <summary>
/// Copies the specified node, together with the subtree under it, into this node.
/// Equivalent to calling CopyFrom(node, true).
/// </summary>
/// <param name="node">The node to duplicate. May not be null.</param>
public void CopyFrom(HtmlNode node)
{
    // The single-argument form always requests a deep (recursive) copy.
    const bool deep = true;
    CopyFrom(node, deep);
}
326 /// Creates a duplicate of the node.
328 /// <param name="node">The node to duplicate. May not be null.</param>
329 /// <param name="deep">true to recursively clone the subtree under the specified node, false to clone only the node itself.</param>
330 public void CopyFrom(HtmlNode node
, bool deep
)
334 throw new ArgumentNullException("node");
337 Attributes
.RemoveAll();
338 if (node
.HasAttributes
)
340 foreach(HtmlAttribute att
in node
.Attributes
)
342 SetAttributeValue(att
.Name
, att
.Value
);
349 if (node
.HasChildNodes
)
351 foreach(HtmlNode child
in node
.ChildNodes
)
353 AppendChild(child
.CloneNode(true));
359 internal HtmlNode(HtmlNodeType type
, HtmlDocument ownerdocument
, int index
)
362 _ownerdocument
= ownerdocument
;
363 _outerstartindex
= index
;
367 case HtmlNodeType
.Comment
:
368 _name
= HtmlNodeTypeNameComment
;
372 case HtmlNodeType
.Document
:
373 _name
= HtmlNodeTypeNameDocument
;
377 case HtmlNodeType
.Text
:
378 _name
= HtmlNodeTypeNameText
;
383 if (_ownerdocument
._openednodes
!= null)
387 // we use the index as the key
389 // -1 means the node comes from public
392 _ownerdocument
._openednodes
.Add(index
, this);
397 if ((-1 == index
) && (type
!= HtmlNodeType
.Comment
) && (type
!= HtmlNodeType
.Text
))
399 // innerhtml and outerhtml must be calculated
400 _outerchanged
= true;
401 _innerchanged
= true;
405 internal void CloseNode(HtmlNode endnode
)
407 if (!_ownerdocument
.OptionAutoCloseOnEnd
)
409 // close all children
410 if (_childnodes
!= null)
412 foreach(HtmlNode child
in _childnodes
)
417 // create a fake closer node
418 HtmlNode close
= new HtmlNode(NodeType
, _ownerdocument
, -1);
419 close
._endnode
= close
;
420 child
.CloseNode(close
);
429 if (_ownerdocument
._openednodes
!= null)
431 _ownerdocument
._openednodes
.Remove(_outerstartindex
);
434 HtmlNode self
= _ownerdocument
._lastnodes
[Name
] as HtmlNode
;
437 _ownerdocument
._lastnodes
.Remove(Name
);
438 _ownerdocument
.UpdateLastParentNode();
444 // create an inner section
445 _innerstartindex
= _outerstartindex
+ _outerlength
;
446 _innerlength
= endnode
._outerstartindex
- _innerstartindex
;
448 // update full length
449 _outerlength
= (endnode
._outerstartindex
+ endnode
._outerlength
) - _outerstartindex
;
453 internal HtmlNode EndNode
461 internal string GetId()
463 HtmlAttribute att
= Attributes
["id"];
471 internal void SetId(string id
)
473 HtmlAttribute att
= Attributes
["id"];
476 att
= _ownerdocument
.CreateAttribute("id");
479 _ownerdocument
.SetIdForNode(this, att
.Value
);
480 _outerchanged
= true;
/// <summary>
/// Creates a new XPathNavigator object for navigating this HTML node.
/// </summary>
/// <returns>An XPathNavigator object. The XPathNavigator is positioned on the node from which the method was called. It is not positioned on the root of the document.</returns>
public XPathNavigator CreateNavigator()
{
    // The navigator is bound to the owner document and starts on this node.
    HtmlNodeNavigator navigator = new HtmlNodeNavigator(_ownerdocument, this);
    return navigator;
}
493 /// Selects the first HtmlNode that matches the XPath expression.
495 /// <param name="xpath">The XPath expression. May not be null.</param>
496 /// <returns>The first HtmlNode that matches the XPath query or a null reference if no matching node was found.</returns>
497 public HtmlNode
SelectSingleNode(string xpath
)
501 throw new ArgumentNullException("xpath");
504 HtmlNodeNavigator nav
= new HtmlNodeNavigator(_ownerdocument
, this);
505 XPathNodeIterator it
= nav
.Select(xpath
);
511 HtmlNodeNavigator node
= (HtmlNodeNavigator
)it
.Current
;
512 return node
.CurrentNode
;
516 /// Selects a list of nodes matching the XPath expression.
518 /// <param name="xpath">The XPath expression.</param>
519 /// <returns>An HtmlNodeCollection containing a collection of nodes matching the XPath query, or null if no node matched the XPath expression.</returns>
520 public HtmlNodeCollection
SelectNodes(string xpath
)
522 HtmlNodeCollection list
= new HtmlNodeCollection(null);
524 HtmlNodeNavigator nav
= new HtmlNodeNavigator(_ownerdocument
, this);
525 XPathNodeIterator it
= nav
.Select(xpath
);
526 while (it
.MoveNext())
528 HtmlNodeNavigator n
= (HtmlNodeNavigator
)it
.Current
;
529 list
.Add(n
.CurrentNode
);
539 /// Gets or sets the value of the 'id' HTML attribute. The document must have been parsed using the OptionUseIdAttribute set to true.
545 if (_ownerdocument
._nodesid
== null)
547 throw new Exception(HtmlDocument
.HtmlExceptionUseIdAttributeFalse
);
553 if (_ownerdocument
._nodesid
== null)
555 throw new Exception(HtmlDocument
.HtmlExceptionUseIdAttributeFalse
);
560 throw new ArgumentNullException("value");
567 /// Gets the line number of this node in the document.
/// <summary>
/// Gets the column number of this node in the document.
/// </summary>
public int LinePosition
{
    get
    {
        // Read-only view over the internal _lineposition field.
        return this._lineposition;
    }
}
/// <summary>
/// Gets the stream position of this node in the document, relative to the start of the document.
/// </summary>
public int StreamPosition
{
    get
    {
        // Read-only view over the internal _streamposition field.
        return this._streamposition;
    }
}
600 /// Gets a value indicating if this node has been closed or not.
606 return (_endnode
!= null);
611 /// Gets or sets this node's name.
619 _name
= _ownerdocument
._text
.Substring(_namestartindex
, _namelength
).ToLower();
630 /// Gets or Sets the text between the start and end tags of the object.
632 public virtual string InnerText
636 if (_nodetype
== HtmlNodeType
.Text
)
638 return ((HtmlTextNode
)this).Text
;
641 if (_nodetype
== HtmlNodeType
.Comment
)
643 return ((HtmlCommentNode
)this).Comment
;
646 // note: right now, this method is *slow*, because we recompute everything.
647 // it could be optimised like innerhtml
654 foreach(HtmlNode node
in ChildNodes
)
663 /// Gets or Sets the HTML between the start and end tags of the object.
665 public virtual string InnerHtml
671 _innerhtml
= WriteContentTo();
672 _innerchanged
= false;
675 if (_innerhtml
!= null)
680 if (_innerstartindex
< 0)
685 return _ownerdocument
._text
.Substring(_innerstartindex
, _innerlength
);
689 HtmlDocument doc
= new HtmlDocument();
693 AppendChildren(doc
.DocumentNode
.ChildNodes
);
698 /// Gets or Sets the object and its content in HTML.
700 public virtual string OuterHtml
706 _outerhtml
= WriteTo();
707 _outerchanged
= false;
711 if (_outerhtml
!= null)
716 if (_outerstartindex
< 0)
721 return _ownerdocument
._text
.Substring(_outerstartindex
, _outerlength
);
/// <summary>
/// Creates a duplicate of the node, including the subtree under it.
/// </summary>
/// <returns>The cloned node.</returns>
public HtmlNode Clone()
{
    // Clone() is the deep variant of CloneNode(bool).
    HtmlNode copy = CloneNode(true);
    return copy;
}
/// <summary>
/// Creates a duplicate of the node, including the subtree under it, and changes its name at the same time.
/// </summary>
/// <param name="newName">The new name of the cloned node. May not be null.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(string newName)
{
    // Renaming clones are deep by default; the two-argument overload does the work.
    HtmlNode renamedCopy = CloneNode(newName, true);
    return renamedCopy;
}
745 /// Creates a duplicate of the node and changes its name at the same time.
747 /// <param name="newName">The new name of the cloned node. May not be null.</param>
748 /// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
749 /// <returns>The cloned node.</returns>
750 public HtmlNode
CloneNode(string newName
, bool deep
)
754 throw new ArgumentNullException("newName");
757 HtmlNode node
= CloneNode(deep
);
758 node
._name
= newName
;
763 /// Creates a duplicate of the node.
765 /// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
766 /// <returns>The cloned node.</returns>
767 public HtmlNode
CloneNode(bool deep
)
769 HtmlNode node
= _ownerdocument
.CreateNode(_nodetype
);
774 case HtmlNodeType
.Comment
:
775 ((HtmlCommentNode
)node
).Comment
= ((HtmlCommentNode
)this).Comment
;
778 case HtmlNodeType
.Text
:
779 ((HtmlTextNode
)node
).Text
= ((HtmlTextNode
)this).Text
;
786 foreach(HtmlAttribute att
in _attributes
)
788 HtmlAttribute newatt
= att
.Clone();
789 node
.Attributes
.Append(newatt
);
793 // closing attributes
794 if (HasClosingAttributes
)
796 node
._endnode
= _endnode
.CloneNode(false);
797 foreach(HtmlAttribute att
in _endnode
._attributes
)
799 HtmlAttribute newatt
= att
.Clone();
800 node
._endnode
._attributes
.Append(newatt
);
814 foreach(HtmlNode child
in _childnodes
)
816 HtmlNode newchild
= child
.Clone();
817 node
.AppendChild(newchild
);
823 /// Gets the HTML node immediately following this element.
825 public HtmlNode NextSibling
834 /// Gets the node immediately preceding this node.
836 public HtmlNode PreviousSibling
845 /// Removes all the children and/or attributes of the current node.
847 public void RemoveAll()
856 if ((_endnode
!= null) && (_endnode
!= this))
858 if (_endnode
._attributes
!= null)
860 _endnode
._attributes
.Clear();
863 _outerchanged
= true;
864 _innerchanged
= true;
868 /// Removes all the children of the current node.
870 public void RemoveAllChildren()
877 if (_ownerdocument
.OptionUseIdAttribute
)
879 // remove nodes from id list
880 foreach(HtmlNode node
in _childnodes
)
882 _ownerdocument
.SetIdForNode(null, node
.GetId());
886 _outerchanged
= true;
887 _innerchanged
= true;
891 /// Removes the specified child node.
893 /// <param name="oldChild">The node being removed. May not be null.</param>
894 /// <returns>The node removed.</returns>
895 public HtmlNode
RemoveChild(HtmlNode oldChild
)
897 if (oldChild
== null)
899 throw new ArgumentNullException("oldChild");
904 if (_childnodes
!= null)
906 index
= _childnodes
[oldChild
];
911 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
914 _childnodes
.Remove(index
);
916 _ownerdocument
.SetIdForNode(null, oldChild
.GetId());
917 _outerchanged
= true;
918 _innerchanged
= true;
923 /// Removes the specified child node.
925 /// <param name="oldChild">The node being removed. May not be null.</param>
926 /// <param name="keepGrandChildren">true to keep grand children of the node, false otherwise.</param>
927 /// <returns>The node removed.</returns>
928 public HtmlNode
RemoveChild(HtmlNode oldChild
, bool keepGrandChildren
)
930 if (oldChild
== null)
932 throw new ArgumentNullException("oldChild");
935 if ((oldChild
._childnodes
!= null) && keepGrandChildren
)
938 HtmlNode prev
= oldChild
.PreviousSibling
;
940 // reroute grand children to ourselves
941 foreach(HtmlNode grandchild
in oldChild
._childnodes
)
943 InsertAfter(grandchild
, prev
);
946 RemoveChild(oldChild
);
947 _outerchanged
= true;
948 _innerchanged
= true;
953 /// Replaces the child node oldChild with newChild node.
955 /// <param name="newChild">The new node to put in the child list.</param>
956 /// <param name="oldChild">The node being replaced in the list.</param>
957 /// <returns>The node replaced.</returns>
958 public HtmlNode
ReplaceChild(HtmlNode newChild
, HtmlNode oldChild
)
960 if (newChild
== null)
962 return RemoveChild(oldChild
);
965 if (oldChild
== null)
967 return AppendChild(newChild
);
972 if (_childnodes
!= null)
974 index
= _childnodes
[oldChild
];
979 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
982 _childnodes
.Replace(index
, newChild
);
984 _ownerdocument
.SetIdForNode(null, oldChild
.GetId());
985 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
986 _outerchanged
= true;
987 _innerchanged
= true;
992 /// Inserts the specified node immediately before the specified reference node.
994 /// <param name="newChild">The node to insert. May not be null.</param>
995 /// <param name="refChild">The node that is the reference node. The newChild is placed before this node.</param>
996 /// <returns>The node being inserted.</returns>
997 public HtmlNode
InsertBefore(HtmlNode newChild
, HtmlNode refChild
)
999 if (newChild
== null)
1001 throw new ArgumentNullException("newChild");
1004 if (refChild
== null)
1006 return AppendChild(newChild
);
1009 if (newChild
== refChild
)
1016 if (_childnodes
!= null)
1018 index
= _childnodes
[refChild
];
1023 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
1026 _childnodes
.Insert(index
, newChild
);
1028 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1029 _outerchanged
= true;
1030 _innerchanged
= true;
1035 /// Inserts the specified node immediately after the specified reference node.
1037 /// <param name="newChild">The node to insert. May not be null.</param>
1038 /// <param name="refChild">The node that is the reference node. The newNode is placed after the refNode.</param>
1039 /// <returns>The node being inserted.</returns>
1040 public HtmlNode
InsertAfter(HtmlNode newChild
, HtmlNode refChild
)
1042 if (newChild
== null)
1044 throw new ArgumentNullException("newChild");
1047 if (refChild
== null)
1049 return PrependChild(newChild
);
1052 if (newChild
== refChild
)
1059 if (_childnodes
!= null)
1061 index
= _childnodes
[refChild
];
1065 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
1068 _childnodes
.Insert(index
+ 1, newChild
);
1070 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1071 _outerchanged
= true;
1072 _innerchanged
= true;
1077 /// Gets the first child of the node.
1079 public HtmlNode FirstChild
1087 return _childnodes
[0];
1092 /// Gets the last child of the node.
1094 public HtmlNode LastChild
1102 return _childnodes
[_childnodes
.Count
-1];
1107 /// Gets the type of this node.
1109 public HtmlNodeType NodeType
1118 /// Gets the parent of this node (for nodes that can have parents).
1120 public HtmlNode ParentNode
/// <summary>
/// Gets the HtmlDocument to which this node belongs.
/// </summary>
public HtmlDocument OwnerDocument
{
    get
    {
        // Read-only view over the internal _ownerdocument field.
        return this._ownerdocument;
    }
}
1140 /// Gets all the children of the node.
1142 public HtmlNodeCollection ChildNodes
1146 if (_childnodes
== null)
1148 _childnodes
= new HtmlNodeCollection(this);
1155 /// Adds the specified node to the beginning of the list of children of this node.
1157 /// <param name="newChild">The node to add. May not be null.</param>
1158 /// <returns>The node added.</returns>
1159 public HtmlNode
PrependChild(HtmlNode newChild
)
1161 if (newChild
== null)
1163 throw new ArgumentNullException("newChild");
1165 ChildNodes
.Prepend(newChild
);
1166 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1167 _outerchanged
= true;
1168 _innerchanged
= true;
/// <summary>
/// Adds the specified node list to the beginning of the list of children of this node.
/// The added nodes keep the relative order they have in the list.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
public void PrependChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        throw new ArgumentNullException("newChildren");
    }

    // Each PrependChild call places its node in front of everything added so far,
    // so the list is walked from last to first; walking it forwards would leave
    // the new children in reverse order.
    for (int i = newChildren.Count - 1; i >= 0; i--)
    {
        PrependChild(newChildren[i]);
    }
}
1190 /// Adds the specified node to the end of the list of children of this node.
1192 /// <param name="newChild">The node to add. May not be null.</param>
1193 /// <returns>The node added.</returns>
1194 public HtmlNode
AppendChild(HtmlNode newChild
)
1196 if (newChild
== null)
1198 throw new ArgumentNullException("newChild");
1201 ChildNodes
.Append(newChild
);
1202 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1203 _outerchanged
= true;
1204 _innerchanged
= true;
/// <summary>
/// Adds the specified node list to the end of the list of children of this node.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
public void AppendChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        // The reported name must match the actual parameter name so callers
        // can tell which argument was null.
        throw new ArgumentNullException("newChildren");
    }

    foreach (HtmlNode newChild in newChildren)
    {
        AppendChild(newChild);
    }
}
1224 /// Gets a value indicating whether the current node has any attributes.
1226 public bool HasAttributes
1230 if (_attributes
== null)
1235 if (_attributes
.Count
<= 0)
1244 /// Gets a value indicating whether the current node has any attributes on the closing tag.
1246 public bool HasClosingAttributes
1250 if ((_endnode
== null) || (_endnode
== this))
1255 if (_endnode
._attributes
== null)
1260 if (_endnode
._attributes
.Count
<= 0)
1269 /// Gets a value indicating whether this node has any child nodes.
1271 public bool HasChildNodes
1275 if (_childnodes
== null)
1280 if (_childnodes
.Count
<= 0)
1289 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
1291 /// <param name="name">The name of the attribute to get. May not be null.</param>
1292 /// <param name="def">The default value to return if not found.</param>
1293 /// <returns>The value of the attribute if found, the default value if not found.</returns>
1294 public string GetAttributeValue(string name
, string def
)
1298 throw new ArgumentNullException("name");
1305 HtmlAttribute att
= Attributes
[name
];
1314 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
1316 /// <param name="name">The name of the attribute to get. May not be null.</param>
1317 /// <param name="def">The default value to return if not found.</param>
1318 /// <returns>The value of the attribute if found, the default value if not found.</returns>
1319 public int GetAttributeValue(string name
, int def
)
1323 throw new ArgumentNullException("name");
1330 HtmlAttribute att
= Attributes
[name
];
1337 return Convert
.ToInt32(att
.Value
);
1346 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
1348 /// <param name="name">The name of the attribute to get. May not be null.</param>
1349 /// <param name="def">The default value to return if not found.</param>
1350 /// <returns>The value of the attribute if found, the default value if not found.</returns>
1351 public bool GetAttributeValue(string name
, bool def
)
1355 throw new ArgumentNullException("name");
1362 HtmlAttribute att
= Attributes
[name
];
1369 return Convert
.ToBoolean(att
.Value
);
1378 /// Helper method to set the value of an attribute of this node. If the attribute is not found, it will be created automatically.
1380 /// <param name="name">The name of the attribute to set. May not be null.</param>
1381 /// <param name="value">The value for the attribute.</param>
1382 /// <returns>The corresponding attribute instance.</returns>
1383 public HtmlAttribute
SetAttributeValue(string name
, string value)
1387 throw new ArgumentNullException("name");
1389 HtmlAttribute att
= Attributes
[name
];
1392 return Attributes
.Append(_ownerdocument
.CreateAttribute(name
, value));
1399 /// Gets the collection of HTML attributes for this node. May not be null.
1401 public HtmlAttributeCollection Attributes
1407 _attributes
= new HtmlAttributeCollection(this);
1414 /// Gets the collection of HTML attributes for the closing tag. May not be null.
1416 public HtmlAttributeCollection ClosingAttributes
1420 if (!HasClosingAttributes
)
1422 return new HtmlAttributeCollection(this);
1424 return _endnode
.Attributes
;
1428 internal void WriteAttribute(TextWriter outText
, HtmlAttribute att
)
1432 if (_ownerdocument
.OptionOutputAsXml
)
1434 if (_ownerdocument
.OptionOutputUpperCase
)
1436 name
= att
.XmlName
.ToUpper();
1443 outText
.Write(" " + name
+ "=\"" + HtmlDocument
.HtmlEncode(att
.XmlValue
) + "\"");
1447 if (_ownerdocument
.OptionOutputUpperCase
)
1449 name
= att
.Name
.ToUpper();
1456 if (att
.Name
.Length
>= 4)
1458 if ((att
.Name
[0] == '<') && (att
.Name
[1] == '%') &&
1459 (att
.Name
[att
.Name
.Length
-1] == '>') && (att
.Name
[att
.Name
.Length
-2] == '%'))
1461 outText
.Write(" " + name
);
1465 if (_ownerdocument
.OptionOutputOptimizeAttributeValues
)
1467 if (att
.Value
.IndexOfAny(new Char
[]{(char)10, (char)13, (char)9, ' '}
) < 0)
1469 outText
.Write(" " + name
+ "=" + att
.Value
);
1473 outText
.Write(" " + name
+ "=\"" + att
.Value
+ "\"");
1478 outText
.Write(" " + name
+ "=\"" + att
.Value
+ "\"");
1483 internal static void WriteAttributes(XmlWriter writer
, HtmlNode node
)
1485 if (!node
.HasAttributes
)
1489 // we use _hashitems to make sure attributes are written only once
1490 foreach(HtmlAttribute att
in node
.Attributes
._hashitems
.Values
)
1492 writer
.WriteAttributeString(att
.XmlName
, att
.Value
);
1496 internal void WriteAttributes(TextWriter outText
, bool closing
)
1498 if (_ownerdocument
.OptionOutputAsXml
)
1500 if (_attributes
== null)
1504 // we use _hashitems to make sure attributes are written only once
1505 foreach(HtmlAttribute att
in _attributes
._hashitems
.Values
)
1507 WriteAttribute(outText
, att
);
1514 if (_attributes
!= null)
1517 foreach(HtmlAttribute att
in _attributes
)
1519 WriteAttribute(outText
, att
);
1522 if (_ownerdocument
.OptionAddDebuggingAttributes
)
1524 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_closed", Closed
.ToString()));
1525 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_children", ChildNodes
.Count
.ToString()));
1528 foreach(HtmlNode n
in ChildNodes
)
1530 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_child_" + i
,
1538 if (_endnode
== null)
1543 if (_endnode
._attributes
== null)
1548 if (_endnode
== this)
1553 foreach(HtmlAttribute att
in _endnode
._attributes
)
1555 WriteAttribute(outText
, att
);
1557 if (_ownerdocument
.OptionAddDebuggingAttributes
)
1559 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_closed", Closed
.ToString()));
1560 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_children", ChildNodes
.Count
.ToString()));
// Returns the text of a comment node in a form that can be written inside an XML comment.
// The raw comment is expected to look like "<!--text-->": the first four and the last
// three characters are dropped, and every "--" left in the text is rewritten as " - -"
// (a double hyphen is not allowed inside an XML comment).
internal static string GetXmlComment(HtmlCommentNode comment)
{
    string s = comment.Comment;

    // A missing or too-short comment has no text between the delimiters;
    // Substring would throw on it, so it is reported as empty instead.
    if ((s == null) || (s.Length < 7))
    {
        return "";
    }

    return s.Substring(4, s.Length - 7).Replace("--", " - -");
}
1572 /// Saves the current node to the specified TextWriter.
/// The output depends on the node type (comment, document, text or element) and on the
/// owner document's OptionOutputAsXml, OptionOutputUpperCase and OptionWriteEmptyNodes settings.
1574 /// <param name="outText">The TextWriter to which you want to save.</param>
1575 public void WriteTo(TextWriter outText
)
// NOTE(review): several structural lines of this method (switch header, else/break lines,
// some guards) are not visible in this view; comments below describe only the visible statements.
// Comment node: the raw comment text is read first.
1580 case HtmlNodeType
.Comment
:
1581 html
= ((HtmlCommentNode
)this).Comment
;
// In XML mode the comment is rebuilt from GetXmlComment and wrapped in comment delimiters.
1582 if (_ownerdocument
.OptionOutputAsXml
)
1584 outText
.Write("<!--" + GetXmlComment((HtmlCommentNode
)this) + " -->");
// Presumably the non-XML branch: the comment text is written unchanged.
1588 outText
.Write(html
);
// Document node.
1592 case HtmlNodeType
.Document
:
1593 if (_ownerdocument
.OptionOutputAsXml
)
// XML declaration carrying the BodyName of the document's output encoding.
1595 outText
.Write("<?xml version=\"1.0\" encoding=\"" + _ownerdocument
.GetOutEncoding().BodyName
+ "\"?>");
1597 // check there is a root element
1598 if (_ownerdocument
.DocumentNode
.HasChildNodes
)
// Number of direct children of the document node.
1600 int rootnodes
= _ownerdocument
.DocumentNode
._childnodes
.Count
;
1603 HtmlNode xml
= _ownerdocument
.GetXmlDeclaration();
// NOTE(review): the condition guarding the wrapper below is not visible here; presumably a
// span element is emitted around the content when there is more than one root node - confirm.
// The wrapper tag is written in upper or lower case according to OptionOutputUpperCase.
1611 if (_ownerdocument
.OptionOutputUpperCase
)
1613 outText
.Write("<SPAN>");
1614 WriteContentTo(outText
);
1615 outText
.Write("</SPAN>");
1619 outText
.Write("<span>");
1620 WriteContentTo(outText
);
1621 outText
.Write("</span>");
// Children written without any wrapper.
1628 WriteContentTo(outText
);
// Text node: HTML-encoded through HtmlDocument.HtmlEncode in XML mode.
1631 case HtmlNodeType
.Text
:
1632 html
= ((HtmlTextNode
)this).Text
;
1633 if (_ownerdocument
.OptionOutputAsXml
)
1635 outText
.Write(HtmlDocument
.HtmlEncode(html
));
// Presumably the non-XML branch: text is written unchanged.
1639 outText
.Write(html
);
// Element node.
1643 case HtmlNodeType
.Element
:
// The tag name is upper-cased when OptionOutputUpperCase is set.
1645 if (_ownerdocument
.OptionOutputUpperCase
)
1647 name
= Name
.ToUpper();
// XML mode: additional checks on the name before it is written.
1654 if (_ownerdocument
.OptionOutputAsXml
)
1656 if (name
.Length
> 0)
1660 // forget this one, it's been done at the document level
// A name made only of whitespace is detected here; the action taken is not visible in this view.
1664 if (name
.Trim().Length
== 0)
// The name is passed through HtmlAttribute.GetXmlName before being written.
1668 name
= HtmlAttribute
.GetXmlName(name
);
// Start tag followed by the attributes; the meaning of the boolean argument is defined by
// WriteAttributes, which is not visible here.
1676 outText
.Write("<" + name
);
1677 WriteAttributes(outText
, false);
// Elements reported as empty by IsEmptyElement are self-closed when either
// OptionWriteEmptyNodes or OptionOutputAsXml is set.
1681 if (HtmlNode
.IsEmptyElement(Name
))
1683 if ((_ownerdocument
.OptionWriteEmptyNodes
) || (_ownerdocument
.OptionOutputAsXml
))
1685 outText
.Write(" />");
1689 if (Name
.Length
> 0)
// Closes the start tag and immediately writes the matching end tag.
1702 outText
.Write("></" + name
+ ">");
// XML mode: content of elements reported by IsCDataElement is wrapped in a CDATA section
// whose markers are prefixed with "//".
1709 if (_ownerdocument
.OptionOutputAsXml
)
1711 if (HtmlNode
.IsCDataElement(Name
))
1713 // this code and the following tries to output things as nicely as possible for old browsers.
1715 outText
.Write("\r\n//<![CDATA[\r\n");
1723 // child must be a text
1724 ChildNodes
[0].WriteTo(outText
);
1726 outText
.Write("\r\n//]]>//\r\n");
// Regular (non-CDATA) content.
1730 WriteContentTo(outText
);
// End tag; WriteAttributes is called with true only when not in XML mode.
1733 outText
.Write("</" + name
);
1734 if (!_ownerdocument
.OptionOutputAsXml
)
1736 WriteAttributes(outText
, true);
1745 /// Saves the current node to the specified XmlWriter.
/// Comments, the document, text and elements are each mapped to the corresponding XmlWriter call.
1747 /// <param name="writer">The XmlWriter to which you want to save.</param>
1748 public void WriteTo(XmlWriter writer
)
// NOTE(review): the switch header and the break/else lines are not visible in this view.
// Comment node: written with the text produced by GetXmlComment.
1753 case HtmlNodeType
.Comment
:
1754 writer
.WriteComment(GetXmlComment((HtmlCommentNode
)this));
// Document node: an "xml" processing instruction with the output encoding's BodyName,
// then every child node is written in turn.
1757 case HtmlNodeType
.Document
:
1758 writer
.WriteProcessingInstruction("xml", "version=\"1.0\" encoding=\"" + _ownerdocument
.GetOutEncoding().BodyName
+ "\"");
1761 foreach(HtmlNode subnode
in ChildNodes
)
1763 subnode
.WriteTo(writer
);
// Text node: the text is handed to WriteString.
1768 case HtmlNodeType
.Text
:
1769 html
= ((HtmlTextNode
)this).Text
;
1770 writer
.WriteString(html
);
// Element node: name is upper-cased when OptionOutputUpperCase is set.
1773 case HtmlNodeType
.Element
:
1775 if (_ownerdocument
.OptionOutputUpperCase
)
1777 name
= Name
.ToUpper();
// Start element, attributes, children, then the end element.
1783 writer
.WriteStartElement(name
);
1784 WriteAttributes(writer
, this);
1788 foreach(HtmlNode subnode
in ChildNodes
)
1790 subnode
.WriteTo(writer
);
1793 writer
.WriteEndElement();
1799 /// Saves all the children of the node to the specified TextWriter.
/// Each child is written through its own WriteTo(TextWriter) call, in list order.
1801 /// <param name="outText">The TextWriter to which you want to save.</param>
1802 public void WriteContentTo(TextWriter outText
)
// The child list may not have been created yet.
// NOTE(review): the statement executed in that case is not visible here; presumably an early return - confirm.
1804 if (_childnodes
== null)
1809 foreach(HtmlNode node
in _childnodes
)
1811 node
.WriteTo(outText
);
1816 /// Saves the current node to a string.
1818 /// <returns>The saved string.</returns>
1819 public string WriteTo()
// In-memory writer whose accumulated text is the return value.
// NOTE(review): the statement(s) that fill the writer are not visible in this view.
1821 StringWriter sw
= new StringWriter();
1824 return sw
.ToString();
1828 /// Saves all the children of the node to a string.
1830 /// <returns>The saved string.</returns>
1831 public string WriteContentTo()
// In-memory writer whose accumulated text is the return value.
// NOTE(review): the statement(s) that fill the writer are not visible in this view.
1833 StringWriter sw
= new StringWriter();
1836 return sw
.ToString();
1841 /// Represents a combined list and collection of HTML nodes.
/// Nodes are kept in an ArrayList; the collection also remembers the node that owns it.
1843 public class HtmlNodeCollection
: IEnumerable
// Backing store holding the HtmlNode instances in order.
1845 private ArrayList _items
= new ArrayList();
// Node assigned as _parentnode to items linked into this collection.
1846 private HtmlNode _parentnode
;
// Creates a collection owned by the given node.
1848 internal HtmlNodeCollection(HtmlNode parentnode
)
1850 _parentnode
= parentnode
; // may be null
1854 /// Gets the number of elements actually contained in the list.
// Delegates to the Count of the backing ArrayList.
1860 return _items
.Count
;
// Detaches every node held by the collection: parent, next and previous links are reset to null.
// NOTE(review): the statement that empties the backing list itself is not visible in this view.
1864 internal void Clear()
1866 foreach(HtmlNode node
in _items
)
1868 node
._parentnode
= null;
1869 node
._nextnode
= null;
1870 node
._prevnode
= null;
// Removes the node stored at the given index and unlinks it from its siblings.
// NOTE(review): the guard conditions around the neighbour lookups, the re-linking
// and the throw are not visible in this view - confirm against the full file.
1875 internal void Remove(int index
)
1877 HtmlNode next
= null;
1878 HtmlNode prev
= null;
// Keep a reference to the removed node so its links can be cleared at the end.
1879 HtmlNode oldnode
= (HtmlNode
)_items
[index
];
// Item located just before the removed one.
1883 prev
= (HtmlNode
)_items
[index
-1];
// A following item exists only when index is not the last position.
1886 if (index
< (_items
.Count
-1))
1888 next
= (HtmlNode
)_items
[index
+1];
1891 _items
.RemoveAt(index
);
// Consistency check; the tested condition is not visible here.
1897 throw new InvalidProgramException("Unexpected error.");
// Former neighbours are linked to each other.
1899 prev
._nextnode
= next
;
1904 next
._prevnode
= prev
;
// The removed node no longer references siblings or a parent.
1907 oldnode
._prevnode
= null;
1908 oldnode
._nextnode
= null;
1909 oldnode
._parentnode
= null;
// Replaces the node stored at the given index with another node, wiring the new node into
// the sibling chain and detaching the old one.
// NOTE(review): the guard conditions around the neighbour lookups, the re-linking
// and the two throws are not visible in this view - confirm against the full file.
1912 internal void Replace(int index
, HtmlNode node
)
1914 HtmlNode next
= null;
1915 HtmlNode prev
= null;
// Node currently stored at index; its links are cleared at the end.
1916 HtmlNode oldnode
= (HtmlNode
)_items
[index
];
// Item located just before the replaced one.
1920 prev
= (HtmlNode
)_items
[index
-1];
// A following item exists only when index is not the last position.
1923 if (index
<(_items
.Count
-1))
1925 next
= (HtmlNode
)_items
[index
+1];
// The slot now holds the new node.
1928 _items
[index
] = node
;
// Consistency check; the tested condition is not visible here.
1934 throw new InvalidProgramException("Unexpected error.");
// Neighbours point at the new node.
1936 prev
._nextnode
= node
;
1941 next
._prevnode
= node
;
// The new node points back at its neighbours and takes the collection's parent.
1944 node
._prevnode
= prev
;
// Second consistency check; the tested condition is not visible here.
1947 throw new InvalidProgramException("Unexpected error.");
1949 node
._nextnode
= next
;
1950 node
._parentnode
= _parentnode
;
// The replaced node no longer references siblings or a parent.
1952 oldnode
._prevnode
= null;
1953 oldnode
._nextnode
= null;
1954 oldnode
._parentnode
= null;
// Inserts a node at the given index and links it between the items that surround that position.
// NOTE(review): the guard conditions around the neighbour lookups, the re-linking
// and the two throws are not visible in this view - confirm against the full file.
1957 internal void Insert(int index
, HtmlNode node
)
1959 HtmlNode next
= null;
1960 HtmlNode prev
= null;
// Item that will precede the inserted node.
1964 prev
= (HtmlNode
)_items
[index
-1];
// The item currently at index (if any) becomes the next sibling of the inserted node.
1967 if (index
<_items
.Count
)
1969 next
= (HtmlNode
)_items
[index
];
1972 _items
.Insert(index
, node
);
// Consistency check; the tested condition is not visible here.
1978 throw new InvalidProgramException("Unexpected error.");
// Neighbours point at the inserted node.
1980 prev
._nextnode
= node
;
1985 next
._prevnode
= node
;
// The inserted node points back at its neighbours and takes the collection's parent.
1988 node
._prevnode
= prev
;
// Second consistency check; the tested condition is not visible here.
1992 throw new InvalidProgramException("Unexpected error.");
1995 node
._nextnode
= next
;
1996 node
._parentnode
= _parentnode
;
// Links a node after the current last item of the collection.
// NOTE(review): the statement that adds the node to the backing list and the condition
// guarding the throw are not visible in this view - confirm against the full file.
1999 internal void Append(HtmlNode node
)
2001 HtmlNode last
= null;
// Current last item, when the list is not empty.
2002 if (_items
.Count
> 0)
2004 last
= (HtmlNode
)_items
[_items
.Count
-1];
// The appended node follows the former last item, has no successor, and takes the collection's parent.
2008 node
._prevnode
= last
;
2009 node
._nextnode
= null;
2010 node
._parentnode
= _parentnode
;
// Consistency check; the tested condition is not visible here.
2015 throw new InvalidProgramException("Unexpected error.");
// The former last item now points at the appended node.
2017 last
._nextnode
= node
;
// Inserts a node at position 0 and links it before the former first item.
// NOTE(review): the conditions guarding the throw and the final re-link are not visible
// in this view - confirm against the full file.
2021 internal void Prepend(HtmlNode node
)
2023 HtmlNode first
= null;
// Current first item, when the list is not empty.
2024 if (_items
.Count
> 0)
2026 first
= (HtmlNode
)_items
[0];
2029 _items
.Insert(0, node
);
// Consistency check; the tested condition is not visible here.
2033 throw new InvalidProgramException("Unexpected error.");
// The prepended node precedes the former first item, has no predecessor, and takes the collection's parent.
2035 node
._nextnode
= first
;
2036 node
._prevnode
= null;
2037 node
._parentnode
= _parentnode
;
// The former first item now points back at the prepended node.
2040 first
._prevnode
= node
;
// NOTE(review): the body of this method is not visible in this view; presumably it stores
// the given node in the collection - confirm against the full file.
2044 internal void Add(HtmlNode node
)
2050 /// Gets the node at the specified index.
2052 public HtmlNode
this[int index
]
// The stored object is converted with "as", so a non-HtmlNode entry would yield null rather than throw.
2056 return _items
[index
] as HtmlNode
;
// Searches the list for the given node by scanning items from the first to the last and
// comparing each one with "==".
// NOTE(review): the return statements (found / not found) are not visible in this view.
2060 internal int GetNodeIndex(HtmlNode node
)
2062 // TODO: should we rewrite this? what would be the key of a node?
2063 for(int i
=0;i
<_items
.Count
;i
++)
2065 if (node
== ((HtmlNode
)_items
[i
]))
2074 /// Gets the index of a given node in the list (the indexer is declared as int and looks the node up with GetNodeIndex).
2076 public int this[HtmlNode node
]
2080 int index
= GetNodeIndex(node
);
// Failure path: the message embeds the OuterHtml of a CloneNode(false) copy of the node.
// NOTE(review): the condition guarding this throw and the return statement are not visible in this view.
2083 throw new ArgumentOutOfRangeException("node", "Node \"" + node
.CloneNode(false).OuterHtml
+ "\" was not found in the collection");
2090 /// Returns an enumerator that can iterate through the list.
2092 /// <returns>An HtmlNodeEnumerator created over the collection's backing list.</returns>
2093 public HtmlNodeEnumerator
GetEnumerator()
2095 return new HtmlNodeEnumerator(_items
);
// Explicit IEnumerable implementation; forwards to the typed GetEnumerator above.
2098 IEnumerator IEnumerable
.GetEnumerator()
2100 return GetEnumerator();
2104 /// Represents an enumerator that can iterate through the list.
2106 public class HtmlNodeEnumerator
: IEnumerator
// Created by the owning collection with the ArrayList to iterate.
// NOTE(review): the field declarations and the constructor body are not visible in this view.
2111 internal HtmlNodeEnumerator(ArrayList items
)
2118 /// Sets the enumerator to its initial position, which is before the first element in the collection.
2126 /// Advances the enumerator to the next element of the collection.
2128 /// <returns>true if the enumerator was successfully advanced to the next element, false if the enumerator has passed the end of the collection.</returns>
2129 public bool MoveNext()
// True while the current position is still inside the list.
// NOTE(review): the statement that advances the position is not visible in this view.
2132 return (_index
<_items
.Count
);
2136 /// Gets the current element in the collection.
2138 public HtmlNode Current
// Item at the current position, cast to HtmlNode.
2142 return (HtmlNode
)(_items
[_index
]);
2147 /// Gets the current element in the collection.
// Explicit IEnumerator.Current implementation; its body is not visible in this view.
2149 object IEnumerator
.Current
2160 /// Represents an HTML text node.
2162 public class HtmlTextNode
: HtmlNode
// Text value held by this node.
// NOTE(review): the code that reads or assigns this field is not visible in this view.
2164 private string _text
;
// Forwards to the HtmlNode constructor with the Text node type.
2166 internal HtmlTextNode(HtmlDocument ownerdocument
, int index
):
2167 base(HtmlNodeType
.Text
, ownerdocument
, index
)
2172 /// Gets or Sets the HTML between the start and end tags of the object. In the case of a text node, it is equal to OuterHtml.
2174 public override string InnerHtml
2187 /// Gets or Sets the object and its content in HTML.
2189 public override string OuterHtml
// One visible path returns the base class value; the surrounding condition is not visible in this view.
2195 return base.OuterHtml
;
2202 /// Gets or Sets the text of the node.
// One visible path returns base.OuterHtml; the surrounding condition is not visible in this view.
2210 return base.OuterHtml
;
2222 /// Represents an HTML comment.
2224 public class HtmlCommentNode
: HtmlNode
// Comment text; the visible getters fall back to the base class value while it is null.
2226 private string _comment
;
// Forwards to the HtmlNode constructor with the Comment node type.
2228 internal HtmlCommentNode(HtmlDocument ownerdocument
, int index
):
2229 base(HtmlNodeType
.Comment
, ownerdocument
, index
)
2234 /// Gets or Sets the HTML between the start and end tags of the object. In the case of a text node, it is equal to OuterHtml.
// NOTE(review): the sentence about text nodes looks copied from HtmlTextNode; confirm whether it applies to comment nodes.
2236 public override string InnerHtml
// While no comment text has been stored, the base class value is returned.
// NOTE(review): the rest of the getter and the setter are not visible in this view.
2240 if (_comment
== null)
2242 return base.InnerHtml
;
2253 /// Gets or Sets the object and its content in HTML.
2255 public override string OuterHtml
// While no comment text has been stored, the base class value is returned.
2259 if (_comment
== null)
2261 return base.OuterHtml
;
// Otherwise the stored text is wrapped in comment delimiters.
2263 return "<!--" + _comment
+ "-->";
2268 /// Gets or Sets the comment text of the node.
2270 public string Comment
2274 if (_comment
== null)
2276 return base.InnerHtml
;