1 require "rexml/element"
2 require "rexml/xmldecl"
4 require "rexml/comment"
5 require "rexml/doctype"
6 require "rexml/instruction"
8 require "rexml/parseexception"
10 require "rexml/parsers/baseparser"
11 require "rexml/parsers/streamparser"
12 require "rexml/parsers/treeparser"
15 # Represents a full XML document, including PIs, a doctype, etc. A
16 # Document has a single child that can be accessed by root().
17 # Note that if you want to have an XML declaration written for a document
18 # you create, you must add one; REXML documents do not write a default
19 # declaration for you. See |DECLARATION| and |write|.
20 class Document < Element
21 # A convenient default XML declaration. If you want an XML declaration,
22 # the easiest way to add one is mydoc << Document::DECLARATION
24 # Use: mydoc << XMLDecl.default
25 DECLARATION = XMLDecl.default
28 # @param source if supplied, must be a Document, String, or IO.
29 # Documents have their context and Element attributes cloned.
30 # Strings are expected to be valid XML documents. IOs are expected
31 # to be sources of valid XML documents.
32 # @param context if supplied, contains the context of the document;
33 # this should be a Hash.
34 def initialize( source = nil, context = {} )
38 if source.kind_of? Document
39 @context = source.context
55 # According to the XML spec, a root node has no expanded name
59 #d ? d.name : "UNDEFINED"
62 alias :name :expanded_name
64 # We override this, because XMLDecls and DocTypes must go at the start
67 if child.kind_of? XMLDecl
68 @children.unshift child
70 elsif child.kind_of? DocType
71 # Find first Element or DocType node and insert the decl right
72 # before it. If there is no such node, just insert the child at the
73 # end. If there is a child and it is a DocType, then replace it.
74 insert_before_index = 0
76 insert_before_index += 1
77 x.kind_of?(Element) || x.kind_of?(DocType)
79 if @children[ insert_before_index ] # Not null = not end of list
80 if @children[ insert_before_index ].kind_of DocType
81 @children[ insert_before_index ] = child
83 @children[ index_before_index-1, 0 ] = child
85 else # Insert at end of list
86 @children[insert_before_index] = child
91 raise "attempted adding second root element to document" if @elements.size > 1
97 def add_element(arg=nil, arg2=nil)
99 raise "attempted adding second root element to document" if @elements.size > 1
103 # @return the root Element of the document, or nil if this document has none.
108 #@children.find { |item| item.kind_of? Element }
111 # @return the DocType child of the document, if one exists, or nil otherwise.
114 @children.find { |item| item.kind_of? DocType }
117 # @return the XMLDecl of this document; if no XMLDecl has been
118 # set, the default declaration is returned.
121 return rv if rv.kind_of? XMLDecl
122 rv = @children.unshift(XMLDecl.default)[0]
125 # @return the XMLDecl version of this document as a String.
126 # If no XMLDecl has been set, returns the default version.
131 # @return the XMLDecl encoding of this document as a String.
132 # If no XMLDecl has been set, returns the default encoding.
137 # @return the XMLDecl standalone value of this document as a String.
138 # If no XMLDecl has been set, returns the default setting.
140 xml_decl().stand_alone?
# Write the XML tree out, optionally with indent. This writes out the
# entire XML document, including XML declarations, doctype declarations,
# and processing instructions (if any are given).
#
# A controversial point is whether Document should always write the XML
# declaration (<?xml version='1.0'?>) whether or not one is given by the
# user (or source document). REXML does not write one if one was not
# specified, because it adds unnecessary bandwidth to some applications.
#
# See also the classes in the rexml/formatters package for the proper way
# to change the default formatting of XML output.
#
# _Examples_
#   Document.new("<a><b/></a>").write
#
#   output_string = ""
#   Document.new("<a><b/></a>").write( output_string, 2, true )
#
# output::
#   An object which supports '<< string'; this is where the document will
#   be written. Defaults to $stdout.
# indent::
#   An integer. If -1, no indenting will be used and the Default formatter
#   writes the document. Any larger value is handed to the Pretty (or
#   Transitive) formatter as its indentation setting. (Historically
#   documented as "twice this number of spaces" per level -- confirm
#   against the formatter in use.) Defaults to -1.
# trans::
#   If trans is true and indent is >= 0, then the output will be
#   pretty-printed in such a way that the added whitespace does not affect
#   the absolute *value* of the document -- that is, it leaves the value
#   and number of Text nodes in the document unchanged. Defaults to false.
# ie_hack::
#   Compatibility switch for Internet Explorer, which cannot parse some
#   well-formed XML. When true, a space is inserted before the /> on empty
#   tags. The flag is simply forwarded to the formatter. Defaults to false.
def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
  # Wrap the target in an Output built with the declared encoding, unless
  # the declaration says UTF-8 or the caller already passed an Output.
  if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
    output = Output.new( output, xml_decl.encoding )
  end
  # indent > -1 selects an indenting formatter; trans picks which one.
  formatter = if indent > -1
      if trans
        REXML::Formatters::Transitive.new( indent, ie_hack )
      else
        REXML::Formatters::Pretty.new( indent, ie_hack )
      end
    else
      REXML::Formatters::Default.new( ie_hack )
    end
  formatter.write( self, output )
end
# Class-level convenience wrapper: builds a Parsers::StreamParser from
# +source+ and +listener+ and runs its #parse.
#
# source::   passed through unchanged to Parsers::StreamParser.new
# listener:: passed through unchanged to Parsers::StreamParser.new
#            (presumably the object that receives the stream callbacks --
#            confirm against the StreamParser documentation)
#
# Returns the result of StreamParser#parse.
def Document::parse_stream( source, listener )
  Parsers::StreamParser.new( source, listener ).parse
end
205 Parsers::TreeParser.new( source, self ).parse