lib/rexml/element.rb

   1 require "rexml/parent"
   2 require "rexml/namespace"
   3 require "rexml/attribute"
   4 require "rexml/cdata"
   5 require "rexml/xpath"
   6 require "rexml/parseexception"
   7
   8 module REXML
   9   # An implementation note about namespaces:
  10   # As we parse, when we find namespaces we put them in a hash and assign
  11   # them a unique ID.  We then convert the namespace prefix for the node
  12   # to the unique ID.  This makes namespace lookup much faster for the
  13   # cost of extra memory use.  We save the namespace prefix for the
  14   # context node and convert it back when we write it.
  15   @@namespaces = {}
  16
  17   # Represents a tagged XML element.  Elements are characterized by
  18   # having children, attributes, and names, and can themselves be
  19   # children.
  20   class Element < Parent
  21     include Namespace
  22
  23     UNDEFINED = "UNDEFINED";            # The default name
  24
  25     # Mechanisms for accessing attributes and child elements of this
  26     # element.
  27     attr_reader :attributes, :elements
  28     # The context holds information about the processing environment, such as
  29     # whitespace handling.
  30     attr_accessor :context
  31
  32     # Constructor
  33     # arg::
  34     #   if not supplied, will be set to the default value.
  35     #   If a String, the name of this object will be set to the argument.
  36     #   If an Element, the object will be shallowly cloned; name,
  37     #   attributes, and namespaces will be copied.  Children will +not+ be
  38     #   copied.
  39     # parent::
  40     #   if supplied, must be a Parent, and will be used as
  41     #   the parent of this object.
  42     # context::
  43     #   If supplied, must be a hash containing context items.  Context items
  44     #   include:
  45     # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
  46     #   strings being the names of the elements to respect
  47     #   whitespace for.  Defaults to :+all+.
  48     # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
  49     #   strings being the names of the elements to ignore whitespace on.
  50     #   Overrides :+respect_whitespace+.
  51     # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
  52     #   of strings being the names of the elements in which to ignore
  53     #   whitespace-only nodes.  If this is set, Text nodes which contain only
  54     #   whitespace will not be added to the document tree.
  55     # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
  56     #   the elements to process in raw mode.  In raw mode, special
  57     #   characters in text are not converted to or from entities.
  58     def initialize( arg = UNDEFINED, parent=nil, context=nil )
  59       super(parent)
  60
  61       @elements = Elements.new(self)
  62       @attributes = Attributes.new(self)
  63       @context = context
  64
  65       if arg.kind_of? String
  66         self.name = arg
  67       elsif arg.kind_of? Element
  68         self.name = arg.expanded_name
  69         arg.attributes.each_attribute{ |attribute|
  70           @attributes << Attribute.new( attribute )
  71         }
  72         @context = arg.context
  73       end
  74     end
  75
  76     def inspect
  77       rv = "<#@expanded_name"
  78
  79       @attributes.each_attribute do |attr|
  80         rv << " "
  81         attr.write( rv, 0 )
  82       end
  83
  84       if children.size > 0
  85         rv << "> ... </>"
  86       else
  87         rv << "/>"
  88       end
  89     end
  90
  91
  92     # Creates a shallow copy of self.
  93     #   d = Document.new "<a><b/><b/><c><d/></c></a>"
  94     #   new_a = d.root.clone
  95     #   puts new_a  # => "<a/>"
  96     def clone
  97       self.class.new self
  98     end
  99
 100     # Evaluates to the root node of the document that this element
 101     # belongs to. If this element doesn't belong to a document, but does
 102     # belong to another Element, the parent's root will be returned, until the
 103     # earliest ancestor is found.
 104     #
 105     # Note that this is not the same as the document element.
 106     # In the following example, <a> is the document element, and the root
 107     # node is the parent node of the document element.  You may ask yourself
 108     # why the root node is useful: consider the doctype and XML declaration,
 109     # and any processing instructions before the document element... they
 110     # are children of the root node, or siblings of the document element.
 111     # The only time this isn't true is when an Element is created that is
 112     # not part of any Document.  In this case, the ancestor that has no
 113     # parent acts as the root node.
 114     #  d = Document.new '<a><b><c/></b></a>'
 115     #  a = d[1] ; c = a[1][1]
 116     #  d.root_node == d   # TRUE
 117     #  a.root_node        # namely, d
 118     #  c.root_node        # again, d
 119     def root_node
 120       parent.nil? ? self : parent.root_node
 121     end
 122
 123     def root
 124       return elements[1] if self.kind_of? Document
 125       return self if parent.kind_of? Document or parent.nil?
 126       return parent.root
 127     end
 128
 129     # Evaluates to the document to which this element belongs, or nil if this
 130     # element doesn't belong to a document.
 131     def document
 132       rt = root
 133       rt.parent if rt
 134     end
 135
 136     # Evaluates to +true+ if whitespace is respected for this element.  This
 137     # is the case if:
 138     # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
 139     # 2. The context has :+respect_whitespace+ set to :+all+ or
 140     #    an array containing the name of this element, and
 141     #    :+compress_whitespace+ isn't set to :+all+ or an array containing the
 142     #    name of this element.
 143     # The evaluation is tested against +expanded_name+, and so is namespace
 144     # sensitive.
 145     def whitespace
 146       @whitespace = nil
 147       if @context
 148         if @context[:respect_whitespace]
 149           @whitespace = (@context[:respect_whitespace] == :all or
 150                          @context[:respect_whitespace].include? expanded_name)
 151         end
 152         @whitespace = false if (@context[:compress_whitespace] and
 153                                 (@context[:compress_whitespace] == :all or
 154                                  @context[:compress_whitespace].include? expanded_name)
 155                                )
 156       end
 157       @whitespace = true unless @whitespace == false
 158       @whitespace
 159     end
 160
 161     def ignore_whitespace_nodes
 162       @ignore_whitespace_nodes = false
 163       if @context
 164         if @context[:ignore_whitespace_nodes]
 165           @ignore_whitespace_nodes =
 166             (@context[:ignore_whitespace_nodes] == :all or
 167              @context[:ignore_whitespace_nodes].include? expanded_name)
 168         end
 169       end
 170     end
 171
 172     # Evaluates to +true+ if raw mode is set for this element.  This
 173     # is the case if the context has :+raw+ set to :+all+ or
 174     # an array containing the name of this element.
 175     #
 176     # The evaluation is tested against +expanded_name+, and so is namespace
 177     # sensitive.
 178     def raw
 179       @raw = (@context and @context[:raw] and
 180               (@context[:raw] == :all or
 181                @context[:raw].include? expanded_name))
 182                @raw
 183     end
 184
 185     #once :whitespace, :raw, :ignore_whitespace_nodes
 186
 187     #################################################
 188     # Namespaces                                    #
 189     #################################################
 190
 191     # Evaluates to an +Array+ containing the prefixes (names) of all defined
 192     # namespaces at this context node.
 193     #  doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
 194     #  doc.elements['//b'].prefixes # -> ['x', 'y']
 195     def prefixes
 196       prefixes = []
 197       prefixes = parent.prefixes if parent
 198       prefixes |= attributes.prefixes
 199       return prefixes
 200     end
 201
 202     def namespaces
 203       namespaces = {}
 204       namespaces = parent.namespaces if parent
 205       namespaces = namespaces.merge( attributes.namespaces )
 206       return namespaces
 207     end
 208
 209     # Evaluates to the URI for a prefix, or the empty string if no such
 210     # namespace is declared for this element. Evaluates recursively for
 211     # ancestors.  Returns the default namespace, if there is one.
 212     # prefix::
 213     #   the prefix to search for.  If not supplied, returns the default
 214     #   namespace if one exists
 215     # Returns::
 216     #   the namespace URI as a String, or nil if no such namespace
 217     #   exists.  If the namespace is undefined, returns an empty string
 218     #  doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
 219     #  b = doc.elements['//b']
 220     #  b.namespace           # -> '1'
 221     #  b.namespace("y")      # -> '2'
 222     def namespace(prefix=nil)
 223       if prefix.nil?
 224         prefix = prefix()
 225       end
 226       if prefix == ''
 227         prefix = "xmlns"
 228       else
 229         prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
 230       end
 231       ns = attributes[ prefix ]
 232       ns = parent.namespace(prefix) if ns.nil? and parent
 233       ns = '' if ns.nil? and prefix == 'xmlns'
 234       return ns
 235     end
 236
 237     # Adds a namespace to this element.
 238     # prefix::
 239     #   the prefix string, or the namespace URI if +uri+ is not
 240     #   supplied
 241     # uri::
 242     #   the namespace URI.  May be nil, in which +prefix+ is used as
 243     #   the URI
 244     # Evaluates to: this Element
 245     #  a = Element.new("a")
 246     #  a.add_namespace("xmlns:foo", "bar" )
 247     #  a.add_namespace("foo", "bar")  # shorthand for previous line
 248     #  a.add_namespace("twiddle")
 249     #  puts a   #-> <a xmlns:foo='bar' xmlns='twiddle'/>
 250     def add_namespace( prefix, uri=nil )
 251       unless uri
 252         @attributes["xmlns"] = prefix
 253       else
 254         prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/
 255         @attributes[ prefix ] = uri
 256       end
 257       self
 258     end
 259
 260     # Removes a namespace from this node.  This only works if the namespace is
 261     # actually declared in this node.  If no argument is passed, deletes the
 262     # default namespace.
 263     #
 264     # Evaluates to: this element
 265     #  doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
 266     #  doc.root.delete_namespace
 267     #  puts doc     # -> <a xmlns:foo='bar'/>
 268     #  doc.root.delete_namespace 'foo'
 269     #  puts doc     # -> <a/>
 270     def delete_namespace namespace="xmlns"
 271       namespace = "xmlns:#{namespace}" unless namespace == 'xmlns'
 272       attribute = attributes.get_attribute(namespace)
 273       attribute.remove unless attribute.nil?
 274       self
 275     end
 276
 277     #################################################
 278     # Elements                                      #
 279     #################################################
 280
 281     # Adds a child to this element, optionally setting attributes in
 282     # the element.
 283     # element::
 284     #   optional.  If Element, the element is added.
 285     #   Otherwise, a new Element is constructed with the argument (see
 286     #   Element.initialize).
 287     # attrs::
 288     #   If supplied, must be a Hash containing String name,value
 289     #   pairs, which will be used to set the attributes of the new Element.
 290     # Returns:: the Element that was added
 291     #  el = doc.add_element 'my-tag'
 292     #  el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
 293     #  el = Element.new 'my-tag'
 294     #  doc.add_element el
 295     def add_element element, attrs=nil
 296       raise "First argument must be either an element name, or an Element object" if element.nil?
 297       el = @elements.add(element)
 298       attrs.each do |key, value|
 299         el.attributes[key]=Attribute.new(key,value,self)
 300       end       if attrs.kind_of? Hash
 301       el
 302     end
 303
 304     # Deletes a child element.
 305     # element::
 306     #   Must be an +Element+, +String+, or +Integer+.  If Element,
 307     #   the element is removed.  If String, the element is found (via XPath)
 308     #   and removed.  <em>This means that any parent can remove any
 309     #   descendant.</em>  If Integer, the Element indexed by that number will be
 310     #   removed.
 311     # Returns:: the element that was removed.
 312     #  doc.delete_element "/a/b/c[@id='4']"
 313     #  doc.delete_element doc.elements["//k"]
 314     #  doc.delete_element 1
 315     def delete_element element
 316       @elements.delete element
 317     end
 318
 319     # Evaluates to +true+ if this element has at least one child Element
 320     #  doc = Document.new "<a><b/><c>Text</c></a>"
 321     #  doc.root.has_elements?               # -> true
 322     #  doc.elements["/a/b"].has_elements?   # -> false
 323     #  doc.elements["/a/c"].has_elements?   # -> false
 324     def has_elements?
 325       !@elements.empty?
 326     end
 327
 328     # Iterates through the child elements, yielding for each Element that
 329     # has a particular attribute set.
 330     # key::
 331     #   the name of the attribute to search for
 332     # value::
 333     #   the value of the attribute
 334     # max::
 335     #   (optional) causes this method to return after yielding
 336     #   for this number of matching children
 337     # name::
 338     #   (optional) if supplied, this is an XPath that filters
 339     #   the children to check.
 340     #
 341     #  doc = Document.new "<a><b id='1'/><c id='2'/><d id='1'/><e/></a>"
 342     #  # Yields b, c, d
 343     #  doc.root.each_element_with_attribute( 'id' ) {|e| p e}
 344     #  # Yields b, d
 345     #  doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
 346     #  # Yields b
 347     #  doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
 348     #  # Yields d
 349     #  doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
 350     def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
 351       each_with_something( proc {|child|
 352         if value.nil?
 353           child.attributes[key] != nil
 354         else
 355           child.attributes[key]==value
 356         end
 357       }, max, name, &block )
 358     end
 359
 360     # Iterates through the children, yielding for each Element that
 361     # has a particular text set.
 362     # text::
 363     #   the text to search for.  If nil, or not supplied, will iterate
 364     #   over all +Element+ children that contain at least one +Text+ node.
 365     # max::
 366     #   (optional) causes this method to return after yielding
 367     #   for this number of matching children
 368     # name::
 369     #   (optional) if supplied, this is an XPath that filters
 370     #   the children to check.
 371     #
 372     #  doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
 373     #  # Yields b, c, d
 374     #  doc.each_element_with_text {|e|p e}
 375     #  # Yields b, c
 376     #  doc.each_element_with_text('b'){|e|p e}
 377     #  # Yields b
 378     #  doc.each_element_with_text('b', 1){|e|p e}
 379     #  # Yields d
 380     #  doc.each_element_with_text(nil, 0, 'd'){|e|p e}
 381     def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
 382       each_with_something( proc {|child|
 383         if text.nil?
 384           child.has_text?
 385         else
 386           child.text == text
 387         end
 388       }, max, name, &block )
 389     end
 390
 391     # Synonym for Element.elements.each
 392     def each_element( xpath=nil, &block ) # :yields: Element
 393       @elements.each( xpath, &block )
 394     end
 395
 396     # Synonym for Element.to_a
 397     # This is a little slower than calling elements.each directly.
 398     # xpath:: any XPath by which to search for elements in the tree
 399     # Returns:: an array of Elements that match the supplied path
 400     def get_elements( xpath )
 401       @elements.to_a( xpath )
 402     end
 403
 404     # Returns the next sibling that is an element, or nil if there is
 405     # no Element sibling after this one
 406     #  doc = Document.new '<a><b/>text<c/></a>'
 407     #  doc.root.elements['b'].next_element          #-> <c/>
 408     #  doc.root.elements['c'].next_element          #-> nil
 409     def next_element
 410       element = next_sibling
 411       element = element.next_sibling until element.nil? or element.kind_of? Element
 412       return element
 413     end
 414
 415     # Returns the previous sibling that is an element, or nil if there is
 416     # no Element sibling prior to this one
 417     #  doc = Document.new '<a><b/>text<c/></a>'
 418     #  doc.root.elements['c'].previous_element          #-> <b/>
 419     #  doc.root.elements['b'].previous_element          #-> nil
 420     def previous_element
 421       element = previous_sibling
 422       element = element.previous_sibling until element.nil? or element.kind_of? Element
 423       return element
 424     end
 425
 426
 427     #################################################
 428     # Text                                          #
 429     #################################################
 430
 431     # Evaluates to +true+ if this element has at least one Text child
 432     def has_text?
 433       not text().nil?
 434     end
 435
 436     # A convenience method which returns the String value of the _first_
 437     # child text element, if one exists, and +nil+ otherwise.
 438     #
 439     # <em>Note that an element may have multiple Text elements, perhaps
 440     # separated by other children</em>.  Be aware that this method only returns
 441     # the first Text node.
 442     #
 443     # This method returns the +value+ of the first text child node, which
 444     # ignores the +raw+ setting, so always returns normalized text. See
 445     # the Text::value documentation.
 446     #
 447     #  doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
 448     #  # The element 'p' has two text elements, "some text " and " more text".
 449     #  doc.root.text              #-> "some text "
 450     def text( path = nil )
 451       rv = get_text(path)
 452       return rv.value unless rv.nil?
 453       nil
 454     end
 455
 456     # Returns the first child Text node, if any, or +nil+ otherwise.
 457     # This method returns the actual +Text+ node, rather than the String content.
 458     #  doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
 459     #  # The element 'p' has two text elements, "some text " and " more text".
 460     #  doc.root.get_text.value            #-> "some text "
 461     def get_text path = nil
 462       rv = nil
 463       if path
 464         element = @elements[ path ]
 465         rv = element.get_text unless element.nil?
 466       else
 467         rv = @children.find { |node| node.kind_of? Text }
 468       end
 469       return rv
 470     end
 471
 472     # Sets the first Text child of this object.  See text() for a
 473     # discussion about Text children.
 474     #
 475     # If a Text child already exists, the child is replaced by this
 476     # content.  This means that Text content can be deleted by calling
 477     # this method with a nil argument.  In this case, the next Text
 478     # child becomes the first Text child.  In no case is the order of
 479     # any siblings disturbed.
 480     # text::
 481     #   If a String, a new Text child is created and added to
 482     #   this Element as the first Text child.  If Text, the text is set
 483     #   as the first Child element.  If nil, then any existing first Text
 484     #   child is removed.
 485     # Returns:: this Element.
 486     #  doc = Document.new '<a><b/></a>'
 487     #  doc.root.text = 'Sean'      #-> '<a><b/>Sean</a>'
 488     #  doc.root.text = 'Elliott'   #-> '<a><b/>Elliott</a>'
 489     #  doc.root.add_element 'c'    #-> '<a><b/>Elliott<c/></a>'
 490     #  doc.root.text = 'Russell'   #-> '<a><b/>Russell<c/></a>'
 491     #  doc.root.text = nil         #-> '<a><b/><c/></a>'
 492     def text=( text )
 493       if text.kind_of? String
 494         text = Text.new( text, whitespace(), nil, raw() )
 495       elsif text and !text.kind_of? Text
 496         text = Text.new( text.to_s, whitespace(), nil, raw() )
 497       end
 498       old_text = get_text
 499       if text.nil?
 500         old_text.remove unless old_text.nil?
 501       else
 502         if old_text.nil?
 503           self << text
 504         else
 505           old_text.replace_with( text )
 506         end
 507       end
 508       return self
 509     end
 510
 511     # A helper method to add a Text child.  Actual Text instances can
 512     # be added with regular Parent methods, such as add() and <<()
 513     # text::
 514     #   if a String, a new Text instance is created and added
 515     #   to the parent.  If Text, the object is added directly.
 516     # Returns:: this Element
 517     #  e = Element.new('a')          #-> <a/>
 518     #  e.add_text 'foo'              #-> <a>foo</a>
 519     #  e.add_text Text.new(' bar')    #-> <a>foo bar</a>
 520     # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
 521     # element and <b>2</b> Text node children.
 522     def add_text( text )
 523       if text.kind_of? String
 524         if @children[-1].kind_of? Text
 525           @children[-1] << text
 526           return
 527         end
 528         text = Text.new( text, whitespace(), nil, raw() )
 529       end
 530       self << text unless text.nil?
 531       return self
 532     end
 533
 534     def node_type
 535       :element
 536     end
 537
 538     def xpath
 539       path_elements = []
 540       cur = self
 541       path_elements << __to_xpath_helper( self )
 542       while cur.parent
 543         cur = cur.parent
 544         path_elements << __to_xpath_helper( cur )
 545       end
 546       return path_elements.reverse.join( "/" )
 547     end
 548
 549     #################################################
 550     # Attributes                                    #
 551     #################################################
 552
 553     def attribute( name, namespace=nil )
 554       prefix = nil
 555       prefix = namespaces.index(namespace) if namespace
 556       prefix = nil if prefix == 'xmlns'
 557       attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
 558     end
 559
 560     # Evaluates to +true+ if this element has any attributes set, false
 561     # otherwise.
 562     def has_attributes?
 563       return !@attributes.empty?
 564     end
 565
 566     # Adds an attribute to this element, overwriting any existing attribute
 567     # by the same name.
 568     # key::
 569     #   can be either an Attribute or a String.  If an Attribute,
 570     #   the attribute is added to the list of Element attributes.  If String,
 571     #   the argument is used as the name of the new attribute, and the value
 572     #   parameter must be supplied.
 573     # value::
 574     #   Required if +key+ is a String, and ignored if the first argument is
 575     #   an Attribute.  This is a String, and is used as the value
 576     #   of the new Attribute.  This should be the unnormalized value of the
 577     #   attribute (without entities).
 578     # Returns:: the Attribute added
 579     #  e = Element.new 'e'
 580     #  e.add_attribute( 'a', 'b' )               #-> <e a='b'/>
 581     #  e.add_attribute( 'x:a', 'c' )             #-> <e a='b' x:a='c'/>
 582     #  e.add_attribute Attribute.new('b', 'd')   #-> <e a='b' x:a='c' b='d'/>
 583     def add_attribute( key, value=nil )
 584       if key.kind_of? Attribute
 585         @attributes << key
 586       else
 587         @attributes[key] = value
 588       end
 589     end
 590
 591     # Add multiple attributes to this element.
 592     # hash:: is either a hash, or array of arrays
 593     #  el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
 594     #  el.add_attributes( [ ["name1","value1"], ["name2","value2"] ] )
 595     def add_attributes hash
 596       if hash.kind_of? Hash
 597         hash.each_pair {|key, value| @attributes[key] = value }
 598       elsif hash.kind_of? Array
 599         hash.each { |value| @attributes[ value[0] ] = value[1] }
 600       end
 601     end
 602
 603     # Removes an attribute
 604     # key::
 605     #   either an Attribute or a String.  In either case, the
 606     #   attribute is found by matching the attribute name to the argument,
 607     #   and then removed.  If no attribute is found, no action is taken.
 608     # Returns::
 609     #   the attribute removed, or nil if this Element did not contain
 610     #   a matching attribute
 611     #  e = Element.new('E')
 612     #  e.add_attribute( 'name', 'Sean' )             #-> <E name='Sean'/>
 613     #  r = e.add_attribute( 'sur:name', 'Russell' )  #-> <E name='Sean' sur:name='Russell'/>
 614     #  e.delete_attribute( 'name' )                  #-> <E sur:name='Russell'/>
 615     #  e.delete_attribute( r )                       #-> <E/>
 616     def delete_attribute(key)
 617       attr = @attributes.get_attribute(key)
 618       attr.remove unless attr.nil?
 619     end
 620
 621     #################################################
 622     # Other Utilities                               #
 623     #################################################
 624
 625     # Get an array of all CData children.
 626     # IMMUTABLE
 627     def cdatas
 628       find_all { |child| child.kind_of? CData }.freeze
 629     end
 630
 631     # Get an array of all Comment children.
 632     # IMMUTABLE
 633     def comments
 634       find_all { |child| child.kind_of? Comment }.freeze
 635     end
 636
 637     # Get an array of all Instruction children.
 638     # IMMUTABLE
 639     def instructions
 640       find_all { |child| child.kind_of? Instruction }.freeze
 641     end
 642
 643     # Get an array of all Text children.
 644     # IMMUTABLE
 645     def texts
 646       find_all { |child| child.kind_of? Text }.freeze
 647     end
 648
 649     # == DEPRECATED
 650     # See REXML::Formatters
 651     #
 652     # Writes out this element, and recursively, all children.
 653     # writer::
 654     #   an object which supports '<< string'; this is where the
 655     #   document will be written.
 656     # indent::
 657     #   An integer.  If -1, no indenting will be used; otherwise, the
 658     #   indentation will be this number of spaces, and children will be
 659     #   indented an additional amount.  Defaults to -1
 660     # transitive::
 661     #   If transitive is true and indent is >= 0, then the output will be
 662     #   pretty-printed in such a way that the added whitespace does not affect
 663     #   the parse tree of the document
 664     # ie_hack::
 665     #   Internet Explorer is the worst piece of crap to have ever been
 666     #   written, with the possible exception of Windows itself.  Since IE is
 667     #   unable to parse proper XML, we have to provide a hack to generate XML
 668     #   that IE's limited abilities can handle.  This hack inserts a space
 669     #   before the /> on empty tags.  Defaults to false
 670     #
 671     #  out = ''
 672     #  doc.write( out )     #-> doc is written to the string 'out'
 673     #  doc.write( $stdout ) #-> doc written to the console
 674     def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
 675       Kernel.warn("#{self.class.name}.write is deprecated.  See REXML::Formatters")
 676       formatter = if indent > -1
 677           if transitive
 678             REXML::Formatters::Transitive.new( indent, ie_hack )
 679           else
 680             REXML::Formatters::Pretty.new( indent, ie_hack )
 681           end
 682         else
 683           REXML::Formatters::Default.new( ie_hack )
 684         end
 685       formatter.write( self, output )
 686     end
 687
 688
 689     private
 690     def __to_xpath_helper node
 691       rv = node.expanded_name.clone
 692       if node.parent
 693         results = node.parent.find_all {|n|
 694           n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name
 695         }
 696         if results.length > 1
 697           idx = results.index( node )
 698           rv << "[#{idx+1}]"
 699         end
 700       end
 701       rv
 702     end
 703
 704     # A private helper method
 705     def each_with_something( test, max=0, name=nil )
 706       num = 0
 707       child=nil
 708       @elements.each( name ){ |child|
 709         yield child if test.call(child) and num += 1
 710         return if max>0 and num == max
 711       }
 712     end
 713   end
 714
 715   ########################################################################
 716   # ELEMENTS                                                             #
 717   ########################################################################
 718
 719   # A class which provides filtering of children for Elements, and
 720   # XPath search support.  You are expected to only encounter this class as
 721   # the <tt>element.elements</tt> object.  Therefore, you are
 722   # _not_ expected to instantiate this yourself.
 723   class Elements
 724     include Enumerable
 725     # Constructor
 726     # parent:: the parent Element
 727     def initialize parent
 728       @element = parent
 729     end
 730
 731     # Fetches a child element.  Filters only Element children, regardless of
 732     # the XPath match.
 733     # index::
 734     #   the search parameter.  This is either an Integer, which
 735     #   will be used to find the index'th child Element, or an XPath,
 736     #   which will be used to search for the Element.  <em>Because
 737     #   of the nature of XPath searches, any element in the connected XML
 738     #   document can be fetched through any other element.</em>  <b>The
 739     #   Integer index is 1-based, not 0-based.</b>  This means that the first
 740     #   child element is at index 1, not 0, and the +n+th element is at index
 741     #   +n+, not <tt>n-1</tt>.  This is because XPath indexes element children
 742     #   starting from 1, not 0, and the indexes should be the same.
 743     # name::
 744     #   optional, and only used if the first argument is an
 745     #   Integer.  In that case, the index'th child Element that has the
 746     #   supplied name will be returned.  Note again that the indexes start at 1.
 747     # Returns:: the first matching Element, or nil if no child matched
 748     #  doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
 749     #  doc.root.elements[1]       #-> <b/>
 750     #  doc.root.elements['c']     #-> <c id="1"/>
 751     #  doc.root.elements[2,'c']   #-> <c id="2"/>
 752     def []( index, name=nil)
 753       if index.kind_of? Integer
 754         raise "index (#{index}) must be >= 1" if index < 1
 755         name = literalize(name) if name
 756         num = 0
 757         child = nil
 758         @element.find { |child|
 759           child.kind_of? Element and
 760           (name.nil? ? true : child.has_name?( name )) and
 761           (num += 1) == index
 762         }
 763       else
 764         return XPath::first( @element, index )
 765         #{ |element|
 766         #       return element if element.kind_of? Element
 767         #}
 768         #return nil
 769       end
 770     end
 771
 772     # Sets an element, replacing any previous matching element.  If no
 773     # existing element is found ,the element is added.
 774     # index:: Used to find a matching element to replace.  See []().
 775     # element::
 776     #   The element to replace the existing element with
 777     #   the previous element
 778     # Returns:: nil if no previous element was found.
 779     #
 780     #  doc = Document.new '<a/>'
 781     #  doc.root.elements[10] = Element.new('b')    #-> <a><b/></a>
 782     #  doc.root.elements[1]                        #-> <b/>
 783     #  doc.root.elements[1] = Element.new('c')     #-> <a><c/></a>
 784     #  doc.root.elements['c'] = Element.new('d')   #-> <a><d/></a>
 785     def []=( index, element )
 786       previous = self[index]
 787       if previous.nil?
 788         @element.add element
 789       else
 790         previous.replace_with element
 791       end
 792       return previous
 793     end
 794
 795     # Returns +true+ if there are no +Element+ children, +false+ otherwise
 796     def empty?
 797       @element.find{ |child| child.kind_of? Element}.nil?
 798     end
 799
 800     # Returns the index of the supplied child (starting at 1), or -1 if
 801     # the element is not a child
 802     # element:: an +Element+ child
 803     def index element
 804       rv = 0
 805       found = @element.find do |child|
 806         child.kind_of? Element and
 807         (rv += 1) and
 808         child == element
 809       end
 810       return rv if found == element
 811       return -1
 812     end
 813
 814     # Deletes a child Element
 815     # element::
 816     #   Either an Element, which is removed directly; an
 817     #   xpath, where the first matching child is removed; or an Integer,
 818     #   where the n'th Element is removed.
 819     # Returns:: the removed child
 820     #  doc = Document.new '<a><b/><c/><c id="1"/></a>'
 821     #  b = doc.root.elements[1]
 822     #  doc.root.elements.delete b           #-> <a><c/><c id="1"/></a>
 823     #  doc.elements.delete("a/c[@id='1']")  #-> <a><c/></a>
 824     #  doc.root.elements.delete 1           #-> <a/>
 825     def delete element
 826       if element.kind_of? Element
 827         @element.delete element
 828       else
 829         el = self[element]
 830         el.remove if el
 831       end
 832     end
 833
 834     # Removes multiple elements.  Filters for Element children, regardless of
 835     # XPath matching.
 836     # xpath:: all elements matching this String path are removed.
 837     # Returns:: an Array of Elements that have been removed
 838     #  doc = Document.new '<a><c/><c/><c/><c/></a>'
 839     #  deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
 840     def delete_all( xpath )
 841       rv = []
 842       XPath::each( @element, xpath) {|element|
 843         rv << element if element.kind_of? Element
 844       }
 845       rv.each do |element|
 846         @element.delete element
 847         element.remove
 848       end
 849       return rv
 850     end
 851
 852     # Adds an element
 853     # element::
 854     #   if supplied, is either an Element, String, or
 855     #   Source (see Element.initialize).  If not supplied or nil, a
 856     #   new, default Element will be constructed
 857     # Returns:: the added Element
 858     #  a = Element.new('a')
 859     #  a.elements.add(Element.new('b'))  #-> <a><b/></a>
 860     #  a.elements.add('c')               #-> <a><b/><c/></a>
 861     def add element=nil
 862       rv = nil
 863       if element.nil?
 864         Element.new("", self, @element.context)
 865       elsif not element.kind_of?(Element)
 866         Element.new(element, self, @element.context)
 867       else
 868         @element << element
 869         element.context = @element.context
 870         element
 871       end
 872     end
 873
 874     alias :<< :add
 875
 876     # Iterates through all of the child Elements, optionally filtering
 877     # them by a given XPath
 878     # xpath::
 879     #   optional.  If supplied, this is a String XPath, and is used to
 880     #   filter the children, so that only matching children are yielded.  Note
 881     #   that XPaths are automatically filtered for Elements, so that
 882     #   non-Element children will not be yielded
 883     #  doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
 884     #  doc.root.each {|e|p e}       #-> Yields b, c, d, b, c, d elements
 885     #  doc.root.each('b') {|e|p e}  #-> Yields b, b elements
 886     #  doc.root.each('child::node()')  {|e|p e}
 887     #  #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
 888     #  XPath.each(doc.root, 'child::node()', &block)
 889     #  #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
 890     def each( xpath=nil, &block)
 891       XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element }
 892     end
 893
 894     def collect( xpath=nil, &block )
 895       collection = []
 896       XPath::each( @element, xpath ) {|e|
 897         collection << yield(e)  if e.kind_of?(Element)
 898       }
 899       collection
 900     end
 901
 902     def inject( xpath=nil, initial=nil, &block )
 903       first = true
 904       XPath::each( @element, xpath ) {|e|
 905         if (e.kind_of? Element)
 906           if (first and initial == nil)
 907             initial = e
 908             first = false
 909           else
 910             initial = yield( initial, e ) if e.kind_of? Element
 911           end
 912         end
 913       }
 914       initial
 915     end
 916
 917     # Returns the number of +Element+ children of the parent object.
 918     #  doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
 919     #  doc.root.size            #-> 6, 3 element and 3 text nodes
 920     #  doc.root.elements.size   #-> 3
 921     def size
 922       count = 0
 923       @element.each {|child| count+=1 if child.kind_of? Element }
 924       count
 925     end
 926
 927     # Returns an Array of Element children.  An XPath may be supplied to
 928     # filter the children.  Only Element children are returned, even if the
 929     # supplied XPath matches non-Element children.
 930     #  doc = Document.new '<a>sean<b/>elliott<c/></a>'
 931     #  doc.root.elements.to_a                  #-> [ <b/>, <c/> ]
 932     #  doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
 933     #  XPath.match(doc.root, "child::node()")  #-> [ sean, <b/>, elliott, <c/> ]
 934     def to_a( xpath=nil )
 935       rv = XPath.match( @element, xpath )
 936       return rv.find_all{|e| e.kind_of? Element} if xpath
 937       rv
 938     end
 939
 940     private
 941     # Private helper class.  Removes quotes from quoted strings
 942     def literalize name
 943       name = name[1..-2] if name[0] == ?' or name[0] == ?"               #'
 944       name
 945     end
 946   end
 947
 948   ########################################################################
 949   # ATTRIBUTES                                                           #
 950   ########################################################################
 951
 952   # A class that defines the set of Attributes of an Element and provides
 953   # operations for accessing elements in that set.
 954   class Attributes < Hash
 955     # Constructor
 956     # element:: the Element of which this is an Attribute
 957     def initialize element
 958       @element = element
 959     end
 960
 961     # Fetches an attribute value.  If you want to get the Attribute itself,
 962     # use get_attribute()
 963     # name:: an XPath attribute name.  Namespaces are relevant here.
 964     # Returns::
 965     #   the String value of the matching attribute, or +nil+ if no
 966     #   matching attribute was found.  This is the unnormalized value
 967     #   (with entities expanded).
 968     #
 969     #  doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
 970     #  doc.root.attributes['att']         #-> '<'
 971     #  doc.root.attributes['bar:att']     #-> '2'
 972     def [](name)
 973       attr = get_attribute(name)
 974       return attr.value unless attr.nil?
 975       return nil
 976     end
 977
 978     def to_a
 979       values.flatten
 980     end
 981
 982     # Returns the number of attributes the owning Element contains.
 983     #  doc = Document "<a x='1' y='2' foo:x='3'/>"
 984     #  doc.root.attributes.length        #-> 3
 985     def length
 986       c = 0
 987       each_attribute { c+=1 }
 988       c
 989     end
 990     alias :size :length
 991
 992     # Itterates over the attributes of an Element.  Yields actual Attribute
 993     # nodes, not String values.
 994     #
 995     #  doc = Document.new '<a x="1" y="2"/>'
 996     #  doc.root.attributes.each_attribute {|attr|
 997     #    p attr.expanded_name+" => "+attr.value
 998     #  }
 999     def each_attribute # :yields: attribute
1000       each_value do |val|
1001         if val.kind_of? Attribute
1002           yield val
1003         else
1004           val.each_value { |atr| yield atr }
1005         end
1006       end
1007     end
1008
1009     # Itterates over each attribute of an Element, yielding the expanded name
1010     # and value as a pair of Strings.
1011     #
1012     #  doc = Document.new '<a x="1" y="2"/>'
1013     #  doc.root.attributes.each {|name, value| p name+" => "+value }
1014     def each
1015       each_attribute do |attr|
1016         yield attr.expanded_name, attr.value
1017       end
1018     end
1019
1020     # Fetches an attribute
1021     # name::
1022     #   the name by which to search for the attribute.  Can be a
1023     #   <tt>prefix:name</tt> namespace name.
1024     # Returns:: The first matching attribute, or nil if there was none.  This
1025     # value is an Attribute node, not the String value of the attribute.
1026     #  doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
1027     #  doc.root.attributes.get_attribute("foo").value    #-> "2"
1028     #  doc.root.attributes.get_attribute("x:foo").value  #-> "1"
1029     def get_attribute( name )
1030       attr = fetch( name, nil )
1031       if attr.nil?
1032         return nil if name.nil?
1033         # Look for prefix
1034         name =~ Namespace::NAMESPLIT
1035         prefix, n = $1, $2
1036         if prefix
1037           attr = fetch( n, nil )
1038           # check prefix
1039           if attr == nil
1040           elsif attr.kind_of? Attribute
1041             return attr if prefix == attr.prefix
1042           else
1043             attr = attr[ prefix ]
1044             return attr
1045           end
1046         end
1047         element_document = @element.document
1048         if element_document and element_document.doctype
1049           expn = @element.expanded_name
1050           expn = element_document.doctype.name if expn.size == 0
1051           attr_val = element_document.doctype.attribute_of(expn, name)
1052           return Attribute.new( name, attr_val ) if attr_val
1053         end
1054         return nil
1055       end
1056       if attr.kind_of? Hash
1057         attr = attr[ @element.prefix ]
1058       end
1059       return attr
1060     end
1061
1062     # Sets an attribute, overwriting any existing attribute value by the
1063     # same name.  Namespace is significant.
1064     # name:: the name of the attribute
1065     # value::
1066     #   (optional) If supplied, the value of the attribute.  If
1067     #   nil, any existing matching attribute is deleted.
1068     # Returns::
1069     #   Owning element
1070     #  doc = Document.new "<a x:foo='1' foo='3'/>"
1071     #  doc.root.attributes['y:foo'] = '2'
1072     #  doc.root.attributes['foo'] = '4'
1073     #  doc.root.attributes['x:foo'] = nil
1074     def []=( name, value )
1075       if value.nil?             # Delete the named attribute
1076         attr = get_attribute(name)
1077         delete attr
1078         return
1079       end
1080       element_document = @element.document
1081       unless value.kind_of? Attribute
1082         if @element.document and @element.document.doctype
1083           value = Text::normalize( value, @element.document.doctype )
1084         else
1085           value = Text::normalize( value, nil )
1086         end
1087         value = Attribute.new(name, value)
1088       end
1089       value.element = @element
1090       old_attr = fetch(value.name, nil)
1091       if old_attr.nil?
1092         store(value.name, value)
1093       elsif old_attr.kind_of? Hash
1094         old_attr[value.prefix] = value
1095       elsif old_attr.prefix != value.prefix
1096         # Check for conflicting namespaces
1097         raise ParseException.new(
1098           "Namespace conflict in adding attribute \"#{value.name}\": "+
1099           "Prefix \"#{old_attr.prefix}\" = "+
1100           "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
1101           "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if
1102           value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
1103           @element.namespace( old_attr.prefix ) ==
1104             @element.namespace( value.prefix )
1105           store value.name, { old_attr.prefix   => old_attr,
1106             value.prefix                => value }
1107       else
1108         store value.name, value
1109       end
1110       return @element
1111     end
1112
1113     # Returns an array of Strings containing all of the prefixes declared
1114     # by this set of # attributes.  The array does not include the default
1115     # namespace declaration, if one exists.
1116     #  doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
1117     #        "z='glorp' p:k='gru'/>")
1118     #  prefixes = doc.root.attributes.prefixes    #-> ['x', 'y']
1119     def prefixes
1120       ns = []
1121       each_attribute do |attribute|
1122         ns << attribute.name if attribute.prefix == 'xmlns'
1123       end
1124       if @element.document and @element.document.doctype
1125         expn = @element.expanded_name
1126         expn = @element.document.doctype.name if expn.size == 0
1127         @element.document.doctype.attributes_of(expn).each {
1128           |attribute|
1129           ns << attribute.name if attribute.prefix == 'xmlns'
1130         }
1131       end
1132       ns
1133     end
1134
1135     def namespaces
1136       namespaces = {}
1137       each_attribute do |attribute|
1138         namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
1139       end
1140       if @element.document and @element.document.doctype
1141         expn = @element.expanded_name
1142         expn = @element.document.doctype.name if expn.size == 0
1143         @element.document.doctype.attributes_of(expn).each {
1144           |attribute|
1145           namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
1146         }
1147       end
1148       namespaces
1149     end
1150
1151     # Removes an attribute
1152     # attribute::
1153     #   either a String, which is the name of the attribute to remove --
1154     #   namespaces are significant here -- or the attribute to remove.
1155     # Returns:: the owning element
1156     #  doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
1157     #  doc.root.attributes.delete 'foo'   #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
1158     #  doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
1159     #  attr = doc.root.attributes.get_attribute('y:foo')
1160     #  doc.root.attributes.delete attr    #-> <a z:foo='4'/>"
1161     def delete( attribute )
1162       name = nil
1163       prefix = nil
1164       if attribute.kind_of? Attribute
1165         name = attribute.name
1166         prefix = attribute.prefix
1167       else
1168         attribute =~ Namespace::NAMESPLIT
1169         prefix, name = $1, $2
1170         prefix = '' unless prefix
1171       end
1172       old = fetch(name, nil)
1173       attr = nil
1174       if old.kind_of? Hash # the supplied attribute is one of many
1175         attr = old.delete(prefix)
1176         if old.size == 1
1177           repl = nil
1178           old.each_value{|v| repl = v}
1179           store name, repl
1180         end
1181       elsif old.nil?
1182         return @element
1183       else # the supplied attribute is a top-level one
1184         attr = old
1185         res = super(name)
1186       end
1187       @element
1188     end
1189
1190     # Adds an attribute, overriding any existing attribute by the
1191     # same name.  Namespaces are significant.
1192     # attribute:: An Attribute
1193     def add( attribute )
1194       self[attribute.name] = attribute
1195     end
1196
1197     alias :<< :add
1198
1199     # Deletes all attributes matching a name.  Namespaces are significant.
1200     # name::
1201     #   A String; all attributes that match this path will be removed
1202     # Returns:: an Array of the Attributes that were removed
1203     def delete_all( name )
1204       rv = []
1205       each_attribute { |attribute|
1206         rv << attribute if attribute.expanded_name == name
1207       }
1208       rv.each{ |attr| attr.remove }
1209       return rv
1210     end
1211
1212     # The +get_attribute_ns+ method retrieves a method by its namespace
1213     # and name. Thus it is possible to reliably identify an attribute
1214     # even if an XML processor has changed the prefix.
1215     #
1216     # Method contributed by Henrik Martensson
1217     def get_attribute_ns(namespace, name)
1218       each_attribute() { |attribute|
1219         if name == attribute.name &&
1220           namespace == attribute.namespace()
1221           return attribute
1222         end
1223       }
1224       nil
1225     end
1226   end
1227 end