Updating tags for StringIO.
[rbx.git] / lib / rexml / entity.rb
blobff2d45f39bd9352ff30265f6bdad412d04d843cf
1 require 'rexml/child'
2 require 'rexml/source'
3 require 'rexml/xmltokens'
5 module REXML
6         # God, I hate DTDs.  I really do.  Why this idiot standard still
7         # plagues us is beyond me.
class Entity < Child
  include XMLTokens

  # Regular-expression source fragments for the pieces of an entity
  # declaration.  Each constant is a String that is interpolated into the
  # larger patterns below; NAME and REFERENCE are expected to come from
  # XMLTokens.
  PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
  SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
  PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
  EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
  NDATADECL = "\\s+NDATA\\s+#{NAME}"
  PEREFERENCE = "%#{NAME};"
  ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
  PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
  ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
  PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
  # The alternation is wrapped in its own (non-capturing) group so that the
  # optional leading whitespace applies to both kinds of declaration.
  # Without the group, "\s*A|B" parses as "(\s*A)|(B)" and a parameter
  # entity declaration preceded by whitespace would not match at offset 0.
  ENTITYDECL = /\s*(?:(?:#{GEDECL})|(?:#{PEDECL}))/um

  attr_reader :name, :external, :ref, :ndata, :pubid

  # Create a new entity.  Simple entities can be constructed by passing a
  # name and a value to the constructor; this creates a generic, plain
  # entity reference.  For anything more complicated, pass the parsed
  # declaration as an Array.
  #
  # +WARNING+: the constructor does not validate the resulting entity, so
  # it is possible to build a non-conformant Entity.
  #
  # stream::
  #   Either the entity name, or an Array describing a declaration.  For
  #   an Array, element 1 is the name and element 2 is either the literal
  #   value or one of the tokens 'SYSTEM' / 'PUBLIC'.  After 'SYSTEM',
  #   element 3 is the system reference and element 4 (when the array has
  #   exactly five elements) is the NDATA name.  After 'PUBLIC', element 3
  #   is the public id and element 4 the system reference.  A trailing
  #   '%' element marks a parameter entity; note that it is popped off the
  #   array that was passed in.
  # value::
  #   The entity value (used only when +stream+ is not an Array)
  # parent::
  #   The parent node, handed to Child's constructor
  # reference::
  #   Whether this is a parameter entity (used only when +stream+ is not
  #   an Array)
  #
  #  e = Entity.new( 'amp', '&' )
  def initialize(stream, value=nil, parent=nil, reference=false)
    super(parent)
    @ndata = @pubid = @value = @external = @ref = nil
    if stream.kind_of? Array
      @name = stream[1]
      if stream[-1] == '%'
        @reference = true
        stream.pop
      else
        @reference = false
      end
      # Anchored on purpose: only the bare token selects the external
      # form.  An internal value that merely contains the word "SYSTEM"
      # or "PUBLIC" must be kept as a value.
      if stream[2] =~ /\A(?:SYSTEM|PUBLIC)\z/
        @external = stream[2]
        if @external == 'SYSTEM'
          @ref = stream[3]
          @ndata = stream[4] if stream.size == 5
        else
          @pubid = stream[3]
          @ref = stream[4]
        end
      else
        @value = stream[2]
      end
    else
      @reference = reference
      @external = nil
      @name = stream
      @value = value
    end
  end

  # Evaluates whether the given string matches an entity definition,
  # returning true if so, and false otherwise.  The declaration has to
  # start at the very beginning of the string (optionally preceded by
  # whitespace).
  def self.matches?(string)
    (ENTITYDECL =~ string) == 0
  end

  # Evaluates to the unnormalized value of this entity; that is, replacing
  # all entities -- both %ent; and &ent; entities.  This differs from
  # +value()+ in that +value+ only replaces %ent; entities.  Returns nil
  # when the entity has no value.
  def unnormalized
    v = value()
    return nil if v.nil?
    @unnormalized = Text::unnormalize(v, parent)
    @unnormalized
  end

  #once :unnormalized

  # Returns the value of this entity unprocessed -- raw.  This is the
  # normalized value; that is, with all %ent; and &ent; entities intact.
  def normalized
    @value
  end

  # Write out a fully formed entity definition (assuming the Entity object
  # itself is valid: an internal entity must have a value, an external one
  # a system reference).
  #
  # out::
  #   An object implementing <TT>&lt;&lt;</TT> to which the entity will be
  #   output
  # indent::
  #   *DEPRECATED* and ignored
  def write(out, indent=-1)
    out << '<!ENTITY '
    out << '% ' if @reference
    out << @name
    out << ' '
    if @external
      out << @external << ' '
      if @pubid
        write_quoted(out, @pubid)
        out << ' '
      end
      write_quoted(out, @ref)
      out << ' NDATA ' << @ndata if @ndata
    else
      write_quoted(out, @value)
    end
    out << '>'
  end

  # Returns this entity as a string.  See write().
  def to_s
    # dup so the buffer is mutable even when string literals are frozen.
    rv = ''.dup
    write rv
    rv
  end

  PEREFERENCE_RE = /#{PEREFERENCE}/um

  # Returns the value of this entity.  At the moment, only internal
  # entities are processed.  If the value contains parameter entity
  # references (IE, %blah;), those are replaced with the values obtained
  # from the parent.  IE, if the doctype contains:
  #  <!ENTITY % foo "bar">
  #  <!ENTITY yada "nanoo %foo; nanoo">
  # then:
  #  doctype.entity('yada').value   #-> "nanoo bar nanoo"
  #
  # References for which the parent returns nil are left untouched.
  # Returns nil when the entity has no value, and an unexpanded copy of
  # the value when there is no parent.
  def value
    return nil unless @value
    rv = @value.clone
    return rv unless @parent
    # NOTE(review): this relies on NAME (from XMLTokens) containing a
    # capture group, so that scan yields one-element arrays -- confirm.
    @value.scan(PEREFERENCE_RE).each do |captures|
      ref_name = captures[0]
      entity_value = @parent.entity( ref_name )
      next if entity_value.nil?
      # Literal pattern plus block form: the name is not interpreted as a
      # regular expression, and backslashes in the replacement text are
      # inserted verbatim.
      rv.gsub!( "%#{ref_name};" ) { entity_value }
    end
    rv
  end

  private

  # Appends +text+ to +out+ wrapped in quotes, using single quotes when
  # the text itself contains a double quote and double quotes otherwise.
  def write_quoted(out, text)
    q = text.include?('"') ? "'" : '"'
    out << q << text << q
  end
end
151         # This is a set of entity constants -- the ones defined in the XML
152         # specification.  These are +gt+, +lt+, +amp+, +quot+ and +apos+.
module EntityConst
  # Greater-than sign: +>+
  GT   = Entity.new('gt', '>')
  # Less-than sign: +<+
  LT   = Entity.new('lt', '<')
  # Ampersand: +&+
  AMP  = Entity.new('amp', '&')
  # Double quote: +"+
  QUOT = Entity.new('quot', '"')
  # Apostrophe (single quote): +'+
  APOS = Entity.new('apos', "'")
end