Imported File#ftype spec from rubyspecs.
[rbx.git] / lib / rexml / parsers / sax2parser.rb
blobe402eb7747007083af22dbc2bc7d694cd415cc76
1 require 'rexml/parsers/baseparser'
2 require 'rexml/parseexception'
3 require 'rexml/namespace'
4 require 'rexml/text'
6 module REXML
7         module Parsers
# SAX2Parser
#
# Event-driven (SAX2-style) front end over BaseParser.  Events pulled from
# the underlying parser are translated into SAX2 notifications and delivered
# to the blocks and listener objects registered through #listen.
class SAX2Parser
  # source:: handed unchanged to BaseParser.new.
  def initialize source
    @parser = BaseParser.new(source)
    @listeners = []        # [symbol, match, listener] triples (non-callable observers)
    @procs = []            # [symbol, match, callable] triples
    @namespace_stack = []  # one { prefix => uri } hash per open element
    @has_listeners = false
    @tag_stack = []        # qualified names of the currently open elements
    @entities = {}         # entity name => replacement text, filled from :entitydecl events
  end

  # Returns the source object of the underlying parser.
  def source
    @parser.source
  end

  # Forwards +listener+ to the underlying parser's own add_listener.
  def add_listener( listener )
    @parser.add_listener( listener )
  end

  # Listen arguments:
  #
  # Symbol, Array, Block
  #   Listen to Symbol events on Array elements
  # Symbol, Block
  #   Listen to Symbol events
  # Array, Listener
  #   Listen to all events on Array elements
  # Array, Block
  #   Listen to :start_element events on Array elements
  # Listener
  #   Listen to all events
  #
  # Symbol can be one of: :start_element, :end_element,
  # :start_prefix_mapping, :end_prefix_mapping, :characters,
  # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
  # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
  #
  # There is an additional symbol that can be listened for: :progress.
  # This will be called for every event generated, passing in the current
  # stream position.
  #
  # Array contains regular expressions or strings which will be matched
  # against fully qualified element names.
  #
  # Listener must implement the methods in SAX2Listener.
  #
  # Block will be passed the same arguments as a SAX2Listener method would
  # be, where the method name is the same as the matched Symbol.
  # See the SAX2Listener for more information.
  #
  # Every form is registered through #add, so an identical registration
  # (same symbol, same match, same observer object) is stored only once.
  def listen( *args, &blok )
    first = args[0]
    if first.kind_of? Symbol
      if args.size == 2
        args[1].each { |match| add( [first, match, blok] ) }
      else
        add( [first, nil, blok] )
      end
    elsif first.kind_of? Array
      if args.size == 2
        first.each { |match| add( [nil, match, args[1]] ) }
      else
        first.each { |match| add( [:start_element, match, blok] ) }
      end
    else
      add( [nil, nil, first] )
    end
  end

  # Removes an observer.
  #
  # With +listener+ given, every registration whose observer == listener is
  # dropped.  Callable listeners are stored with the blocks (see #add), so
  # both registries are searched.  Without +listener+, the registrations of
  # the given block are dropped.
  def deafen( listener=nil, &blok )
    if listener
      @listeners.delete_if { |item| item[-1] == listener }
      @procs.delete_if { |item| item[-1] == listener }
      @has_listeners = false if @listeners.size == 0
    else
      @procs.delete_if { |item| item[-1] == blok }
    end
  end

  # Pulls events from the underlying parser until :end_document and
  # dispatches each one to the matching observers.  After every event except
  # :end_document a :progress notification carrying @parser.position is sent.
  def parse
    # Blocks receive start_document only when registered for exactly that
    # symbol; listener objects also receive it when registered for all events.
    @procs.each { |sym, _match, block| block.call if sym == :start_document }
    @listeners.each { |sym, _match, listener|
      listener.start_document if sym.nil? || sym == :start_document
    }
    # Deliberately not Kernel#loop: that would swallow a StopIteration raised
    # by an observer.
    while true
      event = @parser.pull
      case event[0]
      when :end_document
        handle( :end_document )
        break
      when :start_doctype
        handle( :doctype, *event[1..-1] )
      when :end_doctype
        # Nothing is dispatched for the end of the doctype itself.
      when :start_element
        start_element_event( event )
      when :end_element
        end_element_event( event )
      when :text
        handle( :characters, expand_entities( event[1] ) )
      when :entitydecl
        # Only three-element declarations are remembered for expansion.
        @entities[ event[1] ] = event[2] if event.size == 3
        handle( *event )
      when :processing_instruction, :comment, :attlistdecl,
        :elementdecl, :cdata, :notationdecl, :xmldecl
        handle( *event )
      end
      handle( :progress, @parser.position )
    end
  end

  private

  # Handles a :start_element event: records the open tag, reports namespace
  # declarations (start_prefix_mapping), then reports start_element with
  # (uri, local name, qualified name, attributes).
  def start_element_event( event )
    qname = event[1]
    attributes = event[2]
    @tag_stack.push( qname )
    # find the observers for namespaces
    procs = get_procs( :start_prefix_mapping, qname )
    listeners = get_listeners( :start_prefix_mapping, qname )
    if procs || listeners
      attributes.each { |n, v| attributes[n] = @parser.normalize(v) }
      # break out the namespace declarations; a bare "xmlns" yields a nil
      # prefix because "xmlns"[6..-1] is nil
      nsdecl = attributes.find_all { |n, _value| n =~ /^xmlns(:|$)/ }
      nsdecl.collect! { |n, value| [ n[6..-1], value ] }
      @namespace_stack.push({})
      nsdecl.each do |ns_prefix, ns_uri|
        @namespace_stack[-1][ns_prefix] = ns_uri
        procs.each { |ob| ob.call( ns_prefix, ns_uri ) } if procs
        listeners.each { |ob| ob.start_prefix_mapping( ns_prefix, ns_uri ) } if listeners
      end
    end
    prefix, local = split_qname( qname )
    uri = get_namespace( prefix )
    procs = get_procs( :start_element, qname )
    listeners = get_listeners( :start_element, qname )
    procs.each { |ob| ob.call( uri, local, qname, attributes ) } if procs
    listeners.each { |ob| ob.start_element( uri, local, qname, attributes ) } if listeners
  end

  # Handles an :end_element event: reports end_element with
  # (uri, local name, qualified name), then closes the element's namespace
  # scope and reports end_prefix_mapping for each prefix it declared.
  def end_element_event( event )
    qname = event[1]
    @tag_stack.pop
    prefix, local = split_qname( qname )
    uri = get_namespace( prefix )
    procs = get_procs( :end_element, qname )
    listeners = get_listeners( :end_element, qname )
    procs.each { |ob| ob.call( uri, local, qname ) } if procs
    listeners.each { |ob| ob.end_element( uri, local, qname ) } if listeners

    namespace_mapping = @namespace_stack.pop
    procs = get_procs( :end_prefix_mapping, qname )
    listeners = get_listeners( :end_prefix_mapping, qname )
    # The mapping is nil when no scope was pushed for this element.
    if namespace_mapping && (procs || listeners)
      namespace_mapping.each do |ns_prefix, _ns_uri|
        procs.each { |ob| ob.call( ns_prefix ) } if procs
        listeners.each { |ob| ob.end_prefix_mapping( ns_prefix ) } if listeners
      end
    end
  end

  # Splits a qualified name with Namespace::NAMESPLIT and returns
  # [prefix, local]; both are nil when the name does not match.
  def split_qname( qname )
    parts = Namespace::NAMESPLIT.match( qname )
    parts ? [parts[1], parts[2]] : [nil, nil]
  end

  # Returns a new string in which the declared entities and the numeric
  # character references of +raw+ are expanded.  +raw+ itself is not modified.
  def expand_entities( raw )
    text = raw
    @entities.each do |key, value|
      # Block form: a string replacement would treat backslash sequences in
      # the entity value (\0, \1, ...) as back-references.
      text = text.gsub( "&#{key};" ) { value }
    end
    text.gsub( Text::NUMERICENTITY ) do
      ref = Regexp.last_match(1)
      # "x41" -> "0x41" so Integer() reads it as hexadecimal
      ref = "0#{ref}" if ref[0, 1] == 'x'
      [Integer(ref)].pack('U*')
    end
  end

  # Dispatches +symbol+ with +arguments+ to every observer that matches the
  # innermost open element: blocks are called, listeners receive the method
  # named after +symbol+.
  def handle( symbol, *arguments )
    tag = @tag_stack[-1]
    procs = get_procs( symbol, tag )
    listeners = get_listeners( symbol, tag )
    # notify observers
    procs.each { |ob| ob.call( *arguments ) } if procs
    listeners.each { |l| l.send( symbol.to_s, *arguments ) } if listeners
  end

  # Callables registered for +symbol+ on element +name+; nil when no
  # callable is registered at all.
  def get_procs( symbol, name )
    matching_observers( @procs, symbol, name )
  end

  # Listener objects registered for +symbol+ on element +name+; nil when no
  # listener is registered at all.
  def get_listeners( symbol, name )
    matching_observers( @listeners, symbol, name )
  end

  # Shared selection logic for #get_procs and #get_listeners.  An entry
  # matches when its symbol is nil or equals +symbol+, and its match is nil,
  # equals +name+, or is a Regexp matching +name+.  Returns nil for an empty
  # registry, otherwise the (possibly empty) array of matching observers.
  def matching_observers( registry, symbol, name )
    return nil if registry.size == 0
    selected = registry.find_all do |sym, match, _observer|
      next false unless sym.nil? || symbol == sym
      match.nil? || name == match || (match.kind_of?(Regexp) && name =~ match)
    end
    selected.collect { |entry| entry[-1] }
  end

  # Stores a [symbol, match, observer] triple: with the blocks when the
  # observer responds to :call, with the listeners otherwise.  An identical
  # triple is not stored twice.
  def add( pair )
    if pair[-1].respond_to? :call
      @procs << pair unless @procs.include? pair
    else
      @listeners << pair unless @listeners.include? pair
      @has_listeners = true
    end
  end

  # Returns the URI bound to +prefix+ (nil = default namespace) by the
  # innermost open element that declares it, or nil when none does.
  def get_namespace( prefix )
    @namespace_stack.reverse_each do |mapping|
      uri = mapping[prefix]
      return uri unless uri.nil?
    end
    nil
  end
end
237         end