1 # depends on: class.rb string.rb
5 ivar_as_index :__ivars__ => 0, :source => 1, :data => 2, :names => 3 # NOTE(review): presumably binds each listed ivar to a fixed slot index; confirm against ivar_as_index
6 def __ivars__; @__ivars__ ; end # Reader: returns @__ivars__ unchanged.
7 def source ; @source ; end # Reader: returns @source (the pattern text compared by eql? and printed by inspect).
9 def names ; @names ; end # Reader: returns @names; MatchData#[] indexes it by Symbol to resolve a named group.
11 ValidOptions = ['m','i','x'] # Option letters recognised by get_option_string_length when scanning an inline option group.
12 ValidKcode = [?n,?e,?s,?u] # Kcode flag characters; Regexp.new looks up lang.downcase[0] in this list.
13 KcodeValue = [16,32,48,64] # Kcode bit values, indexed in parallel with ValidKcode and ORed into opts by Regexp.new.
28 # Constructs a new regular expression from the given pattern. The pattern
29 # may either be a String or a Regexp. If given a Regexp, options are copied
30 # from the pattern and any options given are not honoured. If the pattern is
31 # a String, additional options may be given.
33 # The first optional argument can either be a Fixnum representing one or
34 # more of the Regexp options ORed together (Regexp::IGNORECASE, EXTENDED and
35 # MULTILINE) or a flag to toggle case sensitivity. If opts is nil or false,
36 # the match is case sensitive. If opts is any non-nil, non-false and
37 # non-Fixnum object, its presence makes the regexp case insensitive (the obj
38 # is not used in any way.)
40 # The second optional argument can be used to enable multibyte support
41 # (which is disabled by default.) The flag must be one of the following
42 # strings in any combination of upper- and lowercase:
44 # * 'e', 'euc' for EUC
45 # * 's', 'sjis' for SJIS
46 # * 'u', 'utf8' for UTF-8
48 # You may also explicitly pass in 'n', 'N' or 'none' to disable multibyte
49 # support. Any other values are ignored.
51 def self.new(pattern, opts = nil, lang = nil)
52 if pattern.is_a?(Regexp)
53 opts = pattern.options
54 pattern = pattern.source
55 elsif opts.kind_of?(Fixnum)
56 opts = opts & (OPTION_MASK | KCODE_MASK) if opts > 0
63 if opts and lang and lang.kind_of?(String)
65 idx = ValidKcode.index(lang.downcase[0])
66 opts |= KcodeValue[idx] if idx
69 if self.class.equal? Regexp
70 return __regexp_new__(pattern, opts)
72 r = __regexp_new__(pattern, opts)
73 r.send :initialize, pattern, opts, lang
79 # FIXME - Optimize me using String#[], String#chr, etc.
80 # Do away with the control-character comparisons.
84 meta = %w![ ] { } ( ) | - * . \\ ? + ^ $ #!
86 str.codepoints.each do |c|
87 quoted << if meta.include?(c)
107 alias_method :compile, :new # compile is an alias for new.
108 alias_method :quote, :escape # quote is an alias for escape.
112 # See Regexp.new. This may be overridden by subclasses.
114 def initialize(arg, opts, lang)
118 def self.last_match(field = nil)
119 match = MethodContext.current.sender.last_match
121 return match if field.nil?
128 def self.last_match=(match)
129 # Set an ivar in the sender of our sender
130 parent = MethodContext.current.sender
132 ctx.last_match = match
136 # Different than last_match= because it sets the current last match, while
137 # last_match= sets the sender's last match.
139 def self.my_last_match=(match)
140 # Set an ivar in the sender
141 ctx = MethodContext.current.sender
142 ctx.last_match = match
145 def self.union(*patterns)
146 if patterns.nil? || patterns.length == 0
151 patterns.each do |pattern|
152 string += '|' if flag
153 string += pattern.to_s
156 return Regexp.new(string)
162 if !line.is_a?(String)
163 Regexp.last_match = nil
166 res = self.match(line)
167 return res.nil? ? nil : res.begin(0)
170 # Returns the index of the first character in the region that
171 # matched or nil if there was no match. See #match for returning
172 # the MatchData instead.
174 # Guard with `unless str.nil?` (not `if str`): only nil bypasses StringValue; false does not.
175 str = StringValue(str) unless str.nil?
177 match = match_from(str, 0)
179 Regexp.last_match = match
180 return match.begin(0)
182 Regexp.last_match = nil
190 while(match = self.match_from(str, start))
202 if !other.is_a?(String)
203 if !other.respond_to?(:to_str)
204 Regexp.last_match = nil
208 if match = self.match_from(other.to_str, 0)
209 Regexp.last_match = match
212 Regexp.last_match = nil
218 (options & IGNORECASE) > 0 ? true : false
222 return false unless other.kind_of?(Regexp)
223 # Ruby 1.8 doesn't distinguish between KCODE_NONE (16) and not specified (0) for eql?
224 self_options = options & KCODE_MASK != 0 ? options : options + KCODE_NONE
225 other_options = other.options & KCODE_MASK != 0 ? other.options : other.options + KCODE_NONE
226 return (source == other.source) && ( self_options == other_options)
229 alias_method :==, :eql? # == shares the eql? implementation (same source and kcode-normalised options).
232 str = '/' << source << '/' << option_to_string(options)
233 if options & KCODE_MASK == 0
242 str = '/' << source.gsub("/", "\\/") << '/' << option_to_string(options)
244 str << k[0,1] if k and k != "none"
249 lang = options & KCODE_MASK
250 return "none" if lang == KCODE_NONE
251 return "euc" if lang == KCODE_EUC
252 return 'sjis' if lang == KCODE_SJIS
253 return 'utf8' if lang == KCODE_UTF8
257 # Performs normal match and returns MatchData object from $~ or nil.
259 return nil if str.nil?
260 Regexp.last_match = search_region(str, 0, str.size, true)
263 def match_from(str, count)
264 return nil if str.nil?
265 search_region(str, count, str.size, true)
277 if (len - idx) > 4 && pattern[idx,2] == "(?"
280 offset = get_option_string_length(pattern[idx..-1])
282 option |= string_to_option(pattern[idx, offset])
286 if pattern[idx,1] == '-'
288 offset = get_option_string_length(pattern[idx..-1])
290 option &= ~string_to_option(pattern[idx, offset])
295 if pattern[idx..1] == ')'
298 elsif pattern[idx,1] == ':' && pattern[-1,1] == ')'
300 if !Regexp.new(pattern[idx..-2], 0).is_a?(Regexp)
301 option = self.options
311 string << option_to_string(option)
312 if (option & OPTION_MASK) != OPTION_MASK
313 string << '-' << option_to_string(~option)
315 string << ':' << pattern[0..endpt] << ')'
318 def get_option_string_length(string)
320 while idx < string.length do
321 if !ValidOptions.include?(string[idx,1])
329 def option_to_string(option)
331 string << 'm' if (option & MULTILINE) > 0
332 string << 'i' if (option & IGNORECASE) > 0
333 string << 'x' if (option & EXTENDED) > 0
341 ivar_as_index :__ivars__ => 0, :source => 1, :regexp => 2, :full => 3, :region => 4 # NOTE(review): presumably binds each listed ivar to a fixed slot index; confirm against ivar_as_index
356 return full.at(0) if idx == 0
357 return @region.at(idx - 1).at(0)
361 return full.at(1) if idx == 0
362 @region.at(idx - 1).at(1)
367 out << self.begin(idx)
378 @region.each do |tup|
385 out << @source[x, y-x]
392 return "" if full.at(0) == 0
397 def pre_match_from(idx)
398 return "" if full.at(0) == 0
400 @source[idx, nd-idx+1]
404 self.begin(0) == self.end(0)
408 nd = @source.size - 1
413 def [](idx, len = nil)
415 return to_a[idx, len]
416 elsif idx.is_a?(Symbol)
417 num = @regexp.names[idx]
418 raise ArgumentError, "Unknown named group '#{idx}'" unless num
419 return get_capture(num)
420 elsif !idx.is_a?(Integer) or idx < 0
425 return matched_area()
427 return get_capture(idx - 1)
436 "#<MatchData:0x#{object_id.to_s(16)} \"#{matched_area}\">"
441 raise LocalJumpError, "no block given"
446 out << ma if yield ma
448 each_capture do |str|
456 alias_method :size, :length # size is an alias for length.
460 ary.unshift matched_area()
464 def values_at(*indexes)
465 indexes.map { |i| self[i] }
474 private :matched_area
478 return nil if !y or x == -1
480 return @source[x, y-x]
486 @region.each do |tup|
488 yield @source[x, y-x]
492 private :each_capture