7 alias open_uri_original_open open # :nodoc:
9 # makes it possible to open various resources including URIs.
10 # If the first argument responds to the `open' method,
11 # the method is called with the rest arguments.
13 # If the first argument is a string which begins with xxx://,
14 # it is parsed by URI.parse. If the parsed object responds to the `open' method,
15 # the method is called with the rest arguments.
17 # Otherwise original open is called.
19 # Since open-uri.rb provides URI::HTTP#open, URI::HTTPS#open and
21 # Kernel[#.]open can accept such URIs and strings which begin with
22 # http://, https:// and ftp://.
23 # In these cases, the opened file object is extended by OpenURI::Meta.
24 def open(name, *rest, &block) # :doc:
25 if name.respond_to?(:open)
26 name.open(*rest, &block)
27 elsif name.respond_to?(:to_str) &&
28 %r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name &&
29 (uri = URI.parse(name)).respond_to?(:open)
30 uri.open(*rest, &block)
32 open_uri_original_open(name, *rest, &block)
38 # OpenURI is an easy-to-use wrapper for net/http, net/https and net/ftp.
42 # It is possible to open http/https/ftp URL as usual like opening a file:
44 # open("http://www.ruby-lang.org/") {|f|
45 # f.each_line {|line| p line}
48 # The opened file has several methods for meta information as follows since
49 # it is extended by OpenURI::Meta.
51 # open("http://www.ruby-lang.org/en") {|f|
52 # f.each_line {|line| p line}
53 # p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
54 # p f.content_type # "text/html"
55 # p f.charset # "iso-8859-1"
56 # p f.content_encoding # []
57 # p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
60 # Additional header fields can be specified by an optional hash argument.
62 # open("http://www.ruby-lang.org/en/",
63 # "User-Agent" => "Ruby/#{RUBY_VERSION}",
64 # "From" => "foo@bar.invalid",
65 # "Referer" => "http://www.ruby-lang.org/") {|f|
69 # The environment variables such as http_proxy, https_proxy and ftp_proxy
70 # are in effect by default. :proxy => nil disables proxy.
72 # open("http://www.ruby-lang.org/en/raa.html", :proxy => nil) {|f|
76 # URI objects can be opened in a similar way.
78 # uri = URI.parse("http://www.ruby-lang.org/en/")
83 # URI objects can be read directly. The returned string is also extended by
89 # Author:: Tanaka Akira <akr@m17n.org>
94 :progress_proc => true,
95 :content_length_proc => true,
96 :http_basic_authentication => true,
99 def OpenURI.check_options(options) # :nodoc:
101 next unless Symbol === k
102 unless Options.include? k
103 raise ArgumentError, "unrecognized option: #{k}"
108 def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
109 if !rest.empty? && (String === rest.first || Integer === rest.first)
111 if !rest.empty? && Integer === rest.first
115 return mode, perm, rest
118 def OpenURI.open_uri(name, *rest) # :nodoc:
119 uri = URI::Generic === name ? name : URI.parse(name)
120 mode, perm, rest = OpenURI.scan_open_optional_arguments(*rest)
121 options = rest.shift if !rest.empty? && Hash === rest.first
122 raise ArgumentError.new("extra arguments") if !rest.empty?
124 OpenURI.check_options(options)
126 unless mode == nil ||
127 mode == 'r' || mode == 'rb' ||
129 raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
132 io = open_loop(uri, options)
144 def OpenURI.open_loop(uri, options) # :nodoc:
145 case opt_proxy = options.fetch(:proxy, true)
147 find_proxy = lambda {|u| u.find_proxy}
149 find_proxy = lambda {|u| nil}
151 opt_proxy = URI.parse(opt_proxy)
152 find_proxy = lambda {|u| opt_proxy}
154 find_proxy = lambda {|u| opt_proxy}
156 raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
162 redirect = catch(:open_uri_redirect) {
164 uri.buffer_open(buf, find_proxy.call(uri), options)
168 if redirect.relative?
169 # Although it violates RFC2616, Location: field may have relative
170 # URI. It is converted to absolute URI using uri as a base URI.
171 redirect = uri + redirect
173 unless OpenURI.redirectable?(uri, redirect)
174 raise "redirection forbidden: #{uri} -> #{redirect}"
176 if options.include? :http_basic_authentication
177 # send authentication only for the URI directly specified.
178 options = options.dup
179 options.delete :http_basic_authentication
182 raise "HTTP redirection loop: #{uri}" if uri_set.include? uri.to_s
183 uri_set[uri.to_s] = true
193 def OpenURI.redirectable?(uri1, uri2) # :nodoc:
194 # This test is intended to forbid a redirection from http://... to
195 # file:///etc/passwd.
196 # However this is ad hoc. It should be extensible/configurable.
197 uri1.scheme.downcase == uri2.scheme.downcase ||
198 (/\A(?:http|ftp)\z/i =~ uri1.scheme && /\A(?:http|ftp)\z/i =~ uri2.scheme)
201 def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
203 raise "Non-HTTP proxy URI: #{proxy}" if proxy.class != URI::HTTP
206 if target.userinfo && "1.9.0" <= RUBY_VERSION
207 # don't raise for 1.8 because compatibility.
208 raise ArgumentError, "userinfo not supported. [RFC3986]"
213 if URI::HTTP === target
216 klass = Net::HTTP::Proxy(proxy.host, proxy.port)
218 target_host = target.host
219 target_port = target.port
220 request_uri = target.request_uri
222 # FTP over HTTP proxy
223 target_host = proxy.host
224 target_port = proxy.port
225 request_uri = target.to_s
228 http = klass.new(target_host, target_port)
229 if target.class == URI::HTTPS
232 http.verify_mode = OpenSSL::SSL::VERIFY_PEER
233 store = OpenSSL::X509::Store.new
234 store.set_default_paths
235 http.cert_store = store
239 options.each {|k, v| header[k] = v if String === k }
243 if target.class == URI::HTTPS
244 # xxx: information hiding violation
245 sock = http.instance_variable_get(:@socket)
246 if sock.respond_to?(:io)
249 sock = sock.instance_variable_get(:@socket) # 1.8
251 sock.post_connection_check(target_host)
253 req = Net::HTTP::Get.new(request_uri, header)
254 if options.include? :http_basic_authentication
255 user, pass = options[:http_basic_authentication]
256 req.basic_auth user, pass
258 http.request(req) {|response|
260 if options[:content_length_proc] && Net::HTTPSuccess === resp
261 if resp.key?('Content-Length')
262 options[:content_length_proc].call(resp['Content-Length'].to_i)
264 options[:content_length_proc].call(nil)
267 resp.read_body {|str|
269 if options[:progress_proc] && Net::HTTPSuccess === resp
270 options[:progress_proc].call(buf.size)
277 io.status = [resp.code, resp.message]
278 resp.each {|name,value| buf.io.meta_add_field name, value }
280 when Net::HTTPSuccess
281 when Net::HTTPMovedPermanently, # 301
282 Net::HTTPFound, # 302
283 Net::HTTPSeeOther, # 303
284 Net::HTTPTemporaryRedirect # 307
285 throw :open_uri_redirect, URI.parse(resp['location'])
287 raise OpenURI::HTTPError.new(io.status.join(' '), io)
291 class HTTPError < StandardError
292 def initialize(message, io)
299 class Buffer # :nodoc:
310 if StringIO === @io && StringMax < @size
312 io = Tempfile.new('open-uri')
314 Meta.init io, @io if @io.respond_to? :meta
321 Meta.init @io unless @io.respond_to? :meta
326 # Mixin for holding meta-information.
328 def Meta.init(obj, src=nil) # :nodoc:
335 obj.status = src.status
336 obj.base_uri = src.base_uri
337 src.meta.each {|name, value|
338 obj.meta_add_field(name, value)
343 # returns an Array which consists of the status code and message.
344 attr_accessor :status
346 # returns a URI which is base of relative URIs in the data.
348 # It may differ from the URI supplied by a user because of redirection.
348 attr_accessor :base_uri
350 # returns a Hash which represents header fields.
351 # The Hash keys are downcased for canonicalization.
354 def meta_add_field(name, value) # :nodoc:
355 @meta[name.downcase] = value
358 # returns a Time which represents Last-Modified field.
360 if v = @meta['last-modified']
367 RE_LWS = /[\r\n\t ]+/n
368 RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
369 RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
370 RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
372 def content_type_parse # :nodoc:
373 v = @meta['content-type']
374 # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
375 if v && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ v
377 subtype = $2.downcase
379 $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval|
380 val = qval.gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/) { $1 ? $1[1,1] : $& } if qval
381 parameters << [att.downcase, val]
383 ["#{type}/#{subtype}", *parameters]
389 # returns "type/subtype" which is MIME Content-Type.
390 # It is downcased for canonicalization.
391 # Content-Type parameters are stripped.
393 type, *parameters = content_type_parse
394 type || 'application/octet-stream'
397 # returns a charset parameter in Content-Type field.
398 # It is downcased for canonicalization.
400 # If charset parameter is not given but a block is given,
401 # the block is called and its result is returned.
402 # It can be used to guess charset.
404 # If neither the charset parameter nor a block is given,
405 # nil is returned, except for text types in HTTP.
406 # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1.
408 type, *parameters = content_type_parse
409 if pair = parameters.assoc('charset')
413 elsif type && %r{\Atext/} =~ type &&
414 @base_uri && /\Ahttp\z/i =~ @base_uri.scheme
415 "iso-8859-1" # RFC2616 3.7.1
421 # returns a list of encodings in Content-Encoding field
422 # as an Array of String.
423 # The encodings are downcased for canonicalization.
425 v = @meta['content-encoding']
426 if v && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ v
427 v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase}
434 # Mixin for HTTP and FTP URIs.
436 # OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP.
438 # OpenURI::OpenRead#open takes optional 3 arguments as:
439 # OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
441 # `mode' and `perm' are the same as for Kernel#open.
443 # However, `mode' must be read mode because OpenURI::OpenRead#open doesn't
444 # support write mode (yet).
445 # Also `perm' is just ignored because it is meaningful only for file
448 # `options' must be a hash.
450 # Each pair whose key is a string in the hash specifies an extra header
452 # I.e. it is ignored for FTP without HTTP proxy.
454 # The hash may include other options whose key is a symbol:
458 # :proxy => "http://proxy.foo.com:8000/"
459 # :proxy => URI.parse("http://proxy.foo.com:8000/")
464 # If :proxy option is specified, the value should be String, URI,
466 # When String or URI is given, it is treated as proxy URI.
467 # When true is given or the option itself is not specified,
468 # environment variable `scheme_proxy' is examined.
469 # `scheme' is replaced by `http', `https' or `ftp'.
470 # When false or nil is given, the environment variables are ignored and
471 # connection will be made to a server directly.
473 # [:http_basic_authentication]
475 # :http_basic_authentication=>[user, password]
477 # If :http_basic_authentication is specified,
478 # the value should be an array which contains 2 strings:
479 # username and password.
480 # It is used for HTTP Basic authentication defined by RFC 2617.
482 # [:content_length_proc]
484 # :content_length_proc => lambda {|content_length| ... }
486 # If :content_length_proc option is specified, the option value procedure
487 # is called before actual transfer is started.
488 # It takes one argument which is expected content length in bytes.
490 # If two or more transfers are done by HTTP redirection, the procedure
491 # is called only once, for the last transfer.
493 # When expected content length is unknown, the procedure is called with
495 # This happens when the HTTP response has no Content-Length header.
499 # :progress_proc => lambda {|size| ...}
501 # If :progress_proc option is specified, the proc is called with one
502 # argument each time when `open' gets content fragment from network.
503 # The argument `size' is the accumulated transferred size in bytes.
505 # If two or more transfers are done by HTTP redirection, the procedure
506 # is called only once, for the last transfer.
508 # :progress_proc and :content_length_proc are intended to be used for
510 # For example, it can be implemented as follows using Ruby/ProgressBar.
514 # :content_length_proc => lambda {|t|
516 # pbar = ProgressBar.new("...", t)
517 # pbar.file_transfer_mode
520 # :progress_proc => lambda {|s|
524 # OpenURI::OpenRead#open returns an IO like object if block is not given.
525 # Otherwise it yields the IO object and returns the value of the block.
526 # The IO object is extended with OpenURI::Meta.
527 def open(*rest, &block)
528 OpenURI.open_uri(self, *rest, &block)
531 # OpenURI::OpenRead#read([options]) reads the content referenced by self and
532 # returns the content as string.
533 # The string is extended with OpenURI::Meta.
534 # The argument `options' is the same as for OpenURI::OpenRead#open.
536 self.open(options) {|f|
547 # returns a proxy URI.
548 # The proxy URI is obtained from environment variables such as http_proxy,
549 # ftp_proxy, no_proxy, etc.
550 # If there is no proper proxy, nil is returned.
552 # Note that capitalized variables (HTTP_PROXY, FTP_PROXY, NO_PROXY, etc.)
555 # But http_proxy and HTTP_PROXY are treated specially under a CGI environment.
556 # It's because HTTP_PROXY may be set by Proxy: header.
557 # So HTTP_PROXY is not used.
558 # http_proxy is not used either if the variable is case insensitive.
559 # CGI_HTTP_PROXY can be used instead.
561 name = self.scheme.downcase + '_proxy'
563 if name == 'http_proxy' && ENV.include?('REQUEST_METHOD') # CGI?
564 # HTTP_PROXY conflicts with *_proxy for proxy settings and
565 # HTTP_* for header information in CGI.
566 # So it should be careful to use it.
567 pairs = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k }
569 when 0 # no proxy setting anyway.
573 if k == 'http_proxy' && ENV[k.upcase] == nil
574 # http_proxy is safe to use because ENV is case sensitive.
575 proxy_uri = ENV[name]
579 else # http_proxy is safe to use because ENV is case sensitive.
580 proxy_uri = ENV[name]
583 # Use CGI_HTTP_PROXY. cf. libwww-perl.
584 proxy_uri = ENV["CGI_#{name.upcase}"]
586 elsif name == 'http_proxy'
587 unless proxy_uri = ENV[name]
588 if proxy_uri = ENV[name.upcase]
589 warn 'The environment variable HTTP_PROXY is discouraged. Use http_proxy.'
593 proxy_uri = ENV[name] || ENV[name.upcase]
596 if proxy_uri && self.host
599 addr = IPSocket.getaddress(self.host)
600 proxy_uri = nil if /\A127\.|\A::1\z/ =~ addr
606 proxy_uri = URI.parse(proxy_uri)
608 if no_proxy = ENV[name] || ENV[name.upcase]
609 no_proxy.scan(/([^:,]*)(?::(\d+))?/) {|host, port|
610 if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host &&
611 (!port || self.port == port.to_i)
625 def buffer_open(buf, proxy, options) # :nodoc:
626 OpenURI.open_http(buf, self, proxy, options)
629 include OpenURI::OpenRead
633 def buffer_open(buf, proxy, options) # :nodoc:
635 OpenURI.open_http(buf, self, proxy, options)
640 directories = self.path.split(%r{/}, -1)
641 directories.shift if directories[0] == '' # strip a field before leading slash
642 directories.each {|d|
643 d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
645 unless filename = directories.pop
646 raise ArgumentError, "no filename: #{self.inspect}"
648 directories.each {|d|
650 raise ArgumentError, "invalid directory: #{d.inspect}"
653 if /[\r\n]/ =~ filename
654 raise ArgumentError, "invalid filename: #{filename.inspect}"
656 typecode = self.typecode
657 if typecode && /\A[aid]\z/ !~ typecode
658 raise ArgumentError, "invalid typecode: #{typecode.inspect}"
661 # The access sequence is defined by RFC 1738
662 ftp = Net::FTP.open(self.host)
663 # todo: extract user/passwd from .netrc.
666 user, passwd = self.userinfo.split(/:/) if self.userinfo
667 ftp.login(user, passwd)
668 directories.each {|cwd|
669 ftp.voidcmd("CWD #{cwd}")
672 # xxx: typecode D is not handled.
673 ftp.voidcmd("TYPE #{typecode.upcase}")
675 if options[:content_length_proc]
676 options[:content_length_proc].call(ftp.size(filename))
678 ftp.retrbinary("RETR #{filename}", 4096) { |str|
680 options[:progress_proc].call(buf.size) if options[:progress_proc]
686 include OpenURI::OpenRead