7 alias open_uri_original_open open # :nodoc:
9 alias open_uri_original_open open # :nodoc:
12 # makes it possible to open various resources including URIs.
13 # If the first argument responds to the `open' method,
14 # the method is called with the rest of the arguments.
16 # If the first argument is a string which begins with xxx://,
17 # it is parsed by URI.parse. If the parsed object responds to the `open' method,
18 # the method is called with the rest of the arguments.
20 # Otherwise original open is called.
22 # Since open-uri.rb provides URI::HTTP#open, URI::HTTPS#open and
24 # Kernel[#.]open can accept such URIs and strings which begin with
25 # http://, https:// and ftp://.
26 # In these cases, the opened file object is extended by OpenURI::Meta.
27 def open(name, *rest, &block) # :doc:
28 if name.respond_to?(:open)
29 name.open(*rest, &block)
30 elsif name.respond_to?(:to_str) &&
31 %r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name &&
32 (uri = URI.parse(name)).respond_to?(:open)
33 uri.open(*rest, &block)
35 open_uri_original_open(name, *rest, &block)
41 # OpenURI is an easy-to-use wrapper for net/http, net/https and net/ftp.
45 # It is possible to open an http/https/ftp URL in the usual way, like opening a file:
47 # open("http://www.ruby-lang.org/") {|f|
48 # f.each_line {|line| p line}
51 # The opened file has several methods for meta information as follows since
52 # it is extended by OpenURI::Meta.
54 # open("http://www.ruby-lang.org/en") {|f|
55 # f.each_line {|line| p line}
56 # p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
57 # p f.content_type # "text/html"
58 # p f.charset # "iso-8859-1"
59 # p f.content_encoding # []
60 # p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
63 # Additional header fields can be specified by an optional hash argument.
65 # open("http://www.ruby-lang.org/en/",
66 # "User-Agent" => "Ruby/#{RUBY_VERSION}",
67 # "From" => "foo@bar.invalid",
68 # "Referer" => "http://www.ruby-lang.org/") {|f|
72 # The environment variables such as http_proxy, https_proxy and ftp_proxy
73 # are in effect by default. :proxy => nil disables proxy.
75 # open("http://www.ruby-lang.org/en/raa.html", :proxy => nil) {|f|
79 # URI objects can be opened in a similar way.
81 # uri = URI.parse("http://www.ruby-lang.org/en/")
86 # URI objects can be read directly. The returned string is also extended by
92 # Author:: Tanaka Akira <akr@m17n.org>
97 :proxy_http_basic_authentication => true,
98 :progress_proc => true,
99 :content_length_proc => true,
100 :http_basic_authentication => true,
101 :read_timeout => true,
103 :ssl_verify_mode => nil,
104 :ftp_active_mode => false,
108 def OpenURI.check_options(options) # :nodoc:
110 next unless Symbol === k
111 unless Options.include? k
112 raise ArgumentError, "unrecognized option: #{k}"
117 def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
118 if !rest.empty? && (String === rest.first || Integer === rest.first)
120 if !rest.empty? && Integer === rest.first
124 return mode, perm, rest
127 def OpenURI.open_uri(name, *rest) # :nodoc:
128 uri = URI::Generic === name ? name : URI.parse(name)
129 mode, perm, rest = OpenURI.scan_open_optional_arguments(*rest)
130 options = rest.shift if !rest.empty? && Hash === rest.first
131 raise ArgumentError.new("extra arguments") if !rest.empty?
133 OpenURI.check_options(options)
135 unless mode == nil ||
136 mode == 'r' || mode == 'rb' ||
138 raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
141 io = open_loop(uri, options)
153 def OpenURI.open_loop(uri, options) # :nodoc:
155 proxy_opts << :proxy_http_basic_authentication if options.include? :proxy_http_basic_authentication
156 proxy_opts << :proxy if options.include? :proxy
158 if 1 < proxy_opts.length
159 raise ArgumentError, "multiple proxy options specified"
161 case proxy_opts.first
162 when :proxy_http_basic_authentication
163 opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication)
164 proxy_user = proxy_user.to_str
165 proxy_pass = proxy_pass.to_str
167 raise ArgumentError.new("Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}")
170 opt_proxy = options.fetch(:proxy)
180 find_proxy = lambda {|u| pxy = u.find_proxy; pxy ? [pxy, nil, nil] : nil}
182 find_proxy = lambda {|u| nil}
184 opt_proxy = URI.parse(opt_proxy)
185 find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
187 find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
189 raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
195 redirect = catch(:open_uri_redirect) {
197 uri.buffer_open(buf, find_proxy.call(uri), options)
201 if redirect.relative?
202 # Although it violates RFC2616, the Location: field may have a relative
203 # URI. It is converted to an absolute URI using uri as a base URI.
204 redirect = uri + redirect
206 if !options.fetch(:redirect, true)
207 raise HTTPRedirect.new(buf.io.status.join(' '), buf.io, redirect)
209 unless OpenURI.redirectable?(uri, redirect)
210 raise "redirection forbidden: #{uri} -> #{redirect}"
212 if options.include? :http_basic_authentication
213 # send authentication only for the URI directly specified.
214 options = options.dup
215 options.delete :http_basic_authentication
218 raise "HTTP redirection loop: #{uri}" if uri_set.include? uri.to_s
219 uri_set[uri.to_s] = true
229 def OpenURI.redirectable?(uri1, uri2) # :nodoc:
230 # This test is intended to forbid a redirection from http://... to
231 # file:///etc/passwd.
232 # https to http redirect is also forbidden intentionally.
233 # It avoids sending secure cookie or referer by non-secure HTTP protocol.
234 # (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3)
235 # However this is ad hoc. It should be extensible/configurable.
236 uri1.scheme.downcase == uri2.scheme.downcase ||
237 (/\A(?:http|ftp)\z/i =~ uri1.scheme && /\A(?:http|ftp)\z/i =~ uri2.scheme)
240 def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
242 proxy_uri, proxy_user, proxy_pass = proxy
243 raise "Non-HTTP proxy URI: #{proxy_uri}" if proxy_uri.class != URI::HTTP
246 if target.userinfo && "1.9.0" <= RUBY_VERSION
247 # don't raise for 1.8, for the sake of compatibility.
248 raise ArgumentError, "userinfo not supported. [RFC3986]"
252 options.each {|k, v| header[k] = v if String === k }
256 if URI::HTTP === target
259 if proxy_user && proxy_pass
260 klass = Net::HTTP::Proxy(proxy_uri.host, proxy_uri.port, proxy_user, proxy_pass)
262 klass = Net::HTTP::Proxy(proxy_uri.host, proxy_uri.port)
265 target_host = target.host
266 target_port = target.port
267 request_uri = target.request_uri
269 # FTP over HTTP proxy
270 target_host = proxy_uri.host
271 target_port = proxy_uri.port
272 request_uri = target.to_s
273 if proxy_user && proxy_pass
274 header["Proxy-Authorization"] = 'Basic ' + ["#{proxy_user}:#{proxy_pass}"].pack('m').delete("\r\n")
278 http = klass.new(target_host, target_port)
279 if target.class == URI::HTTPS
282 http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
283 store = OpenSSL::X509::Store.new
284 if options[:ssl_ca_cert]
285 if File.directory? options[:ssl_ca_cert]
286 store.add_path options[:ssl_ca_cert]
288 store.add_file options[:ssl_ca_cert]
291 store.set_default_paths
293 http.cert_store = store
295 if options.include? :read_timeout
296 http.read_timeout = options[:read_timeout]
301 req = Net::HTTP::Get.new(request_uri, header)
302 if options.include? :http_basic_authentication
303 user, pass = options[:http_basic_authentication]
304 req.basic_auth user, pass
306 http.request(req) {|response|
308 if options[:content_length_proc] && Net::HTTPSuccess === resp
309 if resp.key?('Content-Length')
310 options[:content_length_proc].call(resp['Content-Length'].to_i)
312 options[:content_length_proc].call(nil)
315 resp.read_body {|str|
317 if options[:progress_proc] && Net::HTTPSuccess === resp
318 options[:progress_proc].call(buf.size)
325 io.status = [resp.code, resp.message]
326 resp.each {|name,value| buf.io.meta_add_field name, value }
328 when Net::HTTPSuccess
329 when Net::HTTPMovedPermanently, # 301
330 Net::HTTPFound, # 302
331 Net::HTTPSeeOther, # 303
332 Net::HTTPTemporaryRedirect # 307
334 loc_uri = URI.parse(resp['location'])
335 rescue URI::InvalidURIError
336 raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io)
338 throw :open_uri_redirect, loc_uri
340 raise OpenURI::HTTPError.new(io.status.join(' '), io)
344 class HTTPError < StandardError
345 def initialize(message, io)
352 class HTTPRedirect < HTTPError
353 def initialize(message, io, uri)
360 class Buffer # :nodoc:
371 if StringIO === @io && StringMax < @size
373 io = Tempfile.new('open-uri')
375 Meta.init io, @io if Meta === @io
382 Meta.init @io unless Meta === @io
387 # Mixin for holding meta-information.
389 def Meta.init(obj, src=nil) # :nodoc:
396 obj.status = src.status
397 obj.base_uri = src.base_uri
398 src.meta.each {|name, value|
399 obj.meta_add_field(name, value)
404 # returns an Array which consists of the status code and message.
405 attr_accessor :status
407 # returns a URI which is the base of relative URIs in the data.
408 # It may differ from the URI supplied by a user because of redirection.
409 attr_accessor :base_uri
411 # returns a Hash which represents header fields.
412 # The Hash keys are downcased for canonicalization.
415 def meta_setup_encoding # :nodoc:
416 charset = self.charset
420 enc = Encoding.find(charset)
424 enc = Encoding::ASCII_8BIT unless enc
425 if self.respond_to? :force_encoding
426 self.force_encoding(enc)
427 elsif self.respond_to? :string
428 self.string.force_encoding(enc)
430 self.set_encoding enc
434 def meta_add_field(name, value) # :nodoc:
437 meta_setup_encoding if name == 'content-type'
440 # returns a Time which represents the Last-Modified field.
442 if v = @meta['last-modified']
449 RE_LWS = /[\r\n\t ]+/n
450 RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
451 RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
452 RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
454 def content_type_parse # :nodoc:
455 v = @meta['content-type']
456 # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
457 if v && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ v
459 subtype = $2.downcase
461 $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval|
462 val = qval.gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/n) { $1 ? $1[1,1] : $& } if qval
463 parameters << [att.downcase, val]
465 ["#{type}/#{subtype}", *parameters]
471 # returns "type/subtype" which is MIME Content-Type.
472 # It is downcased for canonicalization.
473 # Content-Type parameters are stripped.
475 type, *parameters = content_type_parse
476 type || 'application/octet-stream'
479 # returns a charset parameter in Content-Type field.
480 # It is downcased for canonicalization.
482 # If charset parameter is not given but a block is given,
483 # the block is called and its result is returned.
484 # It can be used to guess charset.
486 # If neither a charset parameter nor a block is given,
487 # nil is returned except for text types in HTTP.
488 # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1.
490 type, *parameters = content_type_parse
491 if pair = parameters.assoc('charset')
495 elsif type && %r{\Atext/} =~ type &&
496 @base_uri && /\Ahttp\z/i =~ @base_uri.scheme
497 "iso-8859-1" # RFC2616 3.7.1
503 # returns a list of encodings in Content-Encoding field
504 # as an Array of String.
505 # The encodings are downcased for canonicalization.
507 v = @meta['content-encoding']
508 if v && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ v
509 v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase}
516 # Mixin for HTTP and FTP URIs.
518 # OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP.
520 # OpenURI::OpenRead#open takes optional 3 arguments as:
521 # OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
523 # `mode' and `perm' are the same as in Kernel#open.
525 # However, `mode' must be read mode because OpenURI::OpenRead#open doesn't
526 # support write mode (yet).
527 # Also `perm' is just ignored because it is meaningful only for file
530 # `options' must be a hash.
532 # Each pair whose key is a string in the hash specifies an extra header
534 # I.e. it is ignored for FTP without HTTP proxy.
536 # The hash may include other options whose key is a symbol:
540 # :proxy => "http://proxy.foo.com:8000/"
541 # :proxy => URI.parse("http://proxy.foo.com:8000/")
546 # If :proxy option is specified, the value should be String, URI,
548 # When String or URI is given, it is treated as proxy URI.
549 # When true is given or the option itself is not specified,
550 # environment variable `scheme_proxy' is examined.
551 # `scheme' is replaced by `http', `https' or `ftp'.
552 # When false or nil is given, the environment variables are ignored and
553 # connection will be made to a server directly.
555 # [:proxy_http_basic_authentication]
557 # :proxy_http_basic_authentication => ["http://proxy.foo.com:8000/", "proxy-user", "proxy-password"]
558 # :proxy_http_basic_authentication => [URI.parse("http://proxy.foo.com:8000/"), "proxy-user", "proxy-password"]
560 # If :proxy_http_basic_authentication option is specified, the value should be an Array with 3 elements.
561 # It should contain a proxy URI, a proxy user name and a proxy password.
562 # The proxy URI should be a String, a URI or nil.
563 # The proxy user name and password should be Strings.
565 # If nil is given for the proxy URI, this option is just ignored.
567 # If both :proxy and :proxy_http_basic_authentication are specified,
568 # ArgumentError is raised.
570 # [:http_basic_authentication]
572 # :http_basic_authentication=>[user, password]
574 # If :http_basic_authentication is specified,
575 # the value should be an array which contains 2 strings:
576 # username and password.
577 # It is used for HTTP Basic authentication defined by RFC 2617.
579 # [:content_length_proc]
581 # :content_length_proc => lambda {|content_length| ... }
583 # If :content_length_proc option is specified, the option value procedure
584 # is called before actual transfer is started.
585 # It takes one argument which is expected content length in bytes.
587 # If two or more transfers are done by HTTP redirection, the procedure
588 # is called only once, for the last transfer.
590 # When expected content length is unknown, the procedure is called with
592 # This happens when the HTTP response has no Content-Length header.
596 # :progress_proc => lambda {|size| ...}
598 # If :progress_proc option is specified, the proc is called with one
599 # argument each time `open' gets a content fragment from the network.
600 # The argument `size' is the accumulated transferred size in bytes.
602 # If two or more transfers are done by HTTP redirection, the procedure
603 # is called only once, for the last transfer.
605 # :progress_proc and :content_length_proc are intended to be used for
607 # For example, it can be implemented as follows using Ruby/ProgressBar.
611 # :content_length_proc => lambda {|t|
613 # pbar = ProgressBar.new("...", t)
614 # pbar.file_transfer_mode
617 # :progress_proc => lambda {|s|
623 # :read_timeout=>nil (no timeout)
624 # :read_timeout=>10 (10 seconds)
626 # :read_timeout option specifies a timeout of read for http connections.
630 # :ssl_ca_cert=>filename
632 # :ssl_ca_cert is used to specify CA certificate for SSL.
633 # If it is given, default certificates are not used.
637 # :ssl_verify_mode=>mode
639 # :ssl_verify_mode is used to specify openssl verify mode.
641 # OpenURI::OpenRead#open returns an IO-like object if a block is not given.
642 # Otherwise it yields the IO object and returns the value of the block.
643 # The IO object is extended with OpenURI::Meta.
647 # :ftp_active_mode=>bool
649 # :ftp_active_mode=>true is used to enable ftp active mode.
650 # Note that the active mode is default in Ruby 1.8 or prior.
651 # Ruby 1.9 uses passive mode by default.
657 # :redirect=>false is used to disable HTTP redirects entirely.
658 # An OpenURI::HTTPRedirect exception is raised on redirection.
659 # It is true by default.
660 # True means redirections between http and ftp are permitted.
662 def open(*rest, &block)
663 OpenURI.open_uri(self, *rest, &block)
666 # OpenURI::OpenRead#read([options]) reads the content referenced by self and
667 # returns the content as a string.
668 # The string is extended with OpenURI::Meta.
669 # The argument `options' is the same as for OpenURI::OpenRead#open.
671 self.open(options) {|f|
682 # returns a proxy URI.
683 # The proxy URI is obtained from environment variables such as http_proxy,
684 # ftp_proxy, no_proxy, etc.
685 # If there is no proper proxy, nil is returned.
687 # Note that capitalized variables (HTTP_PROXY, FTP_PROXY, NO_PROXY, etc.)
690 # But http_proxy and HTTP_PROXY are treated specially under a CGI environment.
691 # This is because HTTP_PROXY may be set by the Proxy: header.
692 # So HTTP_PROXY is not used.
693 # http_proxy is not used either if the variable is case insensitive.
694 # CGI_HTTP_PROXY can be used instead.
696 name = self.scheme.downcase + '_proxy'
698 if name == 'http_proxy' && ENV.include?('REQUEST_METHOD') # CGI?
699 # HTTP_PROXY conflicts with *_proxy for proxy settings and
700 # HTTP_* for header information in CGI.
701 # So care should be taken when using it.
702 pairs = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k }
704 when 0 # no proxy setting anyway.
708 if k == 'http_proxy' && ENV[k.upcase] == nil
709 # http_proxy is safe to use because ENV is case sensitive.
710 proxy_uri = ENV[name]
714 else # http_proxy is safe to use because ENV is case sensitive.
715 proxy_uri = ENV.to_hash[name]
718 # Use CGI_HTTP_PROXY. cf. libwww-perl.
719 proxy_uri = ENV["CGI_#{name.upcase}"]
721 elsif name == 'http_proxy'
722 unless proxy_uri = ENV[name]
723 if proxy_uri = ENV[name.upcase]
724 warn 'The environment variable HTTP_PROXY is discouraged. Use http_proxy.'
728 proxy_uri = ENV[name] || ENV[name.upcase]
731 if proxy_uri && self.host
734 addr = IPSocket.getaddress(self.host)
735 proxy_uri = nil if /\A127\.|\A::1\z/ =~ addr
741 proxy_uri = URI.parse(proxy_uri)
743 if no_proxy = ENV[name] || ENV[name.upcase]
744 no_proxy.scan(/([^:,]*)(?::(\d+))?/) {|host, port|
745 if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host &&
746 (!port || self.port == port.to_i)
760 def buffer_open(buf, proxy, options) # :nodoc:
761 OpenURI.open_http(buf, self, proxy, options)
764 include OpenURI::OpenRead
768 def buffer_open(buf, proxy, options) # :nodoc:
770 OpenURI.open_http(buf, self, proxy, options)
775 directories = self.path.split(%r{/}, -1)
776 directories.shift if directories[0] == '' # strip a field before leading slash
777 directories.each {|d|
778 d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
780 unless filename = directories.pop
781 raise ArgumentError, "no filename: #{self.inspect}"
783 directories.each {|d|
785 raise ArgumentError, "invalid directory: #{d.inspect}"
788 if /[\r\n]/ =~ filename
789 raise ArgumentError, "invalid filename: #{filename.inspect}"
791 typecode = self.typecode
792 if typecode && /\A[aid]\z/ !~ typecode
793 raise ArgumentError, "invalid typecode: #{typecode.inspect}"
796 # The access sequence is defined by RFC 1738
797 ftp = Net::FTP.open(self.host)
798 ftp.passive = true if !options[:ftp_active_mode]
799 # todo: extract user/passwd from .netrc.
802 user, passwd = self.userinfo.split(/:/) if self.userinfo
803 ftp.login(user, passwd)
804 directories.each {|cwd|
805 ftp.voidcmd("CWD #{cwd}")
808 # xxx: typecode D is not handled.
809 ftp.voidcmd("TYPE #{typecode.upcase}")
811 if options[:content_length_proc]
812 options[:content_length_proc].call(ftp.size(filename))
814 ftp.retrbinary("RETR #{filename}", 4096) { |str|
816 options[:progress_proc].call(buf.size) if options[:progress_proc]
822 include OpenURI::OpenRead