123456789_123456789_123456789_123456789_123456789_

Module: OpenURI

Relationships & Source Files
Namespace Children
Modules:
Classes:
Exceptions:
Defined in: lib/open-uri.rb

Overview

OpenURI is an easy-to-use wrapper for Net::HTTP, Net::HTTPS and Net::FTP.

Example

It is possible to open an http, https or ftp URL as though it were a file:

URI.open("http://www.ruby-lang.org/") {|f|
  f.each_line {|line| p line}
}

Because the opened file is extended by Meta, it provides several getter methods for its meta-information:

URI.open("http://www.ruby-lang.org/en") {|f|
  f.each_line {|line| p line}
  p f.base_uri         # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
  p f.content_type     # "text/html"
  p f.charset          # "iso-8859-1"
  p f.content_encoding # []
  p f.last_modified    # Thu Dec 05 02:45:02 UTC 2002
}

Additional header fields can be specified by an optional hash argument.

URI.open("http://www.ruby-lang.org/en/",
  "User-Agent" => "Ruby/#{RUBY_VERSION}",
  "From" => "foo@bar.invalid",
  "Referer" => "http://www.ruby-lang.org/") {|f|
  # ...
}

Environment variables such as http_proxy, https_proxy and ftp_proxy are honored by default. To disable the proxy, pass :proxy => nil:

URI.open("http://www.ruby-lang.org/en/", :proxy => nil) {|f|
  # ...
}

See OpenURI::OpenRead.open and URI.open for more on available options.

::URI objects can be opened in a similar way.

uri = URI.parse("http://www.ruby-lang.org/en/")
uri.open {|f|
  # ...
}

::URI objects can be read directly. The returned string is also extended by Meta.

str = uri.read
p str.base_uri
Author

Tanaka Akira <akr@m17n.org>

Constant Summary

Class Method Summary

Class Method Details

.check_options(options)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/open-uri.rb', line 114

# Validates the keys of +options+.
#
# Only Symbol keys are checked; every other key (for example the String
# keys that OpenURI.open_http copies into the request header) is skipped.
#
# Raises ArgumentError for the first Symbol key not listed in Options.
# Returns +options+ itself when every Symbol key is recognized.
def OpenURI.check_options(options) # :nodoc:
  options.each do |key, _value|
    next unless Symbol === key
    next if Options.include? key
    raise ArgumentError, "unrecognized option: #{key}"
  end
end

.open_http(buf, target, proxy, options)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/open-uri.rb', line 264

# Issues one HTTP GET for +target+ and streams the response body into +buf+.
#
# buf::     receives each body chunk via <<; buf.io is given the status
#           and the response header fields.
# target::  URI to fetch.  A URI::HTTP target (HTTP or HTTPS) is requested
#           from its own host, optionally through Net::HTTP::Proxy; any
#           other target is sent to the proxy host with the full URI as the
#           request target.
# proxy::   nil, or an Array destructured as [proxy_uri, proxy_user, proxy_pass].
# options:: Hash.  String keys become request header fields.  Symbol keys
#           read here: :ssl_verify_mode, :ssl_min_version, :ssl_max_version,
#           :ssl_ca_cert, :read_timeout, :open_timeout,
#           :http_basic_authentication, :content_length_proc, :progress_proc.
#
# Returns normally when the response is a Net::HTTPSuccess.
# Throws :open_uri_redirect with the parsed Location URI for a
# 301/302/303/307/308 response.
# Raises RuntimeError when the proxy URI's class is not exactly URI::HTTP,
# ArgumentError when +target+ carries userinfo, and OpenURI::HTTPError for
# any other response or for an unparsable Location header.
def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
  if proxy
    proxy_uri, proxy_user, proxy_pass = proxy
    # Exact class comparison: subclasses of URI::HTTP are rejected as well.
    raise "Non-HTTP proxy URI: #{proxy_uri}" if proxy_uri.class != URI::HTTP
  end

  if target.userinfo
    raise ArgumentError, "userinfo not supported.  [RFC3986]"
  end

  # Only String-keyed options are forwarded as request header fields.
  header = {}
  options.each {|k, v| header[k] = v if String === k }

  require 'net/http'
  klass = Net::HTTP
  if URI::HTTP === target
    # HTTP or HTTPS
    if proxy
      # Fall back to credentials embedded in the proxy URI when an explicit
      # user/password pair was not supplied.
      unless proxy_user && proxy_pass
        proxy_user, proxy_pass = proxy_uri.userinfo.split(':') if proxy_uri.userinfo
      end
      if proxy_user && proxy_pass
        klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port, proxy_user, proxy_pass)
      else
        klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port)
      end
    end
    target_host = target.hostname
    target_port = target.port
    request_uri = target.request_uri
  else
    # FTP over HTTP proxy
    # The connection goes to the proxy itself and the full target URI is
    # used as the request target.  This branch reads proxy_uri, so it
    # relies on +proxy+ being non-nil.
    target_host = proxy_uri.hostname
    target_port = proxy_uri.port
    request_uri = target.to_s
    if proxy_user && proxy_pass
      # Basic credentials, base64-encoded without line breaks ('m0').
      header["Proxy-Authorization"] =
                      'Basic ' + ["#{proxy_user}:#{proxy_pass}"].pack('m0')
    end
  end

  # NOTE(review): the explicit nil third argument in the no-proxy case
  # presumably disables Net::HTTP's own proxy lookup -- confirm against the
  # Net::HTTP.new documentation.
  http = proxy ? klass.new(target_host, target_port) : klass.new(target_host, target_port, nil)
  # Exact class comparison: TLS is configured only for URI::HTTPS targets.
  if target.class == URI::HTTPS
    require 'net/https'
    http.use_ssl = true
    # Peer verification is the default unless :ssl_verify_mode overrides it.
    http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
    # Assigned unconditionally; the value is nil when the option is absent.
    http.min_version = options[:ssl_min_version]
    http.max_version = options[:ssl_max_version]
    store = OpenSSL::X509::Store.new
    if options[:ssl_ca_cert]
      # :ssl_ca_cert may be a single path or a list; directories and files
      # are registered with different store methods.
      Array(options[:ssl_ca_cert]).each do |cert|
        if File.directory? cert
          store.add_path cert
        else
          store.add_file cert
        end
      end
    else
      store.set_default_paths
    end
    http.cert_store = store
  end
  # Timeouts are set only when the key is present, so an explicit nil value
  # is passed through as well.
  if options.include? :read_timeout
    http.read_timeout = options[:read_timeout]
  end
  if options.include? :open_timeout
    http.open_timeout = options[:open_timeout]
  end

  resp = nil
  http.start {
    req = Net::HTTP::Get.new(request_uri, header)
    if options.include? :http_basic_authentication
      user, pass = options[:http_basic_authentication]
      req.basic_auth user, pass
    end
    http.request(req) {|response|
      # Keep the response so the status/redirect handling below can use it.
      resp = response
      # The callbacks fire only for successful responses.
      # :content_length_proc receives the Content-Length as an Integer, or
      # nil when the header is absent.
      if options[:content_length_proc] && Net::HTTPSuccess === resp
        if resp.key?('Content-Length')
          options[:content_length_proc].call(resp['Content-Length'].to_i)
        else
          options[:content_length_proc].call(nil)
        end
      end
      resp.read_body {|str|
        buf << str
        # :progress_proc receives buf.size after each appended chunk.
        if options[:progress_proc] && Net::HTTPSuccess === resp
          options[:progress_proc].call(buf.size)
        end
        # Empty the chunk once appended (presumably to release its memory
        # early -- confirm).
        str.clear
      }
    }
  }
  io = buf.io
  io.rewind
  io.status = [resp.code, resp.message]
  # Copy every response header field onto the IO's meta information.
  resp.each_name {|name| buf.io.meta_add_field2 name, resp.get_fields(name) }
  case resp
  when Net::HTTPSuccess
    # Success: nothing more to do; the caller reads the result from buf.
  when Net::HTTPMovedPermanently, # 301
       Net::HTTPFound, # 302
       Net::HTTPSeeOther, # 303
       Net::HTTPTemporaryRedirect, # 307
       Net::HTTPPermanentRedirect # 308
    begin
      loc_uri = URI.parse(resp['location'])
    rescue URI::InvalidURIError
      raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io)
    end
    # Hand the redirect target to whichever caller has a matching
    # catch(:open_uri_redirect) on the stack.
    throw :open_uri_redirect, loc_uri
  else
    raise OpenURI::HTTPError.new(io.status.join(' '), io)
  end
end

.open_loop(uri, options)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/open-uri.rb', line 175

# Opens +uri+ through its buffer_open method, following redirects signalled
# via throw(:open_uri_redirect, location) until a request completes without
# one.
#
# Proxy selection (at most one of the two options may be present):
# * :proxy_http_basic_authentication => [proxy, user, pass] -- fixed proxy
#   with credentials; +true+ is rejected as the proxy value.
# * :proxy => true            -- ask the URI being opened (uri.find_proxy).
# * :proxy => nil / false     -- never use a proxy.
# * :proxy => String / URI    -- always use that proxy.
# * neither option            -- same as :proxy => true.
#
# Other options read here: :max_redirects, :redirect (defaults to true) and
# :http_basic_authentication (dropped after the first redirect).
#
# Returns buf.io of the final request with base_uri set to the final URI.
# Raises ArgumentError for invalid proxy options, HTTPRedirect when
# redirects are disabled, TooManyRedirects when :max_redirects is exceeded,
# and RuntimeError for forbidden or looping redirects.
def OpenURI.open_loop(uri, options) # :nodoc:
  given = [:proxy_http_basic_authentication, :proxy].select {|key| options.include? key }
  raise ArgumentError, "multiple proxy options specified" if given.length > 1

  proxy_user = proxy_pass = nil
  case given.first
  when :proxy_http_basic_authentication
    opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication)
    proxy_user = proxy_user.to_str
    proxy_pass = proxy_pass.to_str
    if opt_proxy == true
      raise ArgumentError.new("Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}")
    end
  when :proxy
    opt_proxy = options.fetch(:proxy)
  else
    opt_proxy = true
  end

  # find_proxy maps the URI being opened to nil or [proxy_uri, user, pass].
  find_proxy =
    case opt_proxy
    when true
      lambda {|u| (found = u.find_proxy) ? [found, nil, nil] : nil }
    when nil, false
      lambda {|u| nil }
    when String
      parsed_proxy = URI.parse(opt_proxy)
      lambda {|u| [parsed_proxy, proxy_user, proxy_pass] }
    when URI::Generic
      lambda {|u| [opt_proxy, proxy_user, proxy_pass] }
    else
      raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
    end

  visited = {}
  redirect_limit = options[:max_redirects]
  buf = nil
  while true
    # The block yields nil on a completed request; a throw from below
    # makes catch return the redirect location instead.
    location = catch(:open_uri_redirect) do
      buf = Buffer.new
      uri.buffer_open(buf, find_proxy.call(uri), options)
      nil
    end
    break unless location

    # Although it violates RFC2616, the Location: field may hold a relative
    # URI.  Resolve it against the URI that was just requested.
    location = uri + location if location.relative?

    unless options.fetch(:redirect, true)
      raise HTTPRedirect.new(buf.io.status.join(' '), buf.io, location)
    end
    unless OpenURI.redirectable?(uri, location)
      raise "redirection forbidden: #{uri} -> #{location}"
    end

    if options.include? :http_basic_authentication
      # Credentials are sent only to the URI that was directly specified;
      # work on a copy so the caller's hash is left untouched.
      options = options.dup
      options.delete :http_basic_authentication
    end

    uri = location
    raise "HTTP redirection loop: #{uri}" if visited.include? uri.to_s
    visited[uri.to_s] = true
    raise TooManyRedirects.new("Too many redirects", buf.io) if redirect_limit && visited.size > redirect_limit
  end

  buf.io.tap {|io| io.base_uri = uri }
end

.open_uri(name, *rest)

This method is for internal use only.

Raises:

  • (ArgumentError)
[ GitHub ]

  
# File 'lib/open-uri.rb', line 133

# Opens +name+ (a URI::Generic, or anything URI.parse accepts) for reading.
#
# +rest+ may contain, in order: an access mode (String or Integer), an
# Integer permission (accepted and ignored), and an options Hash.  Any
# further argument raises ArgumentError ("extra arguments").
#
# Accepted modes are nil, "r", "rb", "r:ENC", "rb:ENC" and File::RDONLY.
# An encoding may be given either as the ":ENC" mode suffix or as the
# :encoding option, but not both.
#
# With a block, yields the opened IO and closes it afterwards (close! when
# the IO responds to it, e.g. a Tempfile), returning the block's value.
# Without a block, returns the IO.
#
# Raises ArgumentError for unrecognized options, extra arguments, an
# unknown or doubly specified encoding, or a mode that is not read-only.
def OpenURI.open_uri(name, *rest) # :nodoc:
  uri = URI::Generic === name ? name : URI.parse(name)
  mode, _, rest = OpenURI.scan_open_optional_arguments(*rest)
  options = rest.shift if !rest.empty? && Hash === rest.first
  raise ArgumentError.new("extra arguments") if !rest.empty?
  options ||= {}
  OpenURI.check_options(options)

  # Only a String mode can carry an ":encoding" suffix.  Regexp#=~ raises
  # TypeError for an Integer operand, so Integer modes must bypass the match
  # and go straight to the read-only check below; otherwise File::RDONLY
  # could never be accepted and bad Integer modes would surface as TypeError.
  if String === mode && /\Arb?(?:\Z|:([^:]+))/ =~ mode
    # Encoding.find is called for validation only (it raises ArgumentError
    # for unknown names); the encoding is kept as the name given in the mode.
    encoding, = $1,Encoding.find($1) if $1
    mode = nil
  end
  if options.has_key? :encoding
    if !encoding.nil?
      raise ArgumentError, "encoding specified twice"
    end
    encoding = Encoding.find(options[:encoding])
  end

  unless mode == nil ||
         mode == 'r' || mode == 'rb' ||
         mode == File::RDONLY
    raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
  end

  io = open_loop(uri, options)
  io.set_encoding(encoding) if encoding
  if block_given?
    begin
      yield io
    ensure
      if io.respond_to? :close!
        io.close! # Tempfile
      else
        io.close if !io.closed?
      end
    end
  else
    io
  end
end

.redirectable?(uri1, uri2) ⇒ Boolean

This method is for internal use only.
[ GitHub ]

  
# File 'lib/open-uri.rb', line 253

# Decides whether a redirect from +uri1+ to +uri2+ may be followed.
#
# A redirect is allowed when both schemes are equal (case-insensitively),
# or when the source scheme is http/ftp and the destination scheme is
# http/https/ftp.
#
# Consequences of this rule:
# * http://... -> file:///etc/passwd, file:///dev/zero, etc. is refused
#   (CVE-2011-1521).
# * https -> http is refused on purpose, so that secure cookies or referers
#   are not sent over a non-secure protocol
#   (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3).
#
# The rule is ad hoc; it should be extensible/configurable.
#
# Returns a truthy value (+true+, or the regexp match index 0) when the
# redirect is allowed, +nil+ otherwise.
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
  return true if uri1.scheme.downcase == uri2.scheme.downcase
  return nil unless /\A(?:http|ftp)\z/i =~ uri1.scheme

  /\A(?:https?|ftp)\z/i =~ uri2.scheme
end

.scan_open_optional_arguments(*rest)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/open-uri.rb', line 123

# Splits the leading open-style positional arguments off +rest+.
#
# A leading String or Integer is taken as the access mode; an Integer
# directly after it is taken as the permission.
#
# Returns [mode, perm, remaining_arguments]; mode and perm are nil when
# they were not given.
def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
  mode = perm = nil
  case rest.first
  when String, Integer
    mode = rest.shift
    perm = rest.shift if Integer === rest.first
  end
  [mode, perm, rest]
end