123456789_123456789_123456789_123456789_123456789_

Module: URI

Overview

URI is a module providing classes to handle Uniform Resource Identifiers (RFC2396).

Features

  • Uniform way of handling URIs.

  • Flexibility to introduce custom URI schemes.

  • Flexibility to have an alternate URI::Parser (or just different patterns and regexp’s).

Basic example

require 'uri'

uri = URI("http://foo.com/posts?id=30&limit=5#time=1305298413")
#=> #<URI::HTTP http://foo.com/posts?id=30&limit=5#time=1305298413>

uri.scheme    #=> "http"
uri.host      #=> "foo.com"
uri.path      #=> "/posts"
uri.query     #=> "id=30&limit=5"
uri.fragment  #=> "time=1305298413"

uri.to_s      #=> "http://foo.com/posts?id=30&limit=5#time=1305298413"

Adding custom URIs

module URI
  # A custom scheme class: subclasses Generic and declares its default port.
  class RSYNC < Generic
    DEFAULT_PORT = 873
  end
  # Register RSYNC as the class for the 'RSYNC' scheme.
  register_scheme 'RSYNC', RSYNC
end
#=> URI::RSYNC

URI.scheme_list
#=> {"FILE"=>URI::File, "FTP"=>URI::FTP, "HTTP"=>URI::HTTP,
#    "HTTPS"=>URI::HTTPS, "LDAP"=>URI::LDAP, "LDAPS"=>URI::LDAPS,
#    "MAILTO"=>URI::MailTo, "RSYNC"=>URI::RSYNC}

uri = URI("rsync://rsync.foo.com")
#=> #<URI::RSYNC rsync://rsync.foo.com>

RFC References

A good place to view an RFC spec is www.ietf.org/rfc.html.

Here is a list of all related RFCs:

  • RFC822

  • RFC1738

  • RFC2255

  • RFC2368

  • RFC2373

  • RFC2396

  • RFC2732

  • RFC3986

Class tree

  • Generic (in uri/generic.rb)

    • URI::File - (in uri/file.rb)

    • URI::FTP - (in uri/ftp.rb)

    • URI::HTTP - (in uri/http.rb)

      • URI::HTTPS - (in uri/https.rb)

    • URI::LDAP - (in uri/ldap.rb)

      • URI::LDAPS - (in uri/ldaps.rb)

    • URI::MailTo - (in uri/mailto.rb)

  • URI::Parser - (in uri/common.rb)

  • URI::REGEXP - (in uri/common.rb)

    • URI::REGEXP::PATTERN - (in uri/common.rb)

  • Util - (in uri/common.rb)

  • Error - (in uri/common.rb)

    • URI::InvalidURIError - (in uri/common.rb)

    • URI::InvalidComponentError - (in uri/common.rb)

    • URI::BadURIError - (in uri/common.rb)

Copyright Info

Author

Akira Yamada <akira@ruby-lang.org>

Documentation

Akira Yamada <akira@ruby-lang.org> Dmitry V. Sabanin <sdmitry@lrn.ru> Vincent Batts <vbatts@hashbangbash.com>

License

Copyright © 2001 akira yamada <akira@ruby-lang.org> You can redistribute it and/or modify it under the same term as Ruby.

Constant Summary

  • DEFAULT_PARSER =
    # File 'lib/uri/common.rb', line 22
    RFC3986_PARSER
  • INITIAL_SCHEMES = private
    # File 'lib/uri/common.rb', line 126
    scheme_list
  • RFC2396_PARSER =
    # File 'lib/uri/common.rb', line 16
    RFC2396_Parser.new
  • RFC3986_PARSER =
    # File 'lib/uri/common.rb', line 19
    RFC3986_Parser.new
  • TBLDECWWWCOMP_ = Internal use only
    # File 'lib/uri/common.rb', line 311
    {}
  • TBLENCURICOMP_ =
    # File 'lib/uri/common.rb', line 308
    TBLENCWWWCOMP_.dup.freeze
  • TBLENCWWWCOMP_ = Internal use only
    # File 'lib/uri/common.rb', line 304
    {}
  • VERSION = Internal use only
    # File 'lib/uri/version.rb', line 4
    VERSION_CODE.scan(/../).collect{|n| n.to_i}.join('.').freeze
  • VERSION_CODE = Internal use only
    # File 'lib/uri/version.rb', line 3
    '010002'.freeze
  • WEB_ENCODINGS_ = Internal use only

    curl encoding.spec.whatwg.org/encodings.json|

    ruby -rjson -e 'H={}
    h={
      "shift_jis"=>"Windows-31J",
      "euc-jp"=>"cp51932",
      "iso-2022-jp"=>"cp50221",
      "x-mac-cyrillic"=>"macCyrillic",
    }
    JSON($<.read).map{|x|x["encodings"]}.flatten.each{|x|
      Encoding.find(n=h.fetch(n=x["name"].downcase,n))rescue next
      x["labels"].each{|y|H[y]=n}
    }
    puts "{"
    H.each{|k,v|puts %[  #{k.dump}=>#{v.dump},]}
    puts "}"

    # File 'lib/uri/common.rb', line 635
    {
      "unicode-1-1-utf-8"=>"utf-8",
      "utf-8"=>"utf-8",
      "utf8"=>"utf-8",
      "866"=>"ibm866",
      "cp866"=>"ibm866",
      "csibm866"=>"ibm866",
      "ibm866"=>"ibm866",
      "csisolatin2"=>"iso-8859-2",
      "iso-8859-2"=>"iso-8859-2",
      "iso-ir-101"=>"iso-8859-2",
      "iso8859-2"=>"iso-8859-2",
      "iso88592"=>"iso-8859-2",
      "iso_8859-2"=>"iso-8859-2",
      "iso_8859-2:1987"=>"iso-8859-2",
      "l2"=>"iso-8859-2",
      "latin2"=>"iso-8859-2",
      "csisolatin3"=>"iso-8859-3",
      "iso-8859-3"=>"iso-8859-3",
      "iso-ir-109"=>"iso-8859-3",
      "iso8859-3"=>"iso-8859-3",
      "iso88593"=>"iso-8859-3",
      "iso_8859-3"=>"iso-8859-3",
      "iso_8859-3:1988"=>"iso-8859-3",
      "l3"=>"iso-8859-3",
      "latin3"=>"iso-8859-3",
      "csisolatin4"=>"iso-8859-4",
      "iso-8859-4"=>"iso-8859-4",
      "iso-ir-110"=>"iso-8859-4",
      "iso8859-4"=>"iso-8859-4",
      "iso88594"=>"iso-8859-4",
      "iso_8859-4"=>"iso-8859-4",
      "iso_8859-4:1988"=>"iso-8859-4",
      "l4"=>"iso-8859-4",
      "latin4"=>"iso-8859-4",
      "csisolatincyrillic"=>"iso-8859-5",
      "cyrillic"=>"iso-8859-5",
      "iso-8859-5"=>"iso-8859-5",
      "iso-ir-144"=>"iso-8859-5",
      "iso8859-5"=>"iso-8859-5",
      "iso88595"=>"iso-8859-5",
      "iso_8859-5"=>"iso-8859-5",
      "iso_8859-5:1988"=>"iso-8859-5",
      "arabic"=>"iso-8859-6",
      "asmo-708"=>"iso-8859-6",
      "csiso88596e"=>"iso-8859-6",
      "csiso88596i"=>"iso-8859-6",
      "csisolatinarabic"=>"iso-8859-6",
      "ecma-114"=>"iso-8859-6",
      "iso-8859-6"=>"iso-8859-6",
      "iso-8859-6-e"=>"iso-8859-6",
      "iso-8859-6-i"=>"iso-8859-6",
      "iso-ir-127"=>"iso-8859-6",
      "iso8859-6"=>"iso-8859-6",
      "iso88596"=>"iso-8859-6",
      "iso_8859-6"=>"iso-8859-6",
      "iso_8859-6:1987"=>"iso-8859-6",
      "csisolatingreek"=>"iso-8859-7",
      "ecma-118"=>"iso-8859-7",
      "elot_928"=>"iso-8859-7",
      "greek"=>"iso-8859-7",
      "greek8"=>"iso-8859-7",
      "iso-8859-7"=>"iso-8859-7",
      "iso-ir-126"=>"iso-8859-7",
      "iso8859-7"=>"iso-8859-7",
      "iso88597"=>"iso-8859-7",
      "iso_8859-7"=>"iso-8859-7",
      "iso_8859-7:1987"=>"iso-8859-7",
      "sun_eu_greek"=>"iso-8859-7",
      "csiso88598e"=>"iso-8859-8",
      "csisolatinhebrew"=>"iso-8859-8",
      "hebrew"=>"iso-8859-8",
      "iso-8859-8"=>"iso-8859-8",
      "iso-8859-8-e"=>"iso-8859-8",
      "iso-ir-138"=>"iso-8859-8",
      "iso8859-8"=>"iso-8859-8",
      "iso88598"=>"iso-8859-8",
      "iso_8859-8"=>"iso-8859-8",
      "iso_8859-8:1988"=>"iso-8859-8",
      "visual"=>"iso-8859-8",
      "csisolatin6"=>"iso-8859-10",
      "iso-8859-10"=>"iso-8859-10",
      "iso-ir-157"=>"iso-8859-10",
      "iso8859-10"=>"iso-8859-10",
      "iso885910"=>"iso-8859-10",
      "l6"=>"iso-8859-10",
      "latin6"=>"iso-8859-10",
      "iso-8859-13"=>"iso-8859-13",
      "iso8859-13"=>"iso-8859-13",
      "iso885913"=>"iso-8859-13",
      "iso-8859-14"=>"iso-8859-14",
      "iso8859-14"=>"iso-8859-14",
      "iso885914"=>"iso-8859-14",
      "csisolatin9"=>"iso-8859-15",
      "iso-8859-15"=>"iso-8859-15",
      "iso8859-15"=>"iso-8859-15",
      "iso885915"=>"iso-8859-15",
      "iso_8859-15"=>"iso-8859-15",
      "l9"=>"iso-8859-15",
      "iso-8859-16"=>"iso-8859-16",
      "cskoi8r"=>"koi8-r",
      "koi"=>"koi8-r",
      "koi8"=>"koi8-r",
      "koi8-r"=>"koi8-r",
      "koi8_r"=>"koi8-r",
      "koi8-ru"=>"koi8-u",
      "koi8-u"=>"koi8-u",
      "dos-874"=>"windows-874",
      "iso-8859-11"=>"windows-874",
      "iso8859-11"=>"windows-874",
      "iso885911"=>"windows-874",
      "tis-620"=>"windows-874",
      "windows-874"=>"windows-874",
      "cp1250"=>"windows-1250",
      "windows-1250"=>"windows-1250",
      "x-cp1250"=>"windows-1250",
      "cp1251"=>"windows-1251",
      "windows-1251"=>"windows-1251",
      "x-cp1251"=>"windows-1251",
      "ansi_x3.4-1968"=>"windows-1252",
      "ascii"=>"windows-1252",
      "cp1252"=>"windows-1252",
      "cp819"=>"windows-1252",
      "csisolatin1"=>"windows-1252",
      "ibm819"=>"windows-1252",
      "iso-8859-1"=>"windows-1252",
      "iso-ir-100"=>"windows-1252",
      "iso8859-1"=>"windows-1252",
      "iso88591"=>"windows-1252",
      "iso_8859-1"=>"windows-1252",
      "iso_8859-1:1987"=>"windows-1252",
      "l1"=>"windows-1252",
      "latin1"=>"windows-1252",
      "us-ascii"=>"windows-1252",
      "windows-1252"=>"windows-1252",
      "x-cp1252"=>"windows-1252",
      "cp1253"=>"windows-1253",
      "windows-1253"=>"windows-1253",
      "x-cp1253"=>"windows-1253",
      "cp1254"=>"windows-1254",
      "csisolatin5"=>"windows-1254",
      "iso-8859-9"=>"windows-1254",
      "iso-ir-148"=>"windows-1254",
      "iso8859-9"=>"windows-1254",
      "iso88599"=>"windows-1254",
      "iso_8859-9"=>"windows-1254",
      "iso_8859-9:1989"=>"windows-1254",
      "l5"=>"windows-1254",
      "latin5"=>"windows-1254",
      "windows-1254"=>"windows-1254",
      "x-cp1254"=>"windows-1254",
      "cp1255"=>"windows-1255",
      "windows-1255"=>"windows-1255",
      "x-cp1255"=>"windows-1255",
      "cp1256"=>"windows-1256",
      "windows-1256"=>"windows-1256",
      "x-cp1256"=>"windows-1256",
      "cp1257"=>"windows-1257",
      "windows-1257"=>"windows-1257",
      "x-cp1257"=>"windows-1257",
      "cp1258"=>"windows-1258",
      "windows-1258"=>"windows-1258",
      "x-cp1258"=>"windows-1258",
      "x-mac-cyrillic"=>"macCyrillic",
      "x-mac-ukrainian"=>"macCyrillic",
      "chinese"=>"gbk",
      "csgb2312"=>"gbk",
      "csiso58gb231280"=>"gbk",
      "gb2312"=>"gbk",
      "gb_2312"=>"gbk",
      "gb_2312-80"=>"gbk",
      "gbk"=>"gbk",
      "iso-ir-58"=>"gbk",
      "x-gbk"=>"gbk",
      "gb18030"=>"gb18030",
      "big5"=>"big5",
      "big5-hkscs"=>"big5",
      "cn-big5"=>"big5",
      "csbig5"=>"big5",
      "x-x-big5"=>"big5",
      "cseucpkdfmtjapanese"=>"cp51932",
      "euc-jp"=>"cp51932",
      "x-euc-jp"=>"cp51932",
      "csiso2022jp"=>"cp50221",
      "iso-2022-jp"=>"cp50221",
      "csshiftjis"=>"Windows-31J",
      "ms932"=>"Windows-31J",
      "ms_kanji"=>"Windows-31J",
      "shift-jis"=>"Windows-31J",
      "shift_jis"=>"Windows-31J",
      "sjis"=>"Windows-31J",
      "windows-31j"=>"Windows-31J",
      "x-sjis"=>"Windows-31J",
      "cseuckr"=>"euc-kr",
      "csksc56011987"=>"euc-kr",
      "euc-kr"=>"euc-kr",
      "iso-ir-149"=>"euc-kr",
      "korean"=>"euc-kr",
      "ks_c_5601-1987"=>"euc-kr",
      "ks_c_5601-1989"=>"euc-kr",
      "ksc5601"=>"euc-kr",
      "ksc_5601"=>"euc-kr",
      "windows-949"=>"euc-kr",
      "utf-16be"=>"utf-16be",
      "utf-16"=>"utf-16le",
      "utf-16le"=>"utf-16le",
    }

Class Method Summary

Class Method Details

._decode_uri_component(regexp, str, enc) (private)

Raises:

  • (ArgumentError)
[ GitHub ]

  
# File 'lib/uri/common.rb', line 420

# Decodes +str+ by replacing every match of +regexp+ with its entry in
# TBLDECWWWCOMP_, working on a binary copy of the string, and labels the
# result with encoding +enc+.
#
# Raises ArgumentError when +str+ contains a '%' that is not followed by
# two hexadecimal digits.
def self._decode_uri_component(regexp, str, enc)
  if /%(?!\h\h)/.match?(str)
    raise ArgumentError, "invalid %-encoding (#{str})"
  end

  binary = str.b
  decoded = binary.gsub(regexp, TBLDECWWWCOMP_)
  decoded.force_encoding(enc)
end

._encode_uri_component(regexp, table, str, enc) (private)

[ GitHub ]

  
# File 'lib/uri/common.rb', line 406

# Encodes +str+ (converted with to_s) by replacing every match of +regexp+
# with its entry in +table+.
#
# A string that is not already ASCII-8BIT is, when +enc+ is given and is
# not ASCII-8BIT, first converted to UTF-8 (replacing invalid or undefined
# characters) and then to +enc+ (characters without a mapping become
# numeric character references). It is then treated as raw bytes.
#
# The returned string is labelled US-ASCII; the argument is not modified.
def self._encode_uri_component(regexp, table, str, enc)
  buf = str.to_s.dup
  unless buf.encoding == Encoding::ASCII_8BIT
    transcode = enc && enc != Encoding::ASCII_8BIT
    if transcode
      buf.encode!(Encoding::UTF_8, invalid: :replace, undef: :replace)
      buf.encode!(enc, fallback: ->(x){"&##{x.ord};"})
    end
    buf.force_encoding(Encoding::ASCII_8BIT)
  end
  # gsub! returns nil when nothing matched, so its result is not chained.
  buf.gsub!(regexp, table)
  buf.force_encoding(Encoding::US_ASCII)
end

.const_missing(const)

[ GitHub ]

  
# File 'lib/uri/common.rb', line 43

# Resolves obsolete constant names. Tried in this order:
#
# 1. :REGEXP yields URI::RFC2396_REGEXP;
# 2. a key of RFC2396_PARSER.regexp yields that entry;
# 3. a constant found through RFC2396_Parser.const_get yields its value.
#
# Each successful lookup emits an obsolescence warning when $VERBOSE is set.
# Anything that resolves to a falsy value is handed to +super+.
def self.const_missing(const)
  if const == :REGEXP
    warn "URI::REGEXP is obsolete. Use URI::RFC2396_REGEXP explicitly.", uplevel: 1 if $VERBOSE
    return URI::RFC2396_REGEXP
  end

  value = RFC2396_PARSER.regexp[const]
  if value
    warn "URI::#{const} is obsolete. Use RFC2396_PARSER.regexp[#{const.inspect}] explicitly.", uplevel: 1 if $VERBOSE
    return value
  end

  value = RFC2396_Parser.const_get(const)
  if value
    warn "URI::#{const} is obsolete. Use RFC2396_Parser::#{const} explicitly.", uplevel: 1 if $VERBOSE
    return value
  end

  super
end

.decode_uri_component(str, enc = Encoding::UTF_8)

Like .decode_www_form_component, except that '+' is preserved.

[ GitHub ]

  
# File 'lib/uri/common.rb', line 402

# Decodes only percent escapes in +str+ (a '+' is left as is) by delegating
# to _decode_uri_component; +enc+ defaults to UTF-8.
def self.decode_uri_component(str, enc=Encoding::UTF_8)
  percent_escape = /%\h\h/
  _decode_uri_component(percent_escape, str, enc)
end

.decode_www_form(str, enc = Encoding::UTF_8, separator: '&', use__charset_: false, isindex: false)

Returns name/value pairs derived from the given string str, which must be an ASCII string.

The method may be used to decode the body of Net::HTTPResponse object res for which res['Content-Type'] is 'application/x-www-form-urlencoded'.

The returned data is an array of 2-element subarrays; each subarray is a name/value pair (both are strings). Each returned string has encoding enc, and has had invalid characters removed via {String#scrub}.

A simple example:

URI.decode_www_form('foo=0&bar=1&baz')
# => [["foo", "0"], ["bar", "1"], ["baz", ""]]

The returned strings have certain conversions, similar to those performed in .decode_www_form_component:

URI.decode_www_form('f%23o=%2F&b-r=%24&b+z=%40')
# => [["f#o", "/"], ["b-r", "$"], ["b z", "@"]]

The given string may contain consecutive separators:

URI.decode_www_form('foo=0&&bar=1&&baz=2')
# => [["foo", "0"], ["", ""], ["bar", "1"], ["", ""], ["baz", "2"]]

A different separator may be specified:

URI.decode_www_form('foo=0--bar=1--baz', separator: '--')
# => [["foo", "0"], ["bar", "1"], ["baz", ""]]

Raises:

  • (ArgumentError)
[ GitHub ]

  
# File 'lib/uri/common.rb', line 577

# Parses the ASCII-only form string +str+ into an array of [name, value]
# pairs.
#
# +enc+::           encoding applied to every returned string (looked up
#                   with Encoding.find).
# +separator+::     string that separates the fields.
# +use__charset_+:: when true, the first '_charset_' field whose value
#                   get_encoding recognizes replaces +enc+.
# +isindex+::       when true, a first field without '=' is returned as a
#                   value with an empty name.
#
# Returns an empty array for an empty +str+.
# Raises ArgumentError when +str+ is not ASCII-only.
def self.decode_www_form(str, enc=Encoding::UTF_8, separator: '&', use__charset_: false, isindex: false)
  unless str.ascii_only?
    raise ArgumentError, "the input of #{self.name}.#{__method__} must be ASCII only string"
  end
  pairs = []
  return pairs if str.empty?

  enc = Encoding.find(enc)
  escape = /\+|%\h\h/
  str.b.each_line(separator) do |field|
    field.chomp!(separator)
    # String#partition always yields three strings, so val is never nil.
    key, sep, val = field.partition('=')
    if isindex
      # Only the first field is subject to isindex handling.
      key, val = +'', key if sep.empty?
      isindex = false
    end

    if use__charset_ && key == '_charset_' && (e = get_encoding(val))
      enc = e
      use__charset_ = false
    end

    key.gsub!(escape, TBLDECWWWCOMP_)
    val.gsub!(escape, TBLDECWWWCOMP_)

    pairs << [key, val]
  end

  # Encoding is applied in a second pass because a '_charset_' field may
  # change enc after earlier pairs were already collected.
  pairs.each do |pair|
    pair.each do |s|
      s.force_encoding(enc)
      s.scrub!
    end
  end
  pairs
end

.decode_www_form_component(str, enc = Encoding::UTF_8)

Returns a string decoded from the given URL-encoded string str.

The given string is first encoded as Encoding::ASCII_8BIT (using String#b), then decoded (as below), and finally force-encoded to the given encoding enc.

The returned string:

  • Preserves:

    • Characters '*', '.', '-', and '_'.

    • Characters in ranges 'a'..'z', 'A'..'Z', and '0'..'9'.

    Example:

    URI.decode_www_form_component('*.-_azAZ09')
    # => "*.-_azAZ09"
  • Converts:

    • Character '+' to character ' '.

    • Each “percent notation” to an ASCII character.

    Example:

    URI.decode_www_form_component('Here+are+some+punctuation+characters%3A+%2C%3B%3F%3A')
    # => "Here are some punctuation characters: ,;?:"

Related: .decode_uri_component (preserves '+').

[ GitHub ]

  
# File 'lib/uri/common.rb', line 391

# Decodes '+' and percent escapes in +str+ by delegating to
# _decode_uri_component; +enc+ defaults to UTF-8.
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
  plus_or_percent_escape = /\+|%\h\h/
  _decode_uri_component(plus_or_percent_escape, str, enc)
end

.encode_uri_component(str, enc = nil)

Like .encode_www_form_component, except that ' ' (space) is encoded as '%20' (instead of '+').

[ GitHub ]

  
# File 'lib/uri/common.rb', line 397

# Encodes +str+ by delegating to _encode_uri_component with the
# TBLENCURICOMP_ replacement table. Every character other than
# '*', '-', '.', '_', ASCII letters and digits is looked up in that table.
def self.encode_uri_component(str, enc=nil)
  needs_escaping = /[^*\-.0-9A-Z_a-z]/
  _encode_uri_component(needs_escaping, TBLENCURICOMP_, str, enc)
end

.encode_www_form(enum, enc = nil)

Returns a URL-encoded string derived from the given Enumerable enum.

The result is suitable for use as form data for an HTTP request whose Content-Type is 'application/x-www-form-urlencoded'.

The returned string consists of the elements of enum, each converted to one or more URL-encoded strings, and all joined with character '&'.

Simple examples:

URI.encode_www_form([['foo', 0], ['bar', 1], ['baz', 2]])
# => "foo=0&bar=1&baz=2"
URI.encode_www_form({foo: 0, bar: 1, baz: 2})
# => "foo=0&bar=1&baz=2"

The returned string is formed using method .encode_www_form_component, which converts certain characters:

URI.encode_www_form('f#o': '/', 'b-r': '$', 'b z': '@')
# => "f%23o=%2F&b-r=%24&b+z=%40"

When enum is Array-like, each element ele is converted to a field:

  • If ele is an array of two or more elements, the field is formed from its first two elements (and any additional elements are ignored):

    name = URI.encode_www_form_component(ele[0], enc)
    value = URI.encode_www_form_component(ele[1], enc)
    "#{name}=#{value}"

    Examples:

    URI.encode_www_form([%w[foo bar], %w[baz bat bah]])
    # => "foo=bar&baz=bat"
    URI.encode_www_form([['foo', 0], ['bar', :baz, 'bat']])
    # => "foo=0&bar=baz"
  • If ele is an array of one element, the field is formed from ele[0]:

    URI.encode_www_form_component(ele[0])

    Example:

    URI.encode_www_form([['foo'], [:bar], [0]])
    # => "foo&bar&0"
  • Otherwise the field is formed from ele:

    URI.encode_www_form_component(ele)

    Example:

    URI.encode_www_form(['foo', :bar, 0])
    # => "foo&bar&0"

The elements of an Array-like enum may be a mixture:

URI.encode_www_form([['foo', 0], ['bar', 1, 2], ['baz'], :bat])
# => "foo=0&bar=1&baz&bat"

When enum is Hash-like, each key/value pair is converted to one or more fields:

  • If value is Array-convertible, each element ele in value is paired with key to form a field:

    name = URI.encode_www_form_component(key, enc)
    value = URI.encode_www_form_component(ele, enc)
    "#{name}=#{value}"

    Example:

    URI.encode_www_form({foo: [:bar, 1], baz: [:bat, :bam, 2]})
    # => "foo=bar&foo=1&baz=bat&baz=bam&baz=2"
  • Otherwise, key and value are paired to form a field:

    name = URI.encode_www_form_component(key, enc)
    value = URI.encode_www_form_component(value, enc)
    "#{name}=#{value}"

    Example:

    URI.encode_www_form({foo: 0, bar: 1, baz: 2})
    # => "foo=0&bar=1&baz=2"

The elements of a Hash-like enum may be a mixture:

URI.encode_www_form({foo: [0, 1], bar: 2})
# => "foo=0&foo=1&bar=2"
[ GitHub ]

  
# File 'lib/uri/common.rb', line 524

# Builds a form-encoded string from +enum+, joining the fields with '&'.
#
# Each element of +enum+ is destructured into a name +k+ and a value +v+:
#
# * a nil +v+ yields the bare encoded name;
# * a +v+ responding to +to_ary+ yields one field per element, each paired
#   with the same name (a nil element yields the bare encoded name);
# * any other +v+ yields "name=value".
#
# Names and values are encoded with encode_www_form_component using +enc+.
def self.encode_www_form(enum, enc=nil)
  enum.map do |k, v|
    if v.nil?
      encode_www_form_component(k, enc)
    elsif v.respond_to?(:to_ary)
      v.to_ary.map do |w|
        field = encode_www_form_component(k, enc)
        field << '=' << encode_www_form_component(w, enc) unless w.nil?
        # Return the field explicitly: a skipped modifier evaluates to nil,
        # which would drop the name from the joined result.
        field
      end.join('&')
    else
      field = encode_www_form_component(k, enc)
      field << '=' << encode_www_form_component(v, enc)
    end
  end.join('&')
end

.encode_www_form_component(str, enc = nil)

Returns a URL-encoded string derived from the given string str.

The returned string:

  • Preserves:

    • Characters '*', '.', '-', and '_'.

    • Characters in ranges 'a'..'z', 'A'..'Z', and '0'..'9'.

    Example:

    URI.encode_www_form_component('*.-_azAZ09')
    # => "*.-_azAZ09"
  • Converts:

    • Character ' ' to character '+'.

    • Any other character to “percent notation”; the percent notation for character c is '%%%X' % c.ord.

    Example:

    URI.encode_www_form_component('Here are some punctuation characters: ,;?:')
    # => "Here+are+some+punctuation+characters%3A+%2C%3B%3F%3A"

Encoding:

  • If str has encoding Encoding::ASCII_8BIT, argument enc is ignored.

  • Otherwise str is converted first to Encoding::UTF_8 (with suitable character replacements), and then to encoding enc.

In either case, the returned string has forced encoding Encoding::US_ASCII.

Related: .encode_uri_component (encodes ' ' as '%20').

[ GitHub ]

  
# File 'lib/uri/common.rb', line 358

# Encodes +str+ by delegating to _encode_uri_component with the
# TBLENCWWWCOMP_ replacement table. Every character other than
# '*', '-', '.', '_', ASCII letters and digits is looked up in that table.
def self.encode_www_form_component(str, enc=nil)
  needs_escaping = /[^*\-.0-9A-Z_a-z]/
  _encode_uri_component(needs_escaping, TBLENCWWWCOMP_, str, enc)
end

.extract(str, schemes = nil, &block)

This method is for internal use only.

Synopsis

URI::extract(str[, schemes][,&blk])

Args

str

String to extract URIs from.

schemes

Limit URI matching to specific schemes.

Description

Extracts URIs from a string. If a block is given, iterates through all matched URIs and returns nil; otherwise returns an array of the matches.

Usage

require "uri"

URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
# => ["http://foo.example.org/bla", "mailto:test@example.com"]
[ GitHub ]

  
# File 'lib/uri/common.rb', line 262

# Obsolete: forwards +str+, +schemes+ and the optional block to
# DEFAULT_PARSER.extract, warning first when $VERBOSE is set.
def self.extract(str, schemes = nil, &block) # :nodoc:
  if $VERBOSE
    warn "URI.extract is obsolete", uplevel: 1
  end
  parser = DEFAULT_PARSER
  parser.extract(str, schemes, &block)
end

.for(scheme, *arguments, default: Generic)

Returns a new object constructed from the given scheme, arguments, and default:

  • The new object is an instance of URI.scheme_list[scheme.upcase].

  • The object is initialized by calling the class initializer using scheme and arguments. See URI::Generic.new.

Examples:

values = ['john.doe', 'www.example.com', '123', nil, '/forum/questions/', nil, 'tag=networking&order=newest', 'top']
URI.for('https', *values)
# => #<URI::HTTPS https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>
URI.for('foo', *values, default: URI::HTTP)
# => #<URI::HTTP foo://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>
[ GitHub ]

  
# File 'lib/uri/common.rb', line 146

# Instantiates the class associated with +scheme+, passing +scheme+ and
# +arguments+ to its constructor.
#
# The class is looked up under the upcased scheme name: first in
# INITIAL_SCHEMES, then among the constants defined directly on Schemes
# (only for names shaped like a constant). If neither has it, +default+
# is used.
def self.for(scheme, *arguments, default: Generic)
  key = scheme.to_s.upcase

  klass = INITIAL_SCHEMES[key]
  unless klass
    registered = /\A[A-Z]\w*\z/.match?(key) && Schemes.const_defined?(key, false)
    klass = registered ? Schemes.const_get(key, false) : nil
  end
  klass ||= default

  klass.new(scheme, *arguments)
end

.get_encoding(label) (private)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/uri/common.rb', line 847

# Maps +label+ (stripped and downcased) through WEB_ENCODINGS_ and returns
# the matching Encoding, or nil when any StandardError is raised on the
# way (for example an unknown label).
def self.get_encoding(label)
  key = label.to_str.strip.downcase
  Encoding.find(WEB_ENCODINGS_[key])
rescue StandardError
  nil
end

.join(*str)

Merges the given URI strings str per RFC 2396.

Each string in str is converted to an RFC3986 URI before being merged.

Examples:

URI.join("http://example.com/","main.rbx")
# => #<URI::HTTP http://example.com/main.rbx>

URI.join('http://example.com', 'foo')
# => #<URI::HTTP http://example.com/foo>

URI.join('http://example.com', '/foo', '/bar')
# => #<URI::HTTP http://example.com/bar>

URI.join('http://example.com', '/foo', 'bar')
# => #<URI::HTTP http://example.com/bar>

URI.join('http://example.com', '/foo/', 'bar')
# => #<URI::HTTP http://example.com/foo/bar>
[ GitHub ]

  
# File 'lib/uri/common.rb', line 234

# Forwards all given URI strings to DEFAULT_PARSER.join and returns its
# result.
def self.join(*str)
  parser = DEFAULT_PARSER
  parser.join(*str)
end

.parse(uri)

Returns a new URI object constructed from the given string uri:

URI.parse('https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top')
# => #<URI::HTTPS https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>
URI.parse('http://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top')
# => #<URI::HTTP http://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>

It’s recommended to first .escape string uri if it may contain invalid URI characters.

[ GitHub ]

  
# File 'lib/uri/common.rb', line 207

# Forwards +uri+ to DEFAULT_PARSER.parse and returns its result.
def self.parse(uri)
  parser = DEFAULT_PARSER
  parser.parse(uri)
end

.parser=(parser = RFC3986_PARSER)

[ GitHub ]

  
# File 'lib/uri/common.rb', line 25

# Rebinds the parser-related constants of this module to match the class of
# +parser+ (RFC3986_PARSER when no argument is given).
#
# Statement order matters: +Parser+ is replaced first and is then read back
# by the comparison and by <tt>Parser.new</tt> further down.
def self.parser=(parser = RFC3986_PARSER)
  # Point Parser at the class of the supplied parser instance.
  remove_const(:Parser) if defined?(::URI::Parser)
  const_set("Parser", parser.class)

  # Drop any existing REGEXP / PATTERN constants ...
  remove_const(:REGEXP) if defined?(::URI::REGEXP)
  remove_const(:PATTERN) if defined?(::URI::PATTERN)
  # ... and define them again only when the RFC 2396 parser class is in use.
  if Parser == RFC2396_Parser
    const_set("REGEXP", URI::RFC2396_REGEXP)
    const_set("PATTERN", URI::RFC2396_REGEXP::PATTERN)
  end

  # Publish every entry of a fresh parser's regexp table as a constant,
  # replacing a constant of the same name if one is defined directly here.
  Parser.new.regexp.each_pair do |sym, str|
    remove_const(sym) if const_defined?(sym, false)
    const_set(sym, str)
  end
end

.regexp(schemes = nil)

This method is for internal use only.

Synopsis

URI::regexp([match_schemes])

Args

match_schemes

Array of schemes. If given, the resulting regexp matches URIs whose scheme is one of the match_schemes.

Description

Returns a Regexp object which matches URI-like strings. The Regexp object returned by this method includes an arbitrary number of capture groups (parentheses). Never rely on their number.

Usage

require 'uri'

# extract first URI from html_string
html_string.slice(URI.regexp)

# remove ftp URIs
html_string.sub(URI.regexp(['ftp']), '')

# You should not rely on the number of parentheses
html_string.scan(URI.regexp) do |*matches|
  p $&
end
[ GitHub ]

  
# File 'lib/uri/common.rb', line 299

# Obsolete: returns DEFAULT_PARSER.make_regexp(schemes), warning first when
# $VERBOSE is set.
def self.regexp(schemes = nil)# :nodoc:
  if $VERBOSE
    warn "URI.regexp is obsolete", uplevel: 1
  end
  parser = DEFAULT_PARSER
  parser.make_regexp(schemes)
end

.register_scheme(scheme, klass)

Registers the given klass as the class to be instantiated when parsing a URI with the given scheme:

URI.register_scheme('MS_SEARCH', URI::Generic) # => URI::Generic
URI.scheme_list['MS_SEARCH']                   # => URI::Generic

Note that after calling String#upcase on scheme, it must be a valid constant name.

[ GitHub ]

  
# File 'lib/uri/common.rb', line 102

# Stores +klass+ as a constant on Schemes, named after the upcased string
# form of +scheme+. Returns the value of Schemes.const_set.
def self.register_scheme(scheme, klass)
  const_name = scheme.to_s.upcase
  Schemes.const_set(const_name, klass)
end

.scheme_list

Returns a hash of the defined schemes:

URI.scheme_list
# =>
{"MAILTO"=>URI::MailTo,
 "LDAPS"=>URI::LDAPS,
 "WS"=>URI::WS,
 "HTTP"=>URI::HTTP,
 "HTTPS"=>URI::HTTPS,
 "LDAP"=>URI::LDAP,
 "FILE"=>URI::File,
 "FTP"=>URI::FTP}

Related: .register_scheme.

[ GitHub ]

  
# File 'lib/uri/common.rb', line 120

# Builds a hash that maps the upcased name of every constant on Schemes to
# that constant's value.
def self.scheme_list
  Schemes.constants.each_with_object({}) do |name, list|
    list[name.to_s.upcase] = Schemes.const_get(name)
  end
end

.split(uri)

Returns a 9-element array representing the parts of the URI formed from the string uri; each array element is a string or nil:

names = %w[scheme userinfo host port registry path opaque query fragment]
values = URI.split('https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top')
names.zip(values)
# =>
[["scheme", "https"],
 ["userinfo", "john.doe"],
 ["host", "www.example.com"],
 ["port", "123"],
 ["registry", nil],
 ["path", "/forum/questions/"],
 ["opaque", nil],
 ["query", "tag=networking&order=newest"],
 ["fragment", "top"]]
[ GitHub ]

  
# File 'lib/uri/common.rb', line 193

# Forwards +uri+ to DEFAULT_PARSER.split and returns its result.
def self.split(uri)
  parser = DEFAULT_PARSER
  parser.split(uri)
end