Object
Class that parses Strings into URIs.
It holds a Hash of pattern strings and a Hash of Regexps built from them, which are used to match and validate URIs.
URI::Parser.new([opts])
The constructor accepts a hash of options for the parser. The keys are the pattern names of URI components and the values are pattern strings. The constructor generates a set of regexps for parsing URIs.
You can use the following keys:
* :ESCAPED (URI::PATTERN::ESCAPED by default) * :UNRESERVED (URI::PATTERN::UNRESERVED by default) * :DOMLABEL (URI::PATTERN::DOMLABEL by default) * :TOPLABEL (URI::PATTERN::TOPLABEL by default) * :HOSTNAME (URI::PATTERN::HOSTNAME by default)
p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})") u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP:0xb78cf4f8 URL:http://example.jp/%uABCD> URI.parse(u.to_s) #=> raises URI::InvalidURIError s = "http://example.com/ABCD" u1 = p.parse(s) #=> #<URI::HTTP:0xb78c3220 URL:http://example.com/ABCD> u2 = URI.parse(s) #=> #<URI::HTTP:0xb78b6d54 URL:http://example.com/ABCD> u1 == u2 #=> true u1.eql?(u2) #=> false
 
               # File uri/rfc2396_parser.rb, line 100
# Builds the parser's lookup tables from +opts+.
#
# +opts+ is handed to initialize_pattern and the result is stored in
# @pattern; @pattern is then handed to initialize_regexp and that result is
# stored in @regexp. Both tables, and every value inside them, are frozen.
def initialize(opts = {})
  patterns = initialize_pattern(opts)
  patterns.each_value { |entry| entry.freeze }
  @pattern = patterns.freeze

  regexps = initialize_regexp(@pattern)
  regexps.each_value { |entry| entry.freeze }
  @regexp = regexps.freeze
end
             
            str
String to make safe
unsafe
Regexp to apply; a String listing the unsafe characters is also accepted. Defaults to self.regexp[:UNSAFE]
Constructs a safe String from str by replacing each unsafe character with the percent-encoded form (%XX) of its bytes.
 
               # File uri/rfc2396_parser.rb, line 300
# Percent-encodes every part of +str+ matched by +unsafe+.
#
# str::    String to make safe.
# unsafe:: Regexp selecting what to encode (defaults to @regexp[:UNSAFE]).
#          Anything that is not a Regexp is quoted and wrapped in a
#          character class, so a plain String of characters works too.
#
# Each byte of a match becomes "%XX" (two upper-case hex digits). The
# returned String is tagged as US-ASCII.
def escape(str, unsafe = @regexp[:UNSAFE])
  # perhaps unsafe is String object
  unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false) unless unsafe.kind_of?(Regexp)

  encoded = str.gsub(unsafe) do |match|
    match.each_byte.map { |byte| format('%%%02X', byte) }.join
  end
  encoded.force_encoding(Encoding::US_ASCII)
end
             
            str
String to search
schemes
Patterns to apply to str
Extracts URIs from str. If no block is given, returns an Array of the matches; otherwise calls the block once for each match and returns nil.
see also URI::Parser.make_regexp
 
               # File uri/rfc2396_parser.rb, line 262
# Scans +str+ with the Regexp returned by make_regexp(schemes).
#
# str::     String to search.
# schemes:: forwarded unchanged to make_regexp.
#
# Without a block, returns an Array holding the full text of every match.
# With a block, yields the full text of every match and returns nil.
def extract(str, schemes = nil)
  uri_re = make_regexp(schemes)

  unless block_given?
    found = []
    # Use the whole match rather than scan's block argument, which would be
    # the capture groups whenever the regexp contains any.
    str.scan(uri_re) { found << Regexp.last_match(0) }
    return found
  end

  str.scan(uri_re) { yield Regexp.last_match(0) }
  nil
end
             
             
               # File uri/rfc2396_parser.rb, line 336
# Returns whatever the unbound method stored in @@to_s produces when it is
# bound to this object and called.
# NOTE(review): @@to_s is defined outside this view — presumably a generic
# to_s implementation; confirm against the class body.
def inspect
  bound_to_s = @@to_s.bind(self)
  bound_to_s.call
end
             
            Returns self.regexp[:ABS_URI_REF] by default. If schemes is provided, returns a Regexp that only matches URIs starting with one of those schemes, built from Regexp.union(*schemes) and self.pattern[:X_ABS_URI].
 
               # File uri/rfc2396_parser.rb, line 275
# Returns the Regexp used to find URIs.
#
# schemes:: optional scheme or list of schemes, splatted into Regexp.union.
#
# With no +schemes+, returns @regexp[:ABS_URI_REF]. Otherwise builds an
# extended-mode Regexp from @pattern[:X_ABS_URI], prefixed with a lookahead
# that requires one of +schemes+ followed by ":".
def make_regexp(schemes = nil)
  return @regexp[:ABS_URI_REF] unless schemes

  /(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x
end
             
            uri
String
Parses uri and constructs either the matching URI scheme object (FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or a URI::Generic.
p = URI::Parser.new p.parse("ldap://ldap.example.com/dc=example?user=john") #=> #<URI::LDAP:0x00000000b9e7e8 URL:ldap://ldap.example.com/dc=example?user=john>
 
               # File uri/rfc2396_parser.rb, line 210
# Splits +uri+ into its components and builds a URI object from them.
#
# uri:: String to parse.
#
# If the scheme, upper-cased, is a key of URI.scheme_list, the class
# registered there is instantiated; otherwise Generic is used. The
# constructor receives the nine components from #split followed by this
# parser.
def parse(uri)
  scheme, userinfo, host, port,
    registry, path, opaque, query, fragment = self.split(uri)

  scheme_key = scheme && scheme.upcase
  uri_class =
    if scheme_key && URI.scheme_list.include?(scheme_key)
      URI.scheme_list[scheme_key]
    else
      Generic
    end

  uri_class.new(scheme, userinfo, host, port,
                registry, path, opaque, query,
                fragment, self)
end
             
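            Splits uri using the parser's regexps and returns an Array of its components: [scheme, userinfo, host, port, registry, path, opaque, query, fragment]. Raises URI::InvalidURIError if uri is not a valid URI.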
 
               # File uri/rfc2396_parser.rb, line 121
# Breaks +uri+ into its nine components.
#
# Returns [scheme, userinfo, host, port, registry, path, opaque, query,
# fragment]. Components that are absent are nil, except that path becomes
# '' when both path and opaque are missing.
#
# Raises InvalidURIError when +uri+ matches neither @regexp[:ABS_URI] nor
# @regexp[:REL_URI], or when an absolute URI has no scheme or has none of
# opaque, path, host and registry.
def split(uri)
  case uri
  when ''
    # null uri: every component stays nil; path is defaulted below
  when @regexp[:ABS_URI]
    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    # absoluteURI   = scheme ":" ( hier_part | opaque_part )
    # hier_part     = ( net_path | abs_path ) [ "?" query ]
    # opaque_part   = uric_no_slash *uric
    # abs_path      = "/"  path_segments
    # net_path      = "//" authority [ abs_path ]
    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]
    scheme, opaque, userinfo, host, port,
      registry, path, query, fragment = Regexp.last_match.captures

    unless scheme
      raise InvalidURIError,
        "bad URI(absolute but no scheme): #{uri}"
    end
    unless opaque || path || host || registry
      raise InvalidURIError,
        "bad URI(absolute but no path): #{uri}"
    end
  when @regexp[:REL_URI]
    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
    # net_path      = "//" authority [ abs_path ]
    # abs_path      = "/"  path_segments
    # rel_path      = rel_segment [ abs_path ]
    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]
    userinfo, host, port, registry,
      rel_segment, abs_path, query, fragment = Regexp.last_match.captures
    scheme = opaque = nil

    # The path is whichever of the two pieces matched, joined when both did.
    path =
      if rel_segment && abs_path
        rel_segment + abs_path
      else
        rel_segment || abs_path
      end
  else
    raise InvalidURIError, "bad URI(is not URI?): #{uri}"
  end

  path = '' unless path || opaque # (see RFC2396 Section 5.2)

  [
    scheme,
    userinfo, host, port,         # X
    registry,                     # X
    path,                         # Y
    opaque,                       # Y
    query,
    fragment
  ]
end
             
            str
String to remove escapes from
escaped
Regexp to apply. Defaults to self.regexp[:ESCAPED]
Removes escapes from str
 
               # File uri/rfc2396_parser.rb, line 331
# Replaces every match of +escaped+ in +str+ with the single byte whose hex
# value is given by the two characters following the first character of
# the match (e.g. "%41" becomes "A").
#
# str::     String to remove escapes from.
# escaped:: Regexp selecting the escape sequences (defaults to
#           @regexp[:ESCAPED]).
#
# Returns a new String in the same encoding as +str+.
def unescape(str, escaped = @regexp[:ESCAPED])
  enc = str.encoding
  # pack('C') yields an ASCII-8BIT string. Tag each decoded byte with the
  # input's encoding before gsub splices it in; otherwise a byte >= 0x80
  # cannot be appended to text that already contains raw non-ASCII
  # characters and gsub raises Encoding::CompatibilityError.
  str.gsub(escaped) { [$&[1, 2].hex].pack('C').force_encoding(enc) }
end