Object
Class that parses String's into URI's.
It contains a Hash set of patterns and Regexp's that match and validate.
URI::Parser.new([opts])
The constructor accepts a hash as options for parser. Keys of options are pattern names of URI components and values of options are pattern strings. The constructor generates set of regexps for parsing URIs.
You can use the following keys:
* :ESCAPED (URI::PATTERN::ESCAPED in default) * :UNRESERVED (URI::PATTERN::UNRESERVED in default) * :DOMLABEL (URI::PATTERN::DOMLABEL in default) * :TOPLABEL (URI::PATTERN::TOPLABEL in default) * :HOSTNAME (URI::PATTERN::HOSTNAME in default)
p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})") u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP http://example.jp/%uABCD> URI.parse(u.to_s) #=> raises URI::InvalidURIError s = "http://example.com/ABCD" u1 = p.parse(s) #=> #<URI::HTTP http://example.com/ABCD> u2 = URI.parse(s) #=> #<URI::HTTP http://example.com/ABCD> u1 == u2 #=> true u1.eql?(u2) #=> false
# File uri/rfc2396_parser.rb, line 99
def initialize(opts = {})
@pattern = initialize_pattern(opts)
@pattern.each_value(&:freeze)
@pattern.freeze
@regexp = initialize_regexp(@pattern)
@regexp.each_value(&:freeze)
@regexp.freeze
end
str
String to make safe
unsafe
Regexp to apply. Defaults to self.regexp
Constructs a safe String from str, removing unsafe characters, replacing them with codes.
# File uri/rfc2396_parser.rb, line 287
def escape(str, unsafe = @regexp[:UNSAFE])
unless unsafe.kind_of?(Regexp)
# perhaps unsafe is String object
unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false)
end
str.gsub(unsafe) do
us = $&
tmp = ''
us.each_byte do |uc|
tmp << sprintf('%%%02X', uc)
end
tmp
end.force_encoding(Encoding::US_ASCII)
end
str
String to search
schemes
Patterns to apply to str
Attempts to parse and merge a set of URIs. If no block given, then returns the result, else it calls block for each element in result.
See also URI::Parser.make_regexp.
# File uri/rfc2396_parser.rb, line 249
def extract(str, schemes = nil)
if block_given?
str.scan(make_regexp(schemes)) { yield $& }
nil
else
result = []
str.scan(make_regexp(schemes)) { result.push $& }
result
end
end
# File uri/rfc2396_parser.rb, line 325
def inspect
@@to_s.bind_call(self)
end
Returns Regexp that is default self.regexp, unless schemes is provided. Then it is a Regexp.union with self.pattern.
# File uri/rfc2396_parser.rb, line 262
def make_regexp(schemes = nil)
unless schemes
@regexp[:ABS_URI_REF]
else
/(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x
end
end
uri
String
Parses uri and constructs either matching URI scheme object (File, FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic.
p = URI::Parser.new p.parse("ldap://ldap.example.com/dc=example?user=john") #=> #<URI::LDAP ldap://ldap.example.com/dc=example?user=john>
# File uri/rfc2396_parser.rb, line 209
def parse(uri)
URI.for(*self.split(uri), self)
end
Returns a split URI against regexp.
# File uri/rfc2396_parser.rb, line 120
def split(uri)
case uri
when ''
# null uri
when @regexp[:ABS_URI]
scheme, opaque, userinfo, host, port,
registry, path, query, fragment = $~[1..-1]
# URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
# absoluteURI = scheme ":" ( hier_part | opaque_part )
# hier_part = ( net_path | abs_path ) [ "?" query ]
# opaque_part = uric_no_slash *uric
# abs_path = "/" path_segments
# net_path = "//" authority [ abs_path ]
# authority = server | reg_name
# server = [ [ userinfo "@" ] hostport ]
if !scheme
raise InvalidURIError,
"bad URI(absolute but no scheme): #{uri}"
end
if !opaque && (!path && (!host && !registry))
raise InvalidURIError,
"bad URI(absolute but no path): #{uri}"
end
when @regexp[:REL_URI]
scheme = nil
opaque = nil
userinfo, host, port, registry,
rel_segment, abs_path, query, fragment = $~[1..-1]
if rel_segment && abs_path
path = rel_segment + abs_path
elsif rel_segment
path = rel_segment
elsif abs_path
path = abs_path
end
# URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
# relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
# net_path = "//" authority [ abs_path ]
# abs_path = "/" path_segments
# rel_path = rel_segment [ abs_path ]
# authority = server | reg_name
# server = [ [ userinfo "@" ] hostport ]
else
raise InvalidURIError, "bad URI(is not URI?): #{uri}"
end
path = '' if !path && !opaque # (see RFC2396 Section 5.2)
ret = [
scheme,
userinfo, host, port, # X
registry, # X
path, # Y
opaque, # Y
query,
fragment
]
return ret
end
str
String to remove escapes from
escaped
Regexp to apply. Defaults to self.regexp
Removes escapes from str.
# File uri/rfc2396_parser.rb, line 318
def unescape(str, escaped = @regexp[:ESCAPED])
enc = str.encoding
enc = Encoding::UTF_8 if enc == Encoding::US_ASCII
str.gsub(escaped) { [$&[1, 2]].pack('H2').force_encoding(enc) }
end