lib/uri: performance improvements [misc #10628]
* lib/uri/generic.rb (split_userinfo): fstring for 1-byte split (set_port): reduce bytecode size (check_path): reduce garbage via opt_str_freeze (query=): ditto (fragment=): ditto [misc #10628] * lib/uri/rfc3986_parser.rb (regexp): cache as attr (initialize): setup and freeze regexp attr once (split): reduce bytecode size, use opt_str_freeze (parse): minor bytecode and garbage reduction (default_regexp): rename for initialize git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48980 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
007c4dbe84
commit
aa93c62e3c
14
ChangeLog
14
ChangeLog
@ -1,3 +1,17 @@
|
|||||||
|
Thu Dec 25 08:42:11 2014 Eric Wong <e@80x24.org>
|
||||||
|
|
||||||
|
* lib/uri/generic.rb (split_userinfo): fstring for 1-byte split
|
||||||
|
(set_port): reduce bytecode size
|
||||||
|
(check_path): reduce garbage via opt_str_freeze
|
||||||
|
(query=): ditto
|
||||||
|
(fragment=): ditto
|
||||||
|
[misc #10628]
|
||||||
|
* lib/uri/rfc3986_parser.rb (regexp): cache as attr
|
||||||
|
(initialize): setup and freeze regexp attr once
|
||||||
|
(split): reduce bytecode size, use opt_str_freeze
|
||||||
|
(parse): minor bytecode and garbage reduction
|
||||||
|
(default_regexp): rename for initialize
|
||||||
|
|
||||||
Wed Dec 24 20:38:16 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Wed Dec 24 20:38:16 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* dir.c (glob_make_pattern): restrict searching case-insensitive
|
* dir.c (glob_make_pattern): restrict searching case-insensitive
|
||||||
|
@ -543,7 +543,7 @@ module URI
|
|||||||
# if properly formatted as 'user:password'
|
# if properly formatted as 'user:password'
|
||||||
def split_userinfo(ui)
|
def split_userinfo(ui)
|
||||||
return nil, nil unless ui
|
return nil, nil unless ui
|
||||||
user, password = ui.split(/:/, 2)
|
user, password = ui.split(':'.freeze, 2)
|
||||||
|
|
||||||
return user, password
|
return user, password
|
||||||
end
|
end
|
||||||
@ -695,13 +695,7 @@ module URI
|
|||||||
# see also URI::Generic.port=
|
# see also URI::Generic.port=
|
||||||
#
|
#
|
||||||
def set_port(v)
|
def set_port(v)
|
||||||
unless !v || v.kind_of?(Fixnum)
|
v = v.empty? ? nil : v.to_i unless !v || v.kind_of?(Fixnum)
|
||||||
if v.empty?
|
|
||||||
v = nil
|
|
||||||
else
|
|
||||||
v = v.to_i
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@port = v
|
@port = v
|
||||||
end
|
end
|
||||||
protected :set_port
|
protected :set_port
|
||||||
@ -768,13 +762,14 @@ module URI
|
|||||||
|
|
||||||
# If scheme is ftp, path may be relative.
|
# If scheme is ftp, path may be relative.
|
||||||
# See RFC 1738 section 3.2.2, and RFC 2396.
|
# See RFC 1738 section 3.2.2, and RFC 2396.
|
||||||
if @scheme && @scheme != "ftp"
|
if @scheme && @scheme != "ftp".freeze
|
||||||
if v && v != '' && parser.regexp[:ABS_PATH] !~ v
|
if v && v != ''.freeze && parser.regexp[:ABS_PATH] !~ v
|
||||||
raise InvalidComponentError,
|
raise InvalidComponentError,
|
||||||
"bad component(expected absolute path component): #{v}"
|
"bad component(expected absolute path component): #{v}"
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
if v && v != '' && parser.regexp[:ABS_PATH] !~ v && parser.regexp[:REL_PATH] !~ v
|
if v && v != ''.freeze && parser.regexp[:ABS_PATH] !~ v &&
|
||||||
|
parser.regexp[:REL_PATH] !~ v
|
||||||
raise InvalidComponentError,
|
raise InvalidComponentError,
|
||||||
"bad component(expected relative path component): #{v}"
|
"bad component(expected relative path component): #{v}"
|
||||||
end
|
end
|
||||||
@ -849,7 +844,7 @@ module URI
|
|||||||
x = v.to_str
|
x = v.to_str
|
||||||
v = x.dup if x.equal? v
|
v = x.dup if x.equal? v
|
||||||
v.encode!(Encoding::UTF_8) rescue nil
|
v.encode!(Encoding::UTF_8) rescue nil
|
||||||
v.delete!("\t\r\n")
|
v.delete!("\t\r\n".freeze)
|
||||||
v.force_encoding(Encoding::ASCII_8BIT)
|
v.force_encoding(Encoding::ASCII_8BIT)
|
||||||
v.gsub!(/(?!%\h\h|[!$-&(-;=?-_a-~])./n.freeze){'%%%02X'.freeze % $&.ord}
|
v.gsub!(/(?!%\h\h|[!$-&(-;=?-_a-~])./n.freeze){'%%%02X'.freeze % $&.ord}
|
||||||
v.force_encoding(Encoding::US_ASCII)
|
v.force_encoding(Encoding::US_ASCII)
|
||||||
@ -939,9 +934,9 @@ module URI
|
|||||||
x = v.to_str
|
x = v.to_str
|
||||||
v = x.dup if x.equal? v
|
v = x.dup if x.equal? v
|
||||||
v.encode!(Encoding::UTF_8) rescue nil
|
v.encode!(Encoding::UTF_8) rescue nil
|
||||||
v.delete!("\t\r\n")
|
v.delete!("\t\r\n".freeze)
|
||||||
v.force_encoding(Encoding::ASCII_8BIT)
|
v.force_encoding(Encoding::ASCII_8BIT)
|
||||||
v.gsub!(/(?!%\h\h|[!-~])./n){'%%%02X' % $&.ord}
|
v.gsub!(/(?!%\h\h|[!-~])./n){'%%%02X'.freeze % $&.ord}
|
||||||
v.force_encoding(Encoding::US_ASCII)
|
v.force_encoding(Encoding::US_ASCII)
|
||||||
@fragment = v
|
@fragment = v
|
||||||
end
|
end
|
||||||
|
@ -4,6 +4,11 @@ module URI
|
|||||||
# this regexp is modified so that host is not an empty string
|
# this regexp is modified so that host is not an empty string
|
||||||
RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
|
RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
|
||||||
RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+)\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+)(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
|
RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+)\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+)(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
|
||||||
|
attr_reader :regexp
|
||||||
|
|
||||||
|
def initialize
|
||||||
|
@regexp = default_regexp.each_value(&:freeze).freeze
|
||||||
|
end
|
||||||
|
|
||||||
def split(uri) #:nodoc:
|
def split(uri) #:nodoc:
|
||||||
begin
|
begin
|
||||||
@ -11,42 +16,52 @@ module URI
|
|||||||
rescue NoMethodError
|
rescue NoMethodError
|
||||||
raise InvalidURIError, "bad URI(is not URI?): #{uri}"
|
raise InvalidURIError, "bad URI(is not URI?): #{uri}"
|
||||||
end
|
end
|
||||||
unless uri.ascii_only?
|
uri.ascii_only? or
|
||||||
raise InvalidURIError, "URI must be ascii only #{uri.dump}"
|
raise InvalidURIError, "URI must be ascii only #{uri.dump}"
|
||||||
end
|
|
||||||
if m = RFC3986_URI.match(uri)
|
if m = RFC3986_URI.match(uri)
|
||||||
ary = []
|
query = m["query".freeze]
|
||||||
ary << m["scheme"]
|
scheme = m["scheme".freeze]
|
||||||
if m["path-rootless"] # opaque
|
opaque = m["path-rootless".freeze]
|
||||||
ary << nil # userinfo
|
if opaque
|
||||||
ary << nil # host
|
opaque << "?#{query}" if query
|
||||||
ary << nil # port
|
[ scheme,
|
||||||
ary << nil # registry
|
nil, # userinfo
|
||||||
ary << nil # path
|
nil, # host
|
||||||
ary << m["path-rootless"]
|
nil, # port
|
||||||
ary[-1] << '?' << m["query"] if m["query"]
|
nil, # registry
|
||||||
ary << nil # query
|
nil, # path
|
||||||
ary << m["fragment"]
|
opaque,
|
||||||
|
nil, # query
|
||||||
|
m["fragment".freeze]
|
||||||
|
]
|
||||||
else # normal
|
else # normal
|
||||||
ary << m["userinfo"]
|
[ scheme,
|
||||||
ary << m["host"]
|
m["userinfo".freeze],
|
||||||
ary << m["port"]
|
m["host".freeze],
|
||||||
ary << nil # registry
|
m["port".freeze],
|
||||||
ary << (m["path-abempty"] || m["path-absolute"] || m["path-empty"])
|
nil, # registry
|
||||||
ary << nil # opaque
|
(m["path-abempty".freeze] ||
|
||||||
ary << m["query"]
|
m["path-absolute".freeze] ||
|
||||||
ary << m["fragment"]
|
m["path-empty".freeze]),
|
||||||
|
nil, # opaque
|
||||||
|
query,
|
||||||
|
m["fragment".freeze]
|
||||||
|
]
|
||||||
end
|
end
|
||||||
elsif m = RFC3986_relative_ref.match(uri)
|
elsif m = RFC3986_relative_ref.match(uri)
|
||||||
ary = [nil]
|
[ nil, # scheme
|
||||||
ary << m["userinfo"]
|
m["userinfo".freeze],
|
||||||
ary << m["host"]
|
m["host".freeze],
|
||||||
ary << m["port"]
|
m["port".freeze],
|
||||||
ary << nil # registry
|
nil, # registry,
|
||||||
ary << (m["path-abempty"] || m["path-absolute"] || m["path-noscheme"] || m["path-empty"])
|
(m["path-abempty".freeze] ||
|
||||||
ary << nil # opaque
|
m["path-absolute".freeze] ||
|
||||||
ary << m["query"]
|
m["path-noscheme".freeze] ||
|
||||||
ary << m["fragment"]
|
m["path-empty".freeze]),
|
||||||
|
nil, # opaque
|
||||||
|
m["query".freeze],
|
||||||
|
m["fragment".freeze]
|
||||||
|
]
|
||||||
else
|
else
|
||||||
raise InvalidURIError, "bad URI(is not URI?): #{uri}"
|
raise InvalidURIError, "bad URI(is not URI?): #{uri}"
|
||||||
end
|
end
|
||||||
@ -55,11 +70,11 @@ module URI
|
|||||||
def parse(uri) # :nodoc:
|
def parse(uri) # :nodoc:
|
||||||
scheme, userinfo, host, port,
|
scheme, userinfo, host, port,
|
||||||
registry, path, opaque, query, fragment = self.split(uri)
|
registry, path, opaque, query, fragment = self.split(uri)
|
||||||
|
scheme_list = URI.scheme_list
|
||||||
if scheme && URI.scheme_list.include?(scheme.upcase)
|
if scheme && scheme_list.include?(uc = scheme.upcase)
|
||||||
URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port,
|
scheme_list[uc].new(scheme, userinfo, host, port,
|
||||||
registry, path, opaque, query,
|
registry, path, opaque, query,
|
||||||
fragment, self)
|
fragment, self)
|
||||||
else
|
else
|
||||||
Generic.new(scheme, userinfo, host, port,
|
Generic.new(scheme, userinfo, host, port,
|
||||||
registry, path, opaque, query,
|
registry, path, opaque, query,
|
||||||
@ -78,7 +93,9 @@ module URI
|
|||||||
@@to_s.bind(self).call
|
@@to_s.bind(self).call
|
||||||
end
|
end
|
||||||
|
|
||||||
def regexp
|
private
|
||||||
|
|
||||||
|
def default_regexp # :nodoc:
|
||||||
{
|
{
|
||||||
SCHEME: /\A[A-Za-z][A-Za-z0-9+\-.]*\z/,
|
SCHEME: /\A[A-Za-z][A-Za-z0-9+\-.]*\z/,
|
||||||
USERINFO: /\A(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*\z/,
|
USERINFO: /\A(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*\z/,
|
||||||
@ -92,8 +109,6 @@ module URI
|
|||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def convert_to_uri(uri)
|
def convert_to_uri(uri)
|
||||||
if uri.is_a?(URI::Generic)
|
if uri.is_a?(URI::Generic)
|
||||||
uri
|
uri
|
||||||
|
Loading…
x
Reference in New Issue
Block a user