diff --git a/lib/uri/common.rb b/lib/uri/common.rb index d592fdc9ba..a6d08aa26f 100644 --- a/lib/uri/common.rb +++ b/lib/uri/common.rb @@ -295,6 +295,7 @@ module URI 256.times do |i| TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i) end + TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze TBLENCWWWCOMP_[' '] = '+' TBLENCWWWCOMP_.freeze TBLDECWWWCOMP_ = {} # :nodoc: @@ -320,16 +321,7 @@ module URI # # See URI.decode_www_form_component, URI.encode_www_form. def self.encode_www_form_component(str, enc=nil) - str = str.to_s.dup - if str.encoding != Encoding::ASCII_8BIT - if enc && enc != Encoding::ASCII_8BIT - str.encode!(Encoding::UTF_8, invalid: :replace, undef: :replace) - str.encode!(enc, fallback: ->(x){"&##{x.ord};"}) - end - str.force_encoding(Encoding::ASCII_8BIT) - end - str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_) - str.force_encoding(Encoding::US_ASCII) + _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc) end # Decodes given +str+ of URL-encoded form data. @@ -338,10 +330,43 @@ module URI # # See URI.encode_www_form_component, URI.decode_www_form. def self.decode_www_form_component(str, enc=Encoding::UTF_8) - raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str) - str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc) + _decode_uri_component(/\+|%\h\h/, str, enc) end + # Encodes +str+ using percent-encoding: every byte other than <tt>*-.0-9A-Z_a-z</tt> becomes %XX. + # + # Unlike URI.encode_www_form_component, this encodes SP to %20 instead of '+'. + def self.encode_uri_component(str, enc=nil) + _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCURICOMP_, str, enc) + end + + # Decodes the %XX sequences in given +str+ of URL-encoded data and tags the result with +enc+. + # + # This does not decode '+' to SP: only %XX sequences are replaced. Raises ArgumentError if a % in +str+ is not followed by two hex digits. 
+ def self.decode_uri_component(str, enc=Encoding::UTF_8) + _decode_uri_component(/%\h\h/, str, enc) + end + + def self._encode_uri_component(regexp, table, str, enc) + str = str.to_s.dup + if str.encoding != Encoding::ASCII_8BIT + if enc && enc != Encoding::ASCII_8BIT + str.encode!(Encoding::UTF_8, invalid: :replace, undef: :replace) + str.encode!(enc, fallback: ->(x){"&##{x.ord};"}) + end + str.force_encoding(Encoding::ASCII_8BIT) + end + str.gsub!(regexp, table) + str.force_encoding(Encoding::US_ASCII) + end + private_class_method :_encode_uri_component + + def self._decode_uri_component(regexp, str, enc) + raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str) + str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc) + end + private_class_method :_decode_uri_component + # Generates URL-encoded form data from given +enum+. # # This generates application/x-www-form-urlencoded data defined in HTML5 diff --git a/lib/uri/generic.rb b/lib/uri/generic.rb index cfa0de6b74..69698c4e2d 100644 --- a/lib/uri/generic.rb +++ b/lib/uri/generic.rb @@ -564,16 +564,26 @@ module URI end end - # Returns the user component. + # Returns the user component (without URI decoding). def user @user end - # Returns the password component. + # Returns the password component (without URI decoding). def password @password end + # Returns the user component after URI decoding, or +nil+ if no user is set. + def decoded_user + URI.decode_uri_component(@user) if @user + end + + # Returns the password component after URI decoding, or +nil+ if no password is set. + def decoded_password + URI.decode_uri_component(@password) if @password + end + # # Checks the host +v+ component for RFC2396 compliance # and against the URI::Parser Regexp for :HOST. 
diff --git a/test/uri/test_common.rb b/test/uri/test_common.rb index 0fa7e8ac70..8cb23fe167 100644 --- a/test/uri/test_common.rb +++ b/test/uri/test_common.rb @@ -130,6 +130,58 @@ class TestCommon < Test::Unit::TestCase assert_nothing_raised(ArgumentError){URI.decode_www_form_component("x"*(1024*1024))} end + def test_encode_uri_component + assert_equal("%00%20%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \ + "AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E", + URI.encode_uri_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~")) + assert_equal("%95A", URI.encode_uri_component( + "\x95\x41".force_encoding(Encoding::Shift_JIS))) + assert_equal("0B", URI.encode_uri_component( + "\x30\x42".force_encoding(Encoding::UTF_16BE))) + assert_equal("%1B%24B%24%22%1B%28B", URI.encode_uri_component( + "\e$B$\"\e(B".force_encoding(Encoding::ISO_2022_JP))) + + assert_equal("%E3%81%82", URI.encode_uri_component( + "\u3042", Encoding::ASCII_8BIT)) + assert_equal("%82%A0", URI.encode_uri_component( + "\u3042", Encoding::Windows_31J)) + assert_equal("%E3%81%82", URI.encode_uri_component( + "\u3042", Encoding::UTF_8)) + + assert_equal("%82%A0", URI.encode_uri_component( + "\u3042".encode("sjis"), Encoding::ASCII_8BIT)) + assert_equal("%A4%A2", URI.encode_uri_component( + "\u3042".encode("sjis"), Encoding::EUC_JP)) + assert_equal("%E3%81%82", URI.encode_uri_component( + "\u3042".encode("sjis"), Encoding::UTF_8)) + assert_equal("B0", URI.encode_uri_component( + "\u3042".encode("sjis"), Encoding::UTF_16LE)) + assert_equal("%26%23730%3B", URI.encode_uri_component( + "\u02DA", Encoding::WINDOWS_1252)) + + # invalid byte sequences come out as an encoded U+FFFD (%EF%BF%BD) + assert_equal("%EF%BF%BD%EF%BF%BD", URI.encode_uri_component( + "\xE3\x81\xFF", "utf-8")) + assert_equal("%E6%9F%8A%EF%BF%BD%EF%BF%BD", URI.encode_uri_component( + "\x95\x41\xff\xff".force_encoding(Encoding::Shift_JIS), "utf-8")) + end + + def test_decode_uri_component + assert_equal(" +!\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~", + URI.decode_uri_component( 
"%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \ + "AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E")) + assert_equal("\xA1\xA2".force_encoding(Encoding::EUC_JP), + URI.decode_uri_component("%A1%A2", "EUC-JP")) + assert_equal("\xE3\x81\x82\xE3\x81\x82".force_encoding("UTF-8"), + URI.decode_uri_component("\xE3\x81\x82%E3%81%82".force_encoding("UTF-8"))) + + assert_raise(ArgumentError){URI.decode_uri_component("%")} + assert_raise(ArgumentError){URI.decode_uri_component("%a")} + assert_raise(ArgumentError){URI.decode_uri_component("x%a_")} + assert_nothing_raised(ArgumentError){URI.decode_uri_component("x"*(1024*1024))} + end + def test_encode_www_form assert_equal("a=1", URI.encode_www_form("a" => "1")) assert_equal("a=1", URI.encode_www_form(a: 1)) diff --git a/test/uri/test_parser.rb b/test/uri/test_parser.rb index 03de137788..f8e9299d09 100644 --- a/test/uri/test_parser.rb +++ b/test/uri/test_parser.rb @@ -50,6 +50,15 @@ class URI::TestParser < Test::Unit::TestCase assert_raise(URI::InvalidURIError) { URI.parse('https://www.example.com/search?q=%XX') } end + def test_parse_auth + str = "http://al%40ice:p%40s%25sword@example.com/dir%2Fname/subdir?foo=bar%40example.com" + uri = URI.parse(str) + assert_equal "al%40ice", uri.user + assert_equal "p%40s%25sword", uri.password + assert_equal "al@ice", uri.decoded_user + assert_equal "p@s%sword", uri.decoded_password + end + def test_raise_bad_uri_for_integer assert_raise(URI::InvalidURIError) do URI.parse(1)