[ruby/cgi] Implement CGI.url_encode and CGI.url_decode
[Feature #18822] Ruby currently lacks an RFC 3986 compliant escape method.
https://github.com/ruby/cgi/commit/c2729c7f33
This commit is contained in:
parent
5389c9813b
commit
3850113e20
@ -200,7 +200,7 @@ url_unreserved_char(unsigned char c)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
optimized_escape(VALUE str)
|
optimized_escape(VALUE str, int plus_escape)
|
||||||
{
|
{
|
||||||
long i, len, beg = 0;
|
long i, len, beg = 0;
|
||||||
VALUE dest = 0;
|
VALUE dest = 0;
|
||||||
@ -220,7 +220,7 @@ optimized_escape(VALUE str)
|
|||||||
rb_str_cat(dest, cstr + beg, i - beg);
|
rb_str_cat(dest, cstr + beg, i - beg);
|
||||||
beg = i + 1;
|
beg = i + 1;
|
||||||
|
|
||||||
if (c == ' ') {
|
if (plus_escape && c == ' ') {
|
||||||
rb_str_cat_cstr(dest, "+");
|
rb_str_cat_cstr(dest, "+");
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -242,7 +242,7 @@ optimized_escape(VALUE str)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
optimized_unescape(VALUE str, VALUE encoding)
|
optimized_unescape(VALUE str, VALUE encoding, int unescape_plus)
|
||||||
{
|
{
|
||||||
long i, len, beg = 0;
|
long i, len, beg = 0;
|
||||||
VALUE dest = 0;
|
VALUE dest = 0;
|
||||||
@ -265,7 +265,7 @@ optimized_unescape(VALUE str, VALUE encoding)
|
|||||||
| char_to_number(cstr[i+2]));
|
| char_to_number(cstr[i+2]));
|
||||||
clen = 2;
|
clen = 2;
|
||||||
}
|
}
|
||||||
else if (c == '+') {
|
else if (unescape_plus && c == '+') {
|
||||||
buf[0] = ' ';
|
buf[0] = ' ';
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -348,7 +348,7 @@ cgiesc_unescape_html(VALUE self, VALUE str)
|
|||||||
* call-seq:
|
* call-seq:
|
||||||
* CGI.escape(string) -> string
|
* CGI.escape(string) -> string
|
||||||
*
|
*
|
||||||
* Returns URL-escaped string.
|
* Returns URL-escaped string (+application/x-www-form-urlencoded+).
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
static VALUE
|
static VALUE
|
||||||
@ -357,7 +357,7 @@ cgiesc_escape(VALUE self, VALUE str)
|
|||||||
StringValue(str);
|
StringValue(str);
|
||||||
|
|
||||||
if (rb_enc_str_asciicompat_p(str)) {
|
if (rb_enc_str_asciicompat_p(str)) {
|
||||||
return optimized_escape(str);
|
return optimized_escape(str, 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return rb_call_super(1, &str);
|
return rb_call_super(1, &str);
|
||||||
@ -376,7 +376,7 @@ accept_charset(int argc, VALUE *argv, VALUE self)
|
|||||||
* call-seq:
|
* call-seq:
|
||||||
* CGI.unescape(string, encoding=@@accept_charset) -> string
|
* CGI.unescape(string, encoding=@@accept_charset) -> string
|
||||||
*
|
*
|
||||||
* Returns URL-unescaped string.
|
* Returns URL-unescaped string (+application/x-www-form-urlencoded+).
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
static VALUE
|
static VALUE
|
||||||
@ -388,7 +388,50 @@ cgiesc_unescape(int argc, VALUE *argv, VALUE self)
|
|||||||
|
|
||||||
if (rb_enc_str_asciicompat_p(str)) {
|
if (rb_enc_str_asciicompat_p(str)) {
|
||||||
VALUE enc = accept_charset(argc-1, argv+1, self);
|
VALUE enc = accept_charset(argc-1, argv+1, self);
|
||||||
return optimized_unescape(str, enc);
|
return optimized_unescape(str, enc, 1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return rb_call_super(argc, argv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* CGI.escapeURIComponent(string) -> string
|
||||||
|
*
|
||||||
|
* Returns URL-escaped string following RFC 3986.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
cgiesc_escape_uri_component(VALUE self, VALUE str)
|
||||||
|
{
|
||||||
|
StringValue(str);
|
||||||
|
|
||||||
|
if (rb_enc_str_asciicompat_p(str)) {
|
||||||
|
return optimized_escape(str, 0);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return rb_call_super(1, &str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* CGI.unescapeURIComponent(string, encoding=@@accept_charset) -> string
|
||||||
|
*
|
||||||
|
* Returns URL-unescaped string following RFC 3986.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
cgiesc_unescape_uri_component(int argc, VALUE *argv, VALUE self)
|
||||||
|
{
|
||||||
|
VALUE str = (rb_check_arity(argc, 1, 2), argv[0]);
|
||||||
|
|
||||||
|
StringValue(str);
|
||||||
|
|
||||||
|
if (rb_enc_str_asciicompat_p(str)) {
|
||||||
|
VALUE enc = accept_charset(argc-1, argv+1, self);
|
||||||
|
return optimized_unescape(str, enc, 0);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return rb_call_super(argc, argv);
|
return rb_call_super(argc, argv);
|
||||||
@ -414,6 +457,8 @@ InitVM_escape(void)
|
|||||||
rb_mUtil = rb_define_module_under(rb_cCGI, "Util");
|
rb_mUtil = rb_define_module_under(rb_cCGI, "Util");
|
||||||
rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1);
|
rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1);
|
||||||
rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1);
|
rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1);
|
||||||
|
rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1);
|
||||||
|
rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1);
|
||||||
rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1);
|
rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1);
|
||||||
rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1);
|
rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1);
|
||||||
rb_prepend_module(rb_mUtil, rb_mEscape);
|
rb_prepend_module(rb_mUtil, rb_mEscape);
|
||||||
|
@ -5,24 +5,57 @@ class CGI
|
|||||||
extend Util
|
extend Util
|
||||||
end
|
end
|
||||||
module CGI::Util
|
module CGI::Util
|
||||||
@@accept_charset="UTF-8" unless defined?(@@accept_charset)
|
@@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset)
|
||||||
# URL-encode a string.
|
|
||||||
|
# URL-encode a string into application/x-www-form-urlencoded.
|
||||||
|
# Space characters (+" "+) are encoded with plus signs (+"+"+)
|
||||||
# url_encoded_string = CGI.escape("'Stop!' said Fred")
|
# url_encoded_string = CGI.escape("'Stop!' said Fred")
|
||||||
# # => "%27Stop%21%27+said+Fred"
|
# # => "%27Stop%21%27+said+Fred"
|
||||||
def escape(string)
|
def escape(string)
|
||||||
encoding = string.encoding
|
encoding = string.encoding
|
||||||
string.b.gsub(/([^ a-zA-Z0-9_.\-~]+)/) do |m|
|
buffer = string.b
|
||||||
|
buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m|
|
||||||
'%' + m.unpack('H2' * m.bytesize).join('%').upcase
|
'%' + m.unpack('H2' * m.bytesize).join('%').upcase
|
||||||
end.tr(' ', '+').force_encoding(encoding)
|
end
|
||||||
|
buffer.tr!(' ', '+')
|
||||||
|
buffer.force_encoding(encoding)
|
||||||
end
|
end
|
||||||
|
|
||||||
# URL-decode a string with encoding(optional).
|
# URL-decode an application/x-www-form-urlencoded string, optionally specifying the encoding of the result.
|
||||||
# string = CGI.unescape("%27Stop%21%27+said+Fred")
|
# string = CGI.unescape("%27Stop%21%27+said+Fred")
|
||||||
# # => "'Stop!' said Fred"
|
# # => "'Stop!' said Fred"
|
||||||
def unescape(string,encoding=@@accept_charset)
|
def unescape(string, encoding = @@accept_charset)
|
||||||
str=string.tr('+', ' ').b.gsub(/((?:%[0-9a-fA-F]{2})+)/) do |m|
|
str = string.tr('+', ' ')
|
||||||
|
str = str.b
|
||||||
|
str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
|
||||||
[m.delete('%')].pack('H*')
|
[m.delete('%')].pack('H*')
|
||||||
end.force_encoding(encoding)
|
end
|
||||||
|
str.force_encoding(encoding)
|
||||||
|
str.valid_encoding? ? str : str.force_encoding(string.encoding)
|
||||||
|
end
|
||||||
|
|
||||||
|
# URL-encode a string following RFC 3986
|
||||||
|
# Space characters (+" "+) are encoded with (+"%20"+)
|
||||||
|
# url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred")
|
||||||
|
# # => "%27Stop%21%27%20said%20Fred"
|
||||||
|
def escapeURIComponent(string)
|
||||||
|
encoding = string.encoding
|
||||||
|
buffer = string.b
|
||||||
|
buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m|
|
||||||
|
'%' + m.unpack('H2' * m.bytesize).join('%').upcase
|
||||||
|
end
|
||||||
|
buffer.force_encoding(encoding)
|
||||||
|
end
|
||||||
|
|
||||||
|
# URL-decode a string following RFC 3986, optionally specifying the encoding of the result.
|
||||||
|
# string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred")
|
||||||
|
# # => "'Stop!'+said Fred"
|
||||||
|
def unescapeURIComponent(string, encoding = @@accept_charset)
|
||||||
|
str = string.b
|
||||||
|
str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
|
||||||
|
[m.delete('%')].pack('H*')
|
||||||
|
end
|
||||||
|
str.force_encoding(encoding)
|
||||||
str.valid_encoding? ? str : str.force_encoding(string.encoding)
|
str.valid_encoding? ? str : str.force_encoding(string.encoding)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -23,7 +23,6 @@ class CGIUtilTest < Test::Unit::TestCase
|
|||||||
ENV.update(@environ)
|
ENV.update(@environ)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
def test_cgi_escape
|
def test_cgi_escape
|
||||||
assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape(@str1))
|
assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape(@str1))
|
||||||
assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escape(@str1).ascii_only?) if defined?(::Encoding)
|
assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escape(@str1).ascii_only?) if defined?(::Encoding)
|
||||||
@ -70,6 +69,54 @@ class CGIUtilTest < Test::Unit::TestCase
|
|||||||
end;
|
end;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_cgi_escapeURIComponent
|
||||||
|
assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escapeURIComponent(@str1))
|
||||||
|
assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escapeURIComponent(@str1).ascii_only?) if defined?(::Encoding)
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_cgi_escapeURIComponent_with_unreserved_characters
|
||||||
|
assert_equal("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~",
|
||||||
|
CGI.escapeURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"),
|
||||||
|
"should not encode any unreserved characters, as per RFC3986 Section 2.3")
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_cgi_escapeURIComponent_with_invalid_byte_sequence
|
||||||
|
assert_equal('%C0%3C%3C', CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8")))
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_cgi_escapeURIComponent_preserve_encoding
|
||||||
|
assert_equal(Encoding::US_ASCII, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("US-ASCII")).encoding)
|
||||||
|
assert_equal(Encoding::ASCII_8BIT, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("ASCII-8BIT")).encoding)
|
||||||
|
assert_equal(Encoding::UTF_8, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8")).encoding)
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_cgi_unescapeURIComponent
|
||||||
|
str = CGI.unescapeURIComponent('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93')
|
||||||
|
assert_equal(@str1, str)
|
||||||
|
return unless defined?(::Encoding)
|
||||||
|
|
||||||
|
assert_equal("foo+bar", CGI.unescapeURIComponent("foo+bar"))
|
||||||
|
|
||||||
|
assert_equal(@str1.encoding, str.encoding)
|
||||||
|
assert_equal("\u{30E1 30E2 30EA 691C 7D22}", CGI.unescapeURIComponent("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2"))
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_cgi_unescapeURIComponent_preserve_encoding
|
||||||
|
assert_equal(Encoding::US_ASCII, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("US-ASCII")).encoding)
|
||||||
|
assert_equal(Encoding::ASCII_8BIT, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("ASCII-8BIT")).encoding)
|
||||||
|
assert_equal(Encoding::UTF_8, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("UTF-8")).encoding)
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_cgi_unescapeURIComponent_accept_charset
|
||||||
|
return unless defined?(::Encoding)
|
||||||
|
|
||||||
|
assert_raise(TypeError) {CGI.unescapeURIComponent('', nil)}
|
||||||
|
assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}")
|
||||||
|
begin;
|
||||||
|
assert_equal("", CGI.unescapeURIComponent(''))
|
||||||
|
end;
|
||||||
|
end
|
||||||
|
|
||||||
def test_cgi_pretty
|
def test_cgi_pretty
|
||||||
assert_equal("<HTML>\n <BODY>\n </BODY>\n</HTML>\n",CGI.pretty("<HTML><BODY></BODY></HTML>"))
|
assert_equal("<HTML>\n <BODY>\n </BODY>\n</HTML>\n",CGI.pretty("<HTML><BODY></BODY></HTML>"))
|
||||||
assert_equal("<HTML>\n\t<BODY>\n\t</BODY>\n</HTML>\n",CGI.pretty("<HTML><BODY></BODY></HTML>","\t"))
|
assert_equal("<HTML>\n\t<BODY>\n\t</BODY>\n</HTML>\n",CGI.pretty("<HTML><BODY></BODY></HTML>","\t"))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user