* lib/cgi.rb (CGI::unescapeHTML): more encoding-aware unescaping.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18798 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
33dd0c35f1
commit
29449d70be
@ -1,3 +1,7 @@
|
|||||||
|
Sun Aug 24 06:39:05 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* lib/cgi.rb (CGI::unescapeHTML): more encoding-aware unescaping.
|
||||||
|
|
||||||
Sun Aug 24 04:23:19 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
Sun Aug 24 04:23:19 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
* encoding.c (enc_compatible_p): raise TypeError when argument is Encoding.
|
* encoding.c (enc_compatible_p): raise TypeError when argument is Encoding.
|
||||||
|
29
lib/cgi.rb
29
lib/cgi.rb
@ -375,6 +375,19 @@ class CGI
|
|||||||
# # => "Usage: foo \"bar\" <baz>"
|
# # => "Usage: foo \"bar\" <baz>"
|
||||||
def CGI::unescapeHTML(string)
|
def CGI::unescapeHTML(string)
|
||||||
enc = string.encoding
|
enc = string.encoding
|
||||||
|
if [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
|
||||||
|
return string.gsub(Regexp.new('&(amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
|
||||||
|
case $1.encode("US-ASCII")
|
||||||
|
when 'amp' then '&'.encode(enc)
|
||||||
|
when 'quot' then '"'.encode(enc)
|
||||||
|
when 'gt' then '>'.encode(enc)
|
||||||
|
when 'lt' then '<'.encode(enc)
|
||||||
|
when /\A#0*(\d+)\z/ then $1.to_i.chr(enc)
|
||||||
|
when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
asciicompat = Encoding.compatible?(string, "a")
|
||||||
string.gsub(/&(amp|quot|gt|lt|\#[0-9]+|\#x[0-9A-Fa-f]+);/) do
|
string.gsub(/&(amp|quot|gt|lt|\#[0-9]+|\#x[0-9A-Fa-f]+);/) do
|
||||||
match = $1.dup
|
match = $1.dup
|
||||||
case match
|
case match
|
||||||
@ -382,15 +395,19 @@ class CGI
|
|||||||
when 'quot' then '"'
|
when 'quot' then '"'
|
||||||
when 'gt' then '>'
|
when 'gt' then '>'
|
||||||
when 'lt' then '<'
|
when 'lt' then '<'
|
||||||
when /\A#0*(\d+)\z/ then
|
when /\A#0*(\d+)\z/
|
||||||
if Integer($1) < 256
|
if enc == Encoding::UTF_8
|
||||||
Integer($1).chr.force_encoding(enc)
|
$1.to_i.chr(enc)
|
||||||
|
elsif $1.to_i < 128 && asciicompat
|
||||||
|
$1.to_i.chr
|
||||||
else
|
else
|
||||||
"&##{$1};"
|
"&##{$1};"
|
||||||
end
|
end
|
||||||
when /\A#x([0-9a-f]+)\z/i then
|
when /\A#x([0-9a-f]+)\z/i
|
||||||
if $1.hex < 256
|
if enc == Encoding::UTF_8
|
||||||
$1.hex.chr.force_encoding(enc)
|
$1.hex.chr(enc)
|
||||||
|
elsif $1.hex < 128 && asciicompat
|
||||||
|
$1.hex.chr
|
||||||
else
|
else
|
||||||
"&#x#{$1};"
|
"&#x#{$1};"
|
||||||
end
|
end
|
||||||
|
Loading…
x
Reference in New Issue
Block a user