Support `require 'cgi/escape'` by extracting CGI::Escape from CGI::Util

This commit is contained in:
Hiroshi SHIBATA 2025-05-08 19:08:11 +09:00
parent c667683768
commit 8a1d45144b
Notes: git 2025-05-09 05:27:44 +00:00
6 changed files with 251 additions and 240 deletions

View File

@ -8,7 +8,7 @@ RUBY_EXTERN const signed char ruby_digit36_to_number_table[];
#define upper_hexdigits (ruby_hexdigits+16)
#define char_to_number(c) ruby_digit36_to_number_table[(unsigned char)(c)]
static VALUE rb_cCGI, rb_mUtil, rb_mEscape;
static VALUE rb_cCGI, rb_mEscape, rb_mEscapeExt;
static ID id_accept_charset;
#define HTML_ESCAPE_MAX_LEN 6
@ -471,17 +471,17 @@ Init_escape(void)
void
InitVM_escape(void)
{
rb_cCGI = rb_define_class("CGI", rb_cObject);
rb_mEscape = rb_define_module_under(rb_cCGI, "Escape");
rb_mUtil = rb_define_module_under(rb_cCGI, "Util");
rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1);
rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1);
rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1);
rb_define_alias(rb_mEscape, "escape_uri_component", "escapeURIComponent");
rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1);
rb_define_alias(rb_mEscape, "unescape_uri_component", "unescapeURIComponent");
rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1);
rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1);
rb_prepend_module(rb_mUtil, rb_mEscape);
rb_extend_object(rb_cCGI, rb_mEscape);
rb_cCGI = rb_define_class("CGI", rb_cObject);
rb_mEscapeExt = rb_define_module_under(rb_cCGI, "EscapeExt");
rb_mEscape = rb_define_module_under(rb_cCGI, "Escape");
rb_define_method(rb_mEscapeExt, "escapeHTML", cgiesc_escape_html, 1);
rb_define_method(rb_mEscapeExt, "unescapeHTML", cgiesc_unescape_html, 1);
rb_define_method(rb_mEscapeExt, "escapeURIComponent", cgiesc_escape_uri_component, 1);
rb_define_alias(rb_mEscapeExt, "escape_uri_component", "escapeURIComponent");
rb_define_method(rb_mEscapeExt, "unescapeURIComponent", cgiesc_unescape_uri_component, -1);
rb_define_alias(rb_mEscapeExt, "unescape_uri_component", "unescapeURIComponent");
rb_define_method(rb_mEscapeExt, "escape", cgiesc_escape, 1);
rb_define_method(rb_mEscapeExt, "unescape", cgiesc_unescape, -1);
rb_prepend_module(rb_mEscape, rb_mEscapeExt);
rb_extend_object(rb_cCGI, rb_mEscapeExt);
}

View File

@ -294,4 +294,5 @@ end
require 'cgi/core'
require 'cgi/cookie'
require 'cgi/util'
require 'cgi/escape'
CGI.autoload(:HtmlExtension, 'cgi/html')

View File

@ -4,12 +4,12 @@
# generating HTTP responses.
#++
class CGI
unless const_defined?(:Util)
module Util
unless const_defined?(:Escape)
module Escape
@@accept_charset = "UTF-8" # :nodoc:
end
include Util
extend Util
include Escape
extend Escape
end
$CGI_ENV = ENV # for FCGI support

224
lib/cgi/escape.rb Normal file
View File

@ -0,0 +1,224 @@
# frozen_string_literal: true
class CGI
  # Declared empty here so it can be mixed in right away; its methods are
  # added when the module is reopened.
  module Escape
  end

  # Make the escaping helpers callable both on CGI instances and as
  # CGI.escape, CGI.escapeHTML, and so on.
  include Escape
  extend Escape
end
module CGI::Escape
# Default value for the +encoding+ argument of unescape and
# unescapeURIComponent. A value that is already defined is left untouched.
@@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset)
# Percent-encode +string+ as application/x-www-form-urlencoded data.
#
# Every byte other than <tt>a-z A-Z 0-9 _ . - ~</tt> and the space is
# written as <tt>%XX</tt> with uppercase hex digits; space characters
# (+" "+) are then turned into plus signs (+"+"+). The result keeps the
# encoding of +string+.
#   url_encoded_string = CGI.escape("'Stop!' said Fred")
#      # => "%27Stop%21%27+said+Fred"
def escape(string)
  original_encoding = string.encoding
  # Work on a binary copy so multibyte characters are escaped byte by byte.
  encoded = string.b
  encoded.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |run|
    run.bytes.map { |byte| '%%%02X' % byte }.join
  end
  encoded.tr!(' ', '+')
  encoded.force_encoding(original_encoding)
end
# Decode an application/x-www-form-urlencoded +string+.
#
# Plus signs become spaces and each <tt>%XX</tt> sequence becomes the byte
# it names. The result is tagged with +encoding+ (optional); if the decoded
# bytes are not valid in that encoding, the encoding of +string+ is used
# instead.
#   string = CGI.unescape("%27Stop%21%27+said+Fred")
#      # => "'Stop!' said Fred"
def unescape(string, encoding = @@accept_charset)
  decoded = string.tr('+', ' ').b
  decoded.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |run|
    [run.delete('%')].pack('H*')
  end
  decoded.force_encoding(encoding)
  return decoded if decoded.valid_encoding?

  decoded.force_encoding(string.encoding)
end
# Percent-encode +string+ following RFC 3986.
#
# Every byte other than <tt>a-z A-Z 0-9 _ . - ~</tt> is written as
# <tt>%XX</tt> with uppercase hex digits, so space characters (+" "+) are
# encoded as (+"%20"+). The result keeps the encoding of +string+.
#   url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred")
#      # => "%27Stop%21%27%20said%20Fred"
def escapeURIComponent(string)
  original_encoding = string.encoding
  # Work on a binary copy so multibyte characters are escaped byte by byte.
  encoded = string.b
  encoded.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |run|
    run.bytes.map { |byte| '%%%02X' % byte }.join
  end
  encoded.force_encoding(original_encoding)
end
alias escape_uri_component escapeURIComponent
# Decode a percent-encoded +string+ following RFC 3986.
#
# Each <tt>%XX</tt> sequence becomes the byte it names; plus signs are left
# as they are. The result is tagged with +encoding+ (optional); if the
# decoded bytes are not valid in that encoding, the encoding of +string+ is
# used instead.
#   string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred")
#      # => "'Stop!'+said Fred"
def unescapeURIComponent(string, encoding = @@accept_charset)
  decoded = string.b
  decoded.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |run|
    [run.delete('%')].pack('H*')
  end
  decoded.force_encoding(encoding)
  return decoded if decoded.valid_encoding?

  decoded.force_encoding(string.encoding)
end
alias unescape_uri_component unescapeURIComponent
# The set of special characters and their escaped values.
#
# Keys are the five characters escapeHTML replaces; values are the HTML
# entities written in their place. The values must be the literal entity
# text (for example <tt>&amp;amp;</tt> spelled out as ampersand, "amp",
# semicolon), never the character itself, or escaping would be a no-op.
TABLE_FOR_ESCAPE_HTML__ = {
  "'" => '&#39;',
  '&' => '&amp;',
  '"' => '&quot;',
  '<' => '&lt;',
  '>' => '&gt;',
}
# Replace the HTML-special characters <tt>' & " < ></tt> in +string+ with
# the entities listed in TABLE_FOR_ESCAPE_HTML__. The result has the same
# encoding as +string+.
#   CGI.escapeHTML('Usage: foo "bar" <baz>')
#      # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
def escapeHTML(string)
  enc = string.encoding

  # Common case: substitute on a binary copy, then restore the encoding.
  if enc.ascii_compatible?
    escaped = string.b
    escaped.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
    return escaped.force_encoding(enc)
  end

  # Dummy encodings are first converted to an ASCII-compatible counterpart
  # (or to binary when there is none) and converted back at the end.
  if enc.dummy?
    origenc = enc
    enc = Encoding::Converter.asciicompat_encoding(enc)
    string = enc ? string.encode(enc) : string.b
  end

  # The pattern and the table must be expressed in the working encoding.
  table = TABLE_FOR_ESCAPE_HTML__.each_with_object({}) do |(char, entity), converted|
    converted[char.encode(enc)] = entity.encode(enc)
  end
  string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table)
  string.encode!(origenc) if origenc
  string
end
# Turn the entities <tt>&apos; &amp; &quot; &gt; &lt;</tt> and numeric
# character references (decimal <tt>&#NNN;</tt>, hexadecimal
# <tt>&#xHH;</tt>) in +string+ back into characters. The result has the
# same encoding as +string+.
#   CGI.unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
#      # => "Usage: foo \"bar\" <baz>"
def unescapeHTML(string)
  enc = string.encoding

  unless enc.ascii_compatible?
    # Dummy encodings are converted to an ASCII-compatible counterpart (or
    # to binary when there is none) and converted back at the end.
    if enc.dummy?
      origenc = enc
      enc = Encoding::Converter.asciicompat_encoding(enc)
      string = enc ? string.encode(enc) : string.b
    end
    entity = Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))
    string = string.gsub(entity) do
      case Regexp.last_match(1).encode(Encoding::US_ASCII)
      when 'apos' then "'".encode(enc)
      when 'amp' then '&'.encode(enc)
      when 'quot' then '"'.encode(enc)
      when 'gt' then '>'.encode(enc)
      when 'lt' then '<'.encode(enc)
      when /\A#0*(\d+)\z/ then Regexp.last_match(1).to_i.chr(enc)
      when /\A#x([0-9a-f]+)\z/i then Regexp.last_match(1).hex.chr(enc)
      end
    end
    string.encode!(origenc) if origenc
    return string
  end

  # Nothing to do without an ampersand; the argument itself is returned.
  return string unless string.include? '&'

  # Numeric references at or above this value are left in escaped form.
  charlimit =
    if enc == Encoding::UTF_8
      0x10ffff
    elsif enc == Encoding::ISO_8859_1
      256
    else
      128
    end

  unescaped = string.b
  unescaped.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
    reference = Regexp.last_match(1).dup
    case reference
    when 'apos' then "'"
    when 'amp' then '&'
    when 'quot' then '"'
    when 'gt' then '>'
    when 'lt' then '<'
    when /\A#0*(\d+)\z/
      # digits excludes leading zeros, so a reference that stays escaped is
      # re-emitted without them.
      digits = Regexp.last_match(1)
      codepoint = digits.to_i
      codepoint < charlimit ? codepoint.chr(enc) : "&##{digits};"
    when /\A#x([0-9a-f]+)\z/i
      # A reference that stays escaped is always re-emitted with a
      # lowercase "x".
      digits = Regexp.last_match(1)
      codepoint = digits.hex
      codepoint < charlimit ? codepoint.chr(enc) : "&#x#{digits};"
    else
      "&#{reference};"
    end
  end
  unescaped.force_encoding enc
end
# Synonym for CGI.escapeHTML(str)
alias escape_html escapeHTML
# Short synonym for CGI.escapeHTML(str)
alias h escapeHTML
# Synonym for CGI.unescapeHTML(str)
alias unescape_html unescapeHTML
# TruffleRuby runs the pure-Ruby variant faster, do not use the C extension there
#
# On every other engine, try to load the compiled extension. Loading is
# best effort: a LoadError is swallowed on purpose, in which case the
# pure-Ruby methods defined in this module stay in use.
unless RUBY_ENGINE == 'truffleruby'
begin
require 'cgi/escape.so'
rescue LoadError
end
end
# HTML-escape only the tags of the named elements in +string+.
#
# +elements+ is given either as separate arguments or as a single array.
# Each entry is an element name without angle brackets and matches both
# the start tag and the end tag of that element, ignoring case. The
# attribute list of a start tag is escaped as well (for instance, the
# double quotes around attribute values). With no element names, +string+
# itself is returned.
#
#   print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
#     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
#
#   print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
#     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
def escapeElement(string, *elements)
  elements = elements[0] if elements[0].kind_of?(Array)
  return string if elements.empty?

  # Element names are interpolated into the pattern as given.
  tag_pattern = /<\/?(?:#{elements.join("|")})\b[^<>]*+>?/im
  string.gsub(tag_pattern) { |tag| CGI.escapeHTML(tag) }
end
# Undo HTML escaping, such as that done by CGI.escapeElement(), for the
# tags of the named elements only.
#
# +elements+ is given either as separate arguments or as a single array of
# element names, matched ignoring case. Escaped tags of other elements are
# left escaped. With no element names, +string+ itself is returned.
#
#   print CGI.unescapeElement(
#           CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
#     # "&lt;BR&gt;<A HREF="url"></A>"
#
#   print CGI.unescapeElement(
#           CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
#     # "&lt;BR&gt;<A HREF="url"></A>"
def unescapeElement(string, *elements)
  elements = elements[0] if elements[0].kind_of?(Array)
  return string if elements.empty?

  # Matches an escaped start or end tag up to, and including, its closing
  # "&gt;"; entities other than &gt; and &lt; may appear inside the tag.
  escaped_tag_pattern = /&lt;\/?(?:#{elements.join("|")})\b(?>[^&]+|&(?![gl]t;)\w+;)*(?:&gt;)?/im
  string.gsub(escaped_tag_pattern) { |escaped_tag| unescapeHTML(escaped_tag) }
end
# Synonym for CGI.escapeElement(str, *elements)
alias escape_element escapeElement
# Synonym for CGI.unescapeElement(str, *elements)
alias unescape_element unescapeElement
end

View File

@ -4,220 +4,8 @@ class CGI
include Util
extend Util
end
module CGI::Util
@@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset)
# URL-encode a string into application/x-www-form-urlencoded.
# Space characters (+" "+) are encoded with plus signs (+"+"+)
# url_encoded_string = CGI.escape("'Stop!' said Fred")
# # => "%27Stop%21%27+said+Fred"
def escape(string)
encoding = string.encoding
buffer = string.b
buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m|
'%' + m.unpack('H2' * m.bytesize).join('%').upcase
end
buffer.tr!(' ', '+')
buffer.force_encoding(encoding)
end
# URL-decode an application/x-www-form-urlencoded string with encoding(optional).
# string = CGI.unescape("%27Stop%21%27+said+Fred")
# # => "'Stop!' said Fred"
def unescape(string, encoding = @@accept_charset)
str = string.tr('+', ' ')
str = str.b
str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
[m.delete('%')].pack('H*')
end
str.force_encoding(encoding)
str.valid_encoding? ? str : str.force_encoding(string.encoding)
end
# URL-encode a string following RFC 3986
# Space characters (+" "+) are encoded with (+"%20"+)
# url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred")
# # => "%27Stop%21%27%20said%20Fred"
def escapeURIComponent(string)
encoding = string.encoding
buffer = string.b
buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m|
'%' + m.unpack('H2' * m.bytesize).join('%').upcase
end
buffer.force_encoding(encoding)
end
alias escape_uri_component escapeURIComponent
# URL-decode a string following RFC 3986 with encoding(optional).
# string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred")
# # => "'Stop!'+said Fred"
def unescapeURIComponent(string, encoding = @@accept_charset)
str = string.b
str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
[m.delete('%')].pack('H*')
end
str.force_encoding(encoding)
str.valid_encoding? ? str : str.force_encoding(string.encoding)
end
alias unescape_uri_component unescapeURIComponent
# The set of special characters and their escaped values
TABLE_FOR_ESCAPE_HTML__ = {
"'" => '&#39;',
'&' => '&amp;',
'"' => '&quot;',
'<' => '&lt;',
'>' => '&gt;',
}
# Escape special characters in HTML, namely '&\"<>
# CGI.escapeHTML('Usage: foo "bar" <baz>')
# # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
def escapeHTML(string)
enc = string.encoding
unless enc.ascii_compatible?
if enc.dummy?
origenc = enc
enc = Encoding::Converter.asciicompat_encoding(enc)
string = enc ? string.encode(enc) : string.b
end
table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table)
string.encode!(origenc) if origenc
string
else
string = string.b
string.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
string.force_encoding(enc)
end
end
# TruffleRuby runs the pure-Ruby variant faster, do not use the C extension there
unless RUBY_ENGINE == 'truffleruby'
begin
require 'cgi/escape'
rescue LoadError
end
end
# Unescape a string that has been HTML-escaped
# CGI.unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
# # => "Usage: foo \"bar\" <baz>"
def unescapeHTML(string)
enc = string.encoding
unless enc.ascii_compatible?
if enc.dummy?
origenc = enc
enc = Encoding::Converter.asciicompat_encoding(enc)
string = enc ? string.encode(enc) : string.b
end
string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
case $1.encode(Encoding::US_ASCII)
when 'apos' then "'".encode(enc)
when 'amp' then '&'.encode(enc)
when 'quot' then '"'.encode(enc)
when 'gt' then '>'.encode(enc)
when 'lt' then '<'.encode(enc)
when /\A#0*(\d+)\z/ then $1.to_i.chr(enc)
when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc)
end
end
string.encode!(origenc) if origenc
return string
end
return string unless string.include? '&'
charlimit = case enc
when Encoding::UTF_8; 0x10ffff
when Encoding::ISO_8859_1; 256
else 128
end
string = string.b
string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
match = $1.dup
case match
when 'apos' then "'"
when 'amp' then '&'
when 'quot' then '"'
when 'gt' then '>'
when 'lt' then '<'
when /\A#0*(\d+)\z/
n = $1.to_i
if n < charlimit
n.chr(enc)
else
"&##{$1};"
end
when /\A#x([0-9a-f]+)\z/i
n = $1.hex
if n < charlimit
n.chr(enc)
else
"&#x#{$1};"
end
else
"&#{match};"
end
end
string.force_encoding enc
end
# Synonym for CGI.escapeHTML(str)
alias escape_html escapeHTML
# Synonym for CGI.unescapeHTML(str)
alias unescape_html unescapeHTML
# Escape only the tags of certain HTML elements in +string+.
#
# Takes an element or elements or array of elements. Each element
# is specified by the name of the element, without angle brackets.
# This matches both the start and the end tag of that element.
# The attribute list of the open tag will also be escaped (for
# instance, the double-quotes surrounding attribute values).
#
# print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
# # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"
#
# print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
# # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"
def escapeElement(string, *elements)
elements = elements[0] if elements[0].kind_of?(Array)
unless elements.empty?
string.gsub(/<\/?(?:#{elements.join("|")})\b[^<>]*+>?/im) do
CGI.escapeHTML($&)
end
else
string
end
end
# Undo escaping such as that done by CGI.escapeElement()
#
# print CGI.unescapeElement(
# CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
# # "&lt;BR&gt;<A HREF="url"></A>"
#
# print CGI.unescapeElement(
# CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
# # "&lt;BR&gt;<A HREF="url"></A>"
def unescapeElement(string, *elements)
elements = elements[0] if elements[0].kind_of?(Array)
unless elements.empty?
string.gsub(/&lt;\/?(?:#{elements.join("|")})\b(?>[^&]+|&(?![gl]t;)\w+;)*(?:&gt;)?/im) do
unescapeHTML($&)
end
else
string
end
end
# Synonym for CGI.escapeElement(str)
alias escape_element escapeElement
# Synonym for CGI.unescapeElement(str)
alias unescape_element unescapeElement
# Format a +Time+ object as a String using the format specified by RFC 1123.
#
# CGI.rfc1123_date(Time.now)
@ -253,6 +41,4 @@ module CGI::Util
end
lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
end
alias h escapeHTML
end

View File

@ -6,7 +6,7 @@ require_relative 'update_env'
class CGIUtilTest < Test::Unit::TestCase
include CGI::Util
include CGI::Escape
include UpdateEnv
def setup
@ -63,7 +63,7 @@ class CGIUtilTest < Test::Unit::TestCase
return unless defined?(::Encoding)
assert_raise(TypeError) {CGI.unescape('', nil)}
assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}")
assert_separately(%w[-rcgi/escape], "#{<<-"begin;"}\n#{<<-"end;"}")
begin;
assert_equal("", CGI.unescape(''))
end;
@ -120,7 +120,7 @@ class CGIUtilTest < Test::Unit::TestCase
return unless defined?(::Encoding)
assert_raise(TypeError) {CGI.unescapeURIComponent('', nil)}
assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}")
assert_separately(%w[-rcgi/escape], "#{<<-"begin;"}\n#{<<-"end;"}")
begin;
assert_equal("", CGI.unescapeURIComponent(''))
end;
@ -300,21 +300,21 @@ end
class CGIUtilPureRubyTest < Test::Unit::TestCase
def setup
CGI::Escape.module_eval do
CGI::EscapeExt.module_eval do
alias _escapeHTML escapeHTML
remove_method :escapeHTML
alias _unescapeHTML unescapeHTML
remove_method :unescapeHTML
end if defined?(CGI::Escape)
end if defined?(CGI::EscapeExt)
end
def teardown
CGI::Escape.module_eval do
CGI::EscapeExt.module_eval do
alias escapeHTML _escapeHTML
remove_method :_escapeHTML
alias unescapeHTML _unescapeHTML
remove_method :_unescapeHTML
end if defined?(CGI::Escape)
end if defined?(CGI::EscapeExt)
end
include CGIUtilTest::UnescapeHTMLTests