Optimize CGI.escapeHTML by reducing buffer extension
and switch-case branches. Buffer allocation optimization using `ALLOCA_N` is the main benefit of this patch: it eliminates the O(N) buffer extensions. It also reduces the number of branches by using an escape table, as in https://mattn.kaoriya.net/software/lang/c/20160817011915.htm. Closes: https://github.com/ruby/ruby/pull/2226 Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org> Co-authored-by: Yasuhiro MATSUMOTO <mattn.jp@gmail.com>
This commit is contained in:
parent
b31e1b4a7c
commit
8d81e59aa7
40
benchmark/cgi_escape_html.yml
Normal file
40
benchmark/cgi_escape_html.yml
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
prelude: require 'cgi/escape'
|
||||||
|
benchmark:
|
||||||
|
- name: escape_html_blank
|
||||||
|
prelude: str = ""
|
||||||
|
script: CGI.escapeHTML(str)
|
||||||
|
loop_count: 20000000
|
||||||
|
- name: escape_html_short_none
|
||||||
|
prelude: str = "abcde"
|
||||||
|
script: CGI.escapeHTML(str)
|
||||||
|
loop_count: 20000000
|
||||||
|
- name: escape_html_short_one
|
||||||
|
prelude: str = "abcd<"
|
||||||
|
script: CGI.escapeHTML(str)
|
||||||
|
loop_count: 20000000
|
||||||
|
- name: escape_html_short_all
|
||||||
|
prelude: str = "'&\"<>"
|
||||||
|
script: CGI.escapeHTML(str)
|
||||||
|
loop_count: 5000000
|
||||||
|
- name: escape_html_long_none
|
||||||
|
prelude: str = "abcde" * 300
|
||||||
|
script: CGI.escapeHTML(str)
|
||||||
|
loop_count: 1000000
|
||||||
|
- name: escape_html_long_all
|
||||||
|
prelude: str = "'&\"<>" * 10
|
||||||
|
script: CGI.escapeHTML(str)
|
||||||
|
loop_count: 1000000
|
||||||
|
- name: escape_html_real
|
||||||
|
prelude: | # http://example.com/
|
||||||
|
str = <<~HTML
|
||||||
|
<body>
|
||||||
|
<div>
|
||||||
|
<h1>Example Domain</h1>
|
||||||
|
<p>This domain is established to be used for illustrative examples in documents. You may use this
|
||||||
|
domain in examples without prior coordination or asking for permission.</p>
|
||||||
|
<p><a href="http://www.iana.org/domains/example">More information...</a></p>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
HTML
|
||||||
|
script: CGI.escapeHTML(str)
|
||||||
|
loop_count: 1000000
|
@ -11,27 +11,20 @@ RUBY_EXTERN const signed char ruby_digit36_to_number_table[];
|
|||||||
static VALUE rb_cCGI, rb_mUtil, rb_mEscape;
|
static VALUE rb_cCGI, rb_mUtil, rb_mEscape;
|
||||||
static ID id_accept_charset;
|
static ID id_accept_charset;
|
||||||
|
|
||||||
static void
|
#define HTML_ESCAPE_MAX_LEN 6
|
||||||
html_escaped_cat(VALUE str, char c)
|
|
||||||
{
|
static const struct {
|
||||||
switch (c) {
|
uint8_t len;
|
||||||
case '\'':
|
char str[HTML_ESCAPE_MAX_LEN+1];
|
||||||
rb_str_cat_cstr(str, "&#39;");
|
} html_escape_table[UCHAR_MAX+1] = {
|
||||||
break;
|
#define HTML_ESCAPE(c, str) [c] = {rb_strlen_lit(str), str}
|
||||||
case '&':
|
HTML_ESCAPE('\'', "&#39;"),
|
||||||
rb_str_cat_cstr(str, "&amp;");
|
HTML_ESCAPE('&', "&amp;"),
|
||||||
break;
|
HTML_ESCAPE('"', "&quot;"),
|
||||||
case '"':
|
HTML_ESCAPE('<', "&lt;"),
|
||||||
rb_str_cat_cstr(str, "&quot;");
|
HTML_ESCAPE('>', "&gt;"),
|
||||||
break;
|
#undef HTML_ESCAPE
|
||||||
case '<':
|
};
|
||||||
rb_str_cat_cstr(str, "&lt;");
|
|
||||||
break;
|
|
||||||
case '>':
|
|
||||||
rb_str_cat_cstr(str, "&gt;");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
preserve_original_state(VALUE orig, VALUE dest)
|
preserve_original_state(VALUE orig, VALUE dest)
|
||||||
@ -44,36 +37,27 @@ preserve_original_state(VALUE orig, VALUE dest)
|
|||||||
static VALUE
|
static VALUE
|
||||||
optimized_escape_html(VALUE str)
|
optimized_escape_html(VALUE str)
|
||||||
{
|
{
|
||||||
long i, len, beg = 0;
|
const char *cstr = RSTRING_PTR(str);
|
||||||
VALUE dest = 0;
|
const char *end = cstr + RSTRING_LEN(str);
|
||||||
const char *cstr;
|
char *buf = ALLOCA_N(char, RSTRING_LEN(str) * HTML_ESCAPE_MAX_LEN);
|
||||||
|
|
||||||
len = RSTRING_LEN(str);
|
char *dest = buf;
|
||||||
cstr = RSTRING_PTR(str);
|
while (cstr < end) {
|
||||||
|
const unsigned char c = *cstr++;
|
||||||
for (i = 0; i < len; i++) {
|
uint8_t len = html_escape_table[c].len;
|
||||||
switch (cstr[i]) {
|
if (len) {
|
||||||
case '\'':
|
memcpy(dest, html_escape_table[c].str, len);
|
||||||
case '&':
|
dest += len;
|
||||||
case '"':
|
}
|
||||||
case '<':
|
else {
|
||||||
case '>':
|
*dest++ = c;
|
||||||
if (!dest) {
|
}
|
||||||
dest = rb_str_buf_new(len);
|
|
||||||
}
|
|
||||||
|
|
||||||
rb_str_cat(dest, cstr + beg, i - beg);
|
|
||||||
beg = i + 1;
|
|
||||||
|
|
||||||
html_escaped_cat(dest, cstr[i]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dest) {
|
if (RSTRING_LEN(str) < (dest - buf)) {
|
||||||
rb_str_cat(dest, cstr + beg, len - beg);
|
VALUE escaped = rb_str_new(buf, dest - buf);
|
||||||
preserve_original_state(str, dest);
|
preserve_original_state(str, escaped);
|
||||||
return dest;
|
return escaped;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return rb_str_dup(str);
|
return rb_str_dup(str);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user