Reuse Regexp ptr when recompiling
When matching a string with an incompatible encoding, the Regexp needs to be recompiled. If `usecnt == 0`, then we can reuse the `ptr` because nothing else is using it. This avoids allocating another `regex_t`.

This speeds up matches that switch to incompatible encodings by 15%.

Branch:

```
Regex#match? with different encoding
                          1.431M (± 1.3%) i/s -      7.264M in   5.076153s
Regex#match? with same encoding
                         16.858M (± 1.1%) i/s -     85.347M in   5.063279s
```

Base:

```
Regex#match? with different encoding
                          1.248M (± 2.0%) i/s -      6.342M in   5.083151s
Regex#match? with same encoding
                         16.377M (± 1.1%) i/s -     82.519M in   5.039504s
```

Script:

```
regex = /foo/
str1 = "日本語"
str2 = "English".force_encoding("ASCII-8BIT")

Benchmark.ips do |x|
  x.report("Regex#match? with different encoding") do |times|
    i = 0
    while i < times
      regex.match?(str1)
      regex.match?(str2)
      i += 1
    end
  end

  x.report("Regex#match? with same encoding") do |times|
    i = 0
    while i < times
      regex.match?(str1)
      i += 1
    end
  end
end
```
This commit is contained in:
parent
a542512b7c
commit
d42b9ffb20
35
re.c
35
re.c
@ -1606,9 +1606,30 @@ rb_reg_prepare_re(VALUE re, VALUE str)
|
|||||||
const char *ptr;
|
const char *ptr;
|
||||||
long len;
|
long len;
|
||||||
RSTRING_GETMEM(unescaped, ptr, len);
|
RSTRING_GETMEM(unescaped, ptr, len);
|
||||||
r = onig_new(&reg, (UChar *)ptr, (UChar *)(ptr + len),
|
|
||||||
reg->options, enc,
|
/* If there are no other users of this regex, then we can directly overwrite it. */
|
||||||
OnigDefaultSyntax, &einfo);
|
if (RREGEXP(re)->usecnt == 0) {
|
||||||
|
regex_t tmp_reg;
|
||||||
|
r = onig_new_without_alloc(&tmp_reg, (UChar *)ptr, (UChar *)(ptr + len),
|
||||||
|
reg->options, enc,
|
||||||
|
OnigDefaultSyntax, &einfo);
|
||||||
|
|
||||||
|
if (r) {
|
||||||
|
/* There was an error so perform cleanups. */
|
||||||
|
onig_free_body(&tmp_reg);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
onig_free_body(reg);
|
||||||
|
/* There are no errors so set reg to tmp_reg. */
|
||||||
|
*reg = tmp_reg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
r = onig_new(&reg, (UChar *)ptr, (UChar *)(ptr + len),
|
||||||
|
reg->options, enc,
|
||||||
|
OnigDefaultSyntax, &einfo);
|
||||||
|
}
|
||||||
|
|
||||||
if (r) {
|
if (r) {
|
||||||
onig_error_code_to_str((UChar*)err, r, &einfo);
|
onig_error_code_to_str((UChar*)err, r, &einfo);
|
||||||
rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
|
rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
|
||||||
@ -1634,13 +1655,7 @@ rb_reg_onig_match(VALUE re, VALUE str,
|
|||||||
|
|
||||||
if (!tmpreg) RREGEXP(re)->usecnt--;
|
if (!tmpreg) RREGEXP(re)->usecnt--;
|
||||||
if (tmpreg) {
|
if (tmpreg) {
|
||||||
if (RREGEXP(re)->usecnt) {
|
onig_free(reg);
|
||||||
onig_free(reg);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
onig_free(RREGEXP_PTR(re));
|
|
||||||
RREGEXP_PTR(re) = reg;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result < 0) {
|
if (result < 0) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user