From a2797a454c94318c2e4448f61a72074220c7d880 Mon Sep 17 00:00:00 2001 From: nobu Date: Fri, 17 Oct 2014 06:06:43 +0000 Subject: [PATCH] re.c: make regexps with binary escapes ASCII-8BIT * re.c (unescape_nonascii): make dynamically compiled US-ASCII regexps ASCII-8BIT encoding if binary (hexadecimal, control, meta) escapes are contained, as well as literal regexps. [ruby-dev:48626] [Bug #10382] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@47992 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 7 +++++++ re.c | 11 +++++++++-- test/ruby/test_m17n.rb | 3 +++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8c5f936f66..5283c86d49 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Fri Oct 17 15:06:49 2014 Nobuyoshi Nakada + + * re.c (unescape_nonascii): make dynamically compiled US-ASCII + regexps ASCII-8BIT encoding if binary (hexadecimal, control, + meta) escapes are contained, as well as literal regexps. + [ruby-dev:48626] [Bug #10382] + Fri Oct 17 03:05:08 2014 Eric Wong * test/-ext-/bug_reporter/test_bug_reporter.rb diff --git a/re.c b/re.c index d110fe63ef..9258f83ee4 100644 --- a/re.c +++ b/re.c @@ -2284,8 +2284,15 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc, case 'C': /* \C-X, \C-\M-X */ case 'M': /* \M-X, \M-\C-X, \M-\cX */ p = p-2; - if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0) - return -1; + if (enc == rb_usascii_encoding()) { + c = read_escaped_byte(&p, end, err); + if (c == -1) return -1; + rb_str_buf_cat(buf, &c, 1); + } + else { + if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0) + return -1; + } break; case 'u': diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 7d00b1aec6..6537b070c5 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -480,6 +480,9 @@ class TestM17N < Test::Unit::TestCase assert_regexp_fixed_ascii8bit(eval(a(%{/\xc2\xa1/n}))) assert_regexp_fixed_ascii8bit(eval(a(%q{/\xc2\xa1/}))) + s = 
'\xc2\xa1' + assert_regexp_fixed_ascii8bit(/#{s}/) + assert_raise(SyntaxError) { eval("/\xa1\xa1/n".force_encoding("euc-jp")) } [/\xc2\xa1/n, eval(a(%{/\xc2\xa1/})), eval(a(%{/\xc2\xa1/n}))].each {|r|