parse.y: limit codepoint length

* parse.y (parser_tokadd_codepoint): limit the Unicode codepoint
  length.  An overly long codepoint has been split unexpectedly
  since r57050.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59417 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2017-07-25 08:30:11 +00:00
parent 8e2d0deb88
commit f29e5013ad
2 changed files with 10 additions and 9 deletions

17
parse.y
View File

@ -5661,20 +5661,20 @@ parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp,
int regexp_literal, int wide) int regexp_literal, int wide)
{ {
size_t numlen; size_t numlen;
int codepoint = scan_hex(lex_p, wide ? 6 : 4, &numlen); int codepoint = scan_hex(lex_p, wide ? lex_pend - lex_p : 4, &numlen);
literal_flush(lex_p); literal_flush(lex_p);
lex_p += numlen; lex_p += numlen;
if (wide ? (numlen == 0) : (numlen < 4)) { if (wide ? (numlen == 0 || numlen > 6) : (numlen < 4)) {
yyerror("invalid Unicode escape"); yyerror("invalid Unicode escape");
return FALSE; return wide && numlen > 0;
} }
if (codepoint > 0x10ffff) { if (codepoint > 0x10ffff) {
yyerror("invalid Unicode codepoint (too large)"); yyerror("invalid Unicode codepoint (too large)");
return FALSE; return wide;
} }
if ((codepoint & 0xfffff800) == 0xd800) { if ((codepoint & 0xfffff800) == 0xd800) {
yyerror("invalid Unicode codepoint"); yyerror("invalid Unicode codepoint");
return FALSE; return wide;
} }
if (regexp_literal) { if (regexp_literal) {
tokcopy((int)numlen); tokcopy((int)numlen);
@ -5687,7 +5687,7 @@ parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp,
char *mesg = alloca(len); char *mesg = alloca(len);
snprintf(mesg, len, mixed_utf8, rb_enc_name(*encp)); snprintf(mesg, len, mixed_utf8, rb_enc_name(*encp));
yyerror(mesg); yyerror(mesg);
return TRUE; return wide;
} }
*encp = utf8; *encp = utf8;
tokaddmbc(codepoint, *encp); tokaddmbc(codepoint, *encp);
@ -5718,7 +5718,7 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
int c, last = nextc(); int c, last = nextc();
if (lex_p >= lex_pend) goto unterminated; if (lex_p >= lex_pend) goto unterminated;
while (ISSPACE(c = *lex_p) && ++lex_p < lex_pend); while (ISSPACE(c = *lex_p) && ++lex_p < lex_pend);
while (!string_literal || c != close_brace) { do {
if (regexp_literal) tokadd(last); if (regexp_literal) tokadd(last);
if (!parser_tokadd_codepoint(parser, encp, regexp_literal, TRUE)) { if (!parser_tokadd_codepoint(parser, encp, regexp_literal, TRUE)) {
break; break;
@ -5727,8 +5727,7 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
if (++lex_p >= lex_pend) goto unterminated; if (++lex_p >= lex_pend) goto unterminated;
last = c; last = c;
} }
if (!string_literal) break; } while (c != close_brace);
}
if (c != close_brace) { if (c != close_brace) {
unterminated: unterminated:

View File

@ -516,6 +516,8 @@ class TestParse < Test::Unit::TestCase
src = '"\xD0\u{90'"\n""000000000000000000000000" src = '"\xD0\u{90'"\n""000000000000000000000000"
assert_syntax_error(src, /:#{__LINE__}: unterminated/o) assert_syntax_error(src, /:#{__LINE__}: unterminated/o)
assert_syntax_error('"\u{100000000}"', /invalid Unicode escape/)
assert_equal("\x81", eval('"\C-\M-a"')) assert_equal("\x81", eval('"\C-\M-a"'))
assert_equal("\177", eval('"\c?"')) assert_equal("\177", eval('"\c?"'))
end end