fix escapes in undump
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@61379 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
7176eb2df9
commit
29c6ca423c
75
string.c
75
string.c
@ -6163,21 +6163,19 @@ unescape_ascii(unsigned int c)
|
||||
static long
|
||||
undump_after_backslash(VALUE undumped, const char *s, const char *s_end, rb_encoding **penc)
|
||||
{
|
||||
unsigned int c, c2;
|
||||
long n;
|
||||
const char *s0 = s;
|
||||
unsigned int c;
|
||||
int codelen;
|
||||
size_t hexlen;
|
||||
char buf[6];
|
||||
static rb_encoding *enc_utf8 = NULL;
|
||||
|
||||
c = rb_enc_codepoint_len(s, s_end, &codelen, *penc);
|
||||
n = codelen;
|
||||
switch (c) {
|
||||
switch (*s) {
|
||||
case '\\':
|
||||
case '"':
|
||||
case '#':
|
||||
rb_str_cat(undumped, s, n); /* cat itself */
|
||||
n++;
|
||||
rb_str_cat(undumped, s, 1); /* cat itself */
|
||||
s++;
|
||||
break;
|
||||
case 'n':
|
||||
case 'r':
|
||||
@ -6187,77 +6185,78 @@ undump_after_backslash(VALUE undumped, const char *s, const char *s_end, rb_enco
|
||||
case 'b':
|
||||
case 'a':
|
||||
case 'e':
|
||||
*buf = (char)unescape_ascii(c);
|
||||
rb_str_cat(undumped, buf, n);
|
||||
n++;
|
||||
*buf = (char)unescape_ascii(*s);
|
||||
rb_str_cat(undumped, buf, 1);
|
||||
s++;
|
||||
break;
|
||||
case 'u':
|
||||
if (s+1 >= s_end) {
|
||||
if (++s >= s_end) {
|
||||
rb_raise(rb_eRuntimeError, "invalid Unicode escape");
|
||||
}
|
||||
if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
|
||||
if (*penc != enc_utf8) {
|
||||
*penc = enc_utf8;
|
||||
rb_enc_associate(undumped, enc_utf8);
|
||||
ENC_CODERANGE_CLEAR(undumped);
|
||||
}
|
||||
c2 = rb_enc_codepoint_len(s+1, s_end, NULL, *penc);
|
||||
if (c2 == '{') { /* handle \u{...} form */
|
||||
const char *hexstr = s + 2;
|
||||
int hex;
|
||||
|
||||
while ((hex = rb_enc_ascget(hexstr, s_end, &codelen, *penc)) != '}') {
|
||||
if (hex == -1) {
|
||||
if (*s == '{') { /* handle \u{...} form */
|
||||
s++;
|
||||
for (;;) {
|
||||
if (s >= s_end) {
|
||||
rb_raise(rb_eRuntimeError, "unterminated Unicode escape");
|
||||
}
|
||||
if (ISSPACE(hex)) {
|
||||
hexstr += codelen;
|
||||
if (*s == '}') {
|
||||
s++;
|
||||
break;
|
||||
}
|
||||
if (ISSPACE(*s)) {
|
||||
s++;
|
||||
continue;
|
||||
}
|
||||
hex = scan_hex(hexstr, s_end-hexstr, &hexlen);
|
||||
c = scan_hex(s, s_end-s, &hexlen);
|
||||
if (hexlen == 0 || hexlen > 6) {
|
||||
rb_raise(rb_eRuntimeError, "invalid Unicode escape");
|
||||
}
|
||||
if (hex > 0x10ffff) {
|
||||
if (c > 0x10ffff) {
|
||||
rb_raise(rb_eRuntimeError, "invalid Unicode codepoint (too large)");
|
||||
}
|
||||
if ((hex & 0xfffff800) == 0xd800) {
|
||||
if (0xd800 <= c && c <= 0xdfff) {
|
||||
rb_raise(rb_eRuntimeError, "invalid Unicode codepoint");
|
||||
}
|
||||
codelen = rb_enc_mbcput(hex, buf, *penc);
|
||||
codelen = rb_enc_mbcput(c, buf, *penc);
|
||||
rb_str_cat(undumped, buf, codelen);
|
||||
hexstr += hexlen;
|
||||
s += hexlen;
|
||||
}
|
||||
n += hexstr - s + 1;
|
||||
}
|
||||
else { /* handle \uXXXX form */
|
||||
int hex = scan_hex(s+1, 4, &hexlen);
|
||||
c = scan_hex(s, 4, &hexlen);
|
||||
if (hexlen != 4) {
|
||||
rb_raise(rb_eRuntimeError, "invalid Unicode escape");
|
||||
}
|
||||
codelen = rb_enc_codelen(hex, *penc);
|
||||
rb_enc_mbcput(hex, buf, *penc);
|
||||
if (0xd800 <= c && c <= 0xdfff) {
|
||||
rb_raise(rb_eRuntimeError, "invalid Unicode codepoint");
|
||||
}
|
||||
codelen = rb_enc_mbcput(c, buf, *penc);
|
||||
rb_str_cat(undumped, buf, codelen);
|
||||
n += rb_strlen_lit("uXXXX");
|
||||
s += hexlen;
|
||||
}
|
||||
break;
|
||||
case 'x':
|
||||
if (s+1 >= s_end) {
|
||||
if (++s >= s_end) {
|
||||
rb_raise(rb_eRuntimeError, "invalid hex escape");
|
||||
}
|
||||
c2 = scan_hex(s+1, 2, &hexlen);
|
||||
*buf = scan_hex(s, 2, &hexlen);
|
||||
if (hexlen != 2) {
|
||||
rb_raise(rb_eRuntimeError, "invalid hex escape");
|
||||
}
|
||||
*buf = (char)c2;
|
||||
rb_str_cat(undumped, buf, 1L);
|
||||
n += rb_strlen_lit("xXX");
|
||||
rb_str_cat(undumped, buf, 1);
|
||||
s += hexlen;
|
||||
break;
|
||||
default:
|
||||
rb_str_cat(undumped, "\\", 1L); /* keep backslash */
|
||||
rb_str_cat(undumped, s-1, 2);
|
||||
s++;
|
||||
}
|
||||
|
||||
return n;
|
||||
return s - s0 + 1;
|
||||
}
|
||||
|
||||
static VALUE rb_str_is_ascii_only_p(VALUE str);
|
||||
|
@ -756,13 +756,18 @@ CODE
|
||||
def test_undump
|
||||
a = S("Test") << 1 << 2 << 3 << 9 << 13 << 10
|
||||
assert_equal(a, S('"Test\\x01\\x02\\x03\\t\\r\\n"').undump)
|
||||
assert_equal(S("\\ca"), S('"\\ca"').undump)
|
||||
assert_equal(S("\u{7F}"), S('"\\x7F"').undump)
|
||||
assert_equal(S("\u{7F}A"), S('"\\x7FA"').undump)
|
||||
assert_equal(S("\u{AB}"), S('"\\u00AB"').undump)
|
||||
assert_equal(S("\u{ABC}"), S('"\\u0ABC"').undump)
|
||||
assert_equal(S("\uABCD"), S('"\\uABCD"').undump)
|
||||
assert_equal(S("\uABCD"), S('"\\uABCD"').undump)
|
||||
assert_equal(S("\u{ABCDE}"), S('"\\u{ABCDE}"').undump)
|
||||
assert_equal(S("\u{10ABCD}"), S('"\\u{10ABCD}"').undump)
|
||||
assert_equal(S("\u{ABCDE 10ABCD}"), S('"\\u{ABCDE 10ABCD}"').undump)
|
||||
assert_equal(S(""), S('"\\u{}"').undump)
|
||||
assert_equal(S(""), S('"\\u{ }"').undump)
|
||||
|
||||
assert_equal(S("äöü"), S('"\u00E4\u00F6\u00FC"').undump)
|
||||
assert_equal(S("äöü"), S('"\xC3\xA4\xC3\xB6\xC3\xBC"').undump)
|
||||
@ -783,8 +788,16 @@ CODE
|
||||
|
||||
assert_raise(RuntimeError) { S('"\u"').undump }
|
||||
assert_raise(RuntimeError) { S('"\u{"').undump }
|
||||
assert_raise(RuntimeError) { S('"\u304"').undump }
|
||||
assert_raise(RuntimeError) { S('"\u304Z"').undump }
|
||||
assert_raise(RuntimeError) { S('"\udfff"').undump }
|
||||
assert_raise(RuntimeError) { S('"\u{dfff}"').undump }
|
||||
assert_raise(RuntimeError) { S('"\u{3042"').undump }
|
||||
assert_raise(RuntimeError) { S('"\u{3042 "').undump }
|
||||
assert_raise(RuntimeError) { S('"\u{110000}"').undump }
|
||||
assert_raise(RuntimeError) { S('"\u{1234567}"').undump }
|
||||
assert_raise(RuntimeError) { S('"\x"').undump }
|
||||
assert_raise(RuntimeError) { S('"\xA"').undump }
|
||||
assert_raise(RuntimeError) { S('"\\"').undump }
|
||||
assert_raise(RuntimeError) { S(%("\0")).undump }
|
||||
end
|
||||
|
Loading…
x
Reference in New Issue
Block a user