string.c: multiple codepoints
* string.c (undump_after_backslash): fix multiple codepoints in braces.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@61290 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
ae18c8f5b6
commit
7c18db61a1
59
string.c
59
string.c
@ -6160,16 +6160,18 @@ unescape_ascii(unsigned int c)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static long
|
||||||
undump_after_backslash(VALUE undumped, const char *s, const char *s_end, rb_encoding **penc)
|
undump_after_backslash(VALUE undumped, const char *s, const char *s_end, rb_encoding **penc)
|
||||||
{
|
{
|
||||||
unsigned int c, c2;
|
unsigned int c, c2;
|
||||||
int n, codelen;
|
long n;
|
||||||
|
int codelen;
|
||||||
size_t hexlen;
|
size_t hexlen;
|
||||||
char buf[6];
|
char buf[6];
|
||||||
static rb_encoding *enc_utf8 = NULL;
|
static rb_encoding *enc_utf8 = NULL;
|
||||||
|
|
||||||
c = rb_enc_codepoint_len(s, s_end, &n, *penc);
|
c = rb_enc_codepoint_len(s, s_end, &codelen, *penc);
|
||||||
|
n = codelen;
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '\\':
|
case '\\':
|
||||||
case '"':
|
case '"':
|
||||||
@ -6203,31 +6205,30 @@ undump_after_backslash(VALUE undumped, const char *s, const char *s_end, rb_enco
|
|||||||
if (c2 == '{') { /* handle \u{...} form */
|
if (c2 == '{') { /* handle \u{...} form */
|
||||||
const char *hexstr = s + 2;
|
const char *hexstr = s + 2;
|
||||||
int hex;
|
int hex;
|
||||||
static const char* const close_brace = "}";
|
|
||||||
long pos;
|
|
||||||
|
|
||||||
if (hexstr >= s_end) {
|
while ((hex = rb_enc_ascget(hexstr, s_end, &codelen, *penc)) != '}') {
|
||||||
rb_raise(rb_eRuntimeError, "unterminated Unicode escape");
|
if (hex == -1) {
|
||||||
|
rb_raise(rb_eRuntimeError, "unterminated Unicode escape");
|
||||||
|
}
|
||||||
|
if (ISSPACE(hex)) {
|
||||||
|
hexstr += codelen;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
hex = scan_hex(hexstr, s_end-hexstr, &hexlen);
|
||||||
|
if (hexlen == 0 || hexlen > 6) {
|
||||||
|
rb_raise(rb_eRuntimeError, "invalid Unicode escape");
|
||||||
|
}
|
||||||
|
if (hex > 0x10ffff) {
|
||||||
|
rb_raise(rb_eRuntimeError, "invalid Unicode codepoint (too large)");
|
||||||
|
}
|
||||||
|
if ((hex & 0xfffff800) == 0xd800) {
|
||||||
|
rb_raise(rb_eRuntimeError, "invalid Unicode codepoint");
|
||||||
|
}
|
||||||
|
codelen = rb_enc_mbcput(hex, buf, *penc);
|
||||||
|
rb_str_cat(undumped, buf, codelen);
|
||||||
|
hexstr += hexlen;
|
||||||
}
|
}
|
||||||
/* find close brace */
|
n += hexstr - s + 1;
|
||||||
pos = strseq_core(hexstr, s_end, s_end - hexstr, close_brace, 1, 0, *penc);
|
|
||||||
if (pos < 0) {
|
|
||||||
rb_raise(rb_eRuntimeError, "unterminated Unicode escape");
|
|
||||||
}
|
|
||||||
hex = scan_hex(hexstr, pos, &hexlen);
|
|
||||||
if (hexlen == 0 || hexlen > 6) {
|
|
||||||
rb_raise(rb_eRuntimeError, "invalid Unicode escape");
|
|
||||||
}
|
|
||||||
if (hex > 0x10ffff) {
|
|
||||||
rb_raise(rb_eRuntimeError, "invalid Unicode codepoint (too large)");
|
|
||||||
}
|
|
||||||
if ((hex & 0xfffff800) == 0xd800) {
|
|
||||||
rb_raise(rb_eRuntimeError, "invalid Unicode codepoint");
|
|
||||||
}
|
|
||||||
codelen = rb_enc_codelen(hex, *penc);
|
|
||||||
rb_enc_mbcput(hex, buf, *penc);
|
|
||||||
rb_str_cat(undumped, buf, codelen);
|
|
||||||
n += rb_strlen_lit("u{}") + hexlen;
|
|
||||||
}
|
}
|
||||||
else { /* handle \uXXXX form */
|
else { /* handle \uXXXX form */
|
||||||
int hex = scan_hex(s+1, 4, &hexlen);
|
int hex = scan_hex(s+1, 4, &hexlen);
|
||||||
@ -6276,9 +6277,8 @@ str_undump(VALUE str)
|
|||||||
{
|
{
|
||||||
const char *s = RSTRING_PTR(str);
|
const char *s = RSTRING_PTR(str);
|
||||||
const char *s_end = RSTRING_END(str);
|
const char *s_end = RSTRING_END(str);
|
||||||
long len = RSTRING_LEN(str);
|
long len = RSTRING_LEN(str), n;
|
||||||
rb_encoding *enc = rb_enc_get(str), *forced_enc = NULL;
|
rb_encoding *enc = rb_enc_get(str), *forced_enc = NULL;
|
||||||
int n;
|
|
||||||
unsigned int c;
|
unsigned int c;
|
||||||
enum undump_source_format source_format;
|
enum undump_source_format source_format;
|
||||||
VALUE undumped = rb_enc_str_new(s, 0L, enc);
|
VALUE undumped = rb_enc_str_new(s, 0L, enc);
|
||||||
@ -6317,7 +6317,7 @@ str_undump(VALUE str)
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (; s < s_end; s += n) {
|
for (; s < s_end; s += n) {
|
||||||
c = rb_enc_codepoint_len(s, s_end, &n, enc);
|
c = rb_enc_codepoint_len(s, s_end, &w, enc);
|
||||||
if (c == '\\') {
|
if (c == '\\') {
|
||||||
if (s+1 >= s_end) {
|
if (s+1 >= s_end) {
|
||||||
rb_raise(rb_eRuntimeError, "invalid escape");
|
rb_raise(rb_eRuntimeError, "invalid escape");
|
||||||
@ -6328,6 +6328,7 @@ str_undump(VALUE str)
|
|||||||
rb_raise(rb_eRuntimeError, "non-escaped double quote detected");
|
rb_raise(rb_eRuntimeError, "non-escaped double quote detected");
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
n = w;
|
||||||
rb_str_cat(undumped, s, n);
|
rb_str_cat(undumped, s, n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -762,6 +762,7 @@ CODE
|
|||||||
assert_equal(S("\uABCD"), S('"\\uABCD"').undump)
|
assert_equal(S("\uABCD"), S('"\\uABCD"').undump)
|
||||||
assert_equal(S("\u{ABCDE}"), S('"\\u{ABCDE}"').undump)
|
assert_equal(S("\u{ABCDE}"), S('"\\u{ABCDE}"').undump)
|
||||||
assert_equal(S("\u{10ABCD}"), S('"\\u{10ABCD}"').undump)
|
assert_equal(S("\u{10ABCD}"), S('"\\u{10ABCD}"').undump)
|
||||||
|
assert_equal(S("\u{ABCDE 10ABCD}"), S('"\\u{ABCDE 10ABCD}"').undump)
|
||||||
|
|
||||||
assert_equal(S("äöü"), S('"\u00E4\u00F6\u00FC"').undump)
|
assert_equal(S("äöü"), S('"\u00E4\u00F6\u00FC"').undump)
|
||||||
assert_equal(S("äöü"), S('"\xC3\xA4\xC3\xB6\xC3\xBC"').undump)
|
assert_equal(S("äöü"), S('"\xC3\xA4\xC3\xB6\xC3\xBC"').undump)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user