[Bug #20280] Raise SyntaxError on invalid encoding symbol
This commit is contained in:
parent
a52fc5df7a
commit
fcc55dc226
@ -46,6 +46,7 @@ int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc);
|
|||||||
int rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code);
|
int rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code);
|
||||||
VALUE rb_str_include(VALUE str, VALUE arg);
|
VALUE rb_str_include(VALUE str, VALUE arg);
|
||||||
VALUE rb_str_byte_substr(VALUE str, VALUE beg, VALUE len);
|
VALUE rb_str_byte_substr(VALUE str, VALUE beg, VALUE len);
|
||||||
|
VALUE rb_str_valid_encoding_p(VALUE str);
|
||||||
|
|
||||||
static inline bool STR_EMBED_P(VALUE str);
|
static inline bool STR_EMBED_P(VALUE str);
|
||||||
static inline bool STR_SHARED_P(VALUE str);
|
static inline bool STR_SHARED_P(VALUE str);
|
||||||
|
51
parse.y
51
parse.y
@ -13727,6 +13727,17 @@ new_defined(struct parser_params *p, NODE *expr, const YYLTYPE *loc)
|
|||||||
return NEW_DEFINED(n, loc);
|
return NEW_DEFINED(n, loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static NODE*
|
||||||
|
str_to_sym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc)
|
||||||
|
{
|
||||||
|
VALUE lit = rb_node_str_string_val(node);
|
||||||
|
if (!rb_str_valid_encoding_p(lit)) {
|
||||||
|
yyerror1(loc, "invalid symbol");
|
||||||
|
lit = STR_NEW0();
|
||||||
|
}
|
||||||
|
return NEW_SYM(lit, loc);
|
||||||
|
}
|
||||||
|
|
||||||
static NODE*
|
static NODE*
|
||||||
symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol)
|
symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol)
|
||||||
{
|
{
|
||||||
@ -13736,7 +13747,7 @@ symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol)
|
|||||||
nd_set_type(symbol, NODE_DSYM);
|
nd_set_type(symbol, NODE_DSYM);
|
||||||
break;
|
break;
|
||||||
case NODE_STR:
|
case NODE_STR:
|
||||||
symbol = NEW_SYM(rb_node_str_string_val(symbol), &RNODE(symbol)->nd_loc);
|
symbol = str_to_sym_node(p, symbol, &RNODE(symbol)->nd_loc);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
compile_error(p, "unexpected node as symbol: %s", parser_node_name(type));
|
compile_error(p, "unexpected node as symbol: %s", parser_node_name(type));
|
||||||
@ -15465,8 +15476,6 @@ new_hash_pattern_tail(struct parser_params *p, NODE *kw_args, ID kw_rest_arg, co
|
|||||||
static NODE*
|
static NODE*
|
||||||
dsym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc)
|
dsym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc)
|
||||||
{
|
{
|
||||||
VALUE lit;
|
|
||||||
|
|
||||||
if (!node) {
|
if (!node) {
|
||||||
return NEW_SYM(STR_NEW0(), loc);
|
return NEW_SYM(STR_NEW0(), loc);
|
||||||
}
|
}
|
||||||
@ -15477,8 +15486,7 @@ dsym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc)
|
|||||||
nd_set_loc(node, loc);
|
nd_set_loc(node, loc);
|
||||||
break;
|
break;
|
||||||
case NODE_STR:
|
case NODE_STR:
|
||||||
lit = rb_node_str_string_val(node);
|
node = str_to_sym_node(p, node, loc);
|
||||||
node = NEW_SYM(lit, loc);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
node = NEW_DSYM(0, 1, NEW_LIST(node, loc), loc);
|
node = NEW_DSYM(0, 1, NEW_LIST(node, loc), loc);
|
||||||
@ -15559,23 +15567,6 @@ nd_value(struct parser_params *p, NODE *node)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
warn_duplicate_keys_check_key(struct parser_params *p, st_data_t key, st_table *literal_keys)
|
|
||||||
{
|
|
||||||
if (OBJ_BUILTIN_TYPE(key) == T_NODE && nd_type(key) == NODE_SYM) {
|
|
||||||
rb_parser_string_t *parser_str = RNODE_SYM(key)->string;
|
|
||||||
struct RString fake_str;
|
|
||||||
VALUE str = rb_setup_fake_str(&fake_str, parser_str->ptr, parser_str->len, parser_str->enc);
|
|
||||||
if (rb_enc_asciicompat(parser_str->enc) && rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
|
|
||||||
st_free_table(literal_keys);
|
|
||||||
/* Since we have a ASCII compatible encoding and the coderange is
|
|
||||||
* broken, sym_check_asciionly should raise an EncodingError. */
|
|
||||||
rb_check_id_cstr(parser_str->ptr, parser_str->len, parser_str->enc);
|
|
||||||
rb_bug("unreachable");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
warn_duplicate_keys(struct parser_params *p, NODE *hash)
|
warn_duplicate_keys(struct parser_params *p, NODE *hash)
|
||||||
{
|
{
|
||||||
@ -15594,18 +15585,12 @@ warn_duplicate_keys(struct parser_params *p, NODE *hash)
|
|||||||
if (!head) {
|
if (!head) {
|
||||||
key = (st_data_t)value;
|
key = (st_data_t)value;
|
||||||
}
|
}
|
||||||
else if (nd_type_st_key_enable_p(head)) {
|
else if (nd_type_st_key_enable_p(head) &&
|
||||||
warn_duplicate_keys_check_key(p, (st_data_t)head, literal_keys);
|
st_delete(literal_keys, (key = (st_data_t)nd_st_key(p, head), &key), &data)) {
|
||||||
|
rb_compile_warn(p->ruby_sourcefile, nd_line((NODE *)data),
|
||||||
key = (st_data_t)nd_st_key(p, head);
|
"key %+"PRIsVALUE" is duplicated and overwritten on line %d",
|
||||||
if (st_delete(literal_keys, &key, &data)) {
|
nd_value(p, head), nd_line(head));
|
||||||
rb_compile_warn(p->ruby_sourcefile, nd_line((NODE *)data),
|
|
||||||
"key %+"PRIsVALUE" is duplicated and overwritten on line %d",
|
|
||||||
nd_value(p, head), nd_line(head));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
warn_duplicate_keys_check_key(p, key, literal_keys);
|
|
||||||
st_insert(literal_keys, (st_data_t)key, (st_data_t)hash);
|
st_insert(literal_keys, (st_data_t)key, (st_data_t)hash);
|
||||||
hash = next;
|
hash = next;
|
||||||
}
|
}
|
||||||
|
@ -292,18 +292,6 @@ enc_symname_type(const char *name, long len, void *enc, unsigned int allowed_att
|
|||||||
return rb_enc_symname_type(name, len, (rb_encoding *)enc, allowed_attrset);
|
return rb_enc_symname_type(name, len, (rb_encoding *)enc, allowed_attrset);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ID
|
|
||||||
check_id_cstr(const char *ptr, long len, void *enc)
|
|
||||||
{
|
|
||||||
return rb_check_id_cstr(ptr, len, (rb_encoding *)enc);
|
|
||||||
}
|
|
||||||
|
|
||||||
static VALUE
|
|
||||||
setup_fake_str(struct RString *fake_str, const char *name, long len, void *enc)
|
|
||||||
{
|
|
||||||
return rb_setup_fake_str(fake_str, name, len, (rb_encoding *)enc);
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
struct parser_params *parser;
|
struct parser_params *parser;
|
||||||
rb_encoding *enc;
|
rb_encoding *enc;
|
||||||
@ -563,7 +551,7 @@ static const rb_parser_config_t rb_global_parser_config = {
|
|||||||
.id2str = rb_id2str,
|
.id2str = rb_id2str,
|
||||||
.id2sym = rb_id2sym,
|
.id2sym = rb_id2sym,
|
||||||
.sym2id = rb_sym2id,
|
.sym2id = rb_sym2id,
|
||||||
.check_id_cstr = check_id_cstr,
|
.str_valid_encoding_p = rb_str_valid_encoding_p,
|
||||||
|
|
||||||
.str_catf = rb_str_catf,
|
.str_catf = rb_str_catf,
|
||||||
.str_cat_cstr = rb_str_cat_cstr,
|
.str_cat_cstr = rb_str_cat_cstr,
|
||||||
@ -578,12 +566,10 @@ static const rb_parser_config_t rb_global_parser_config = {
|
|||||||
.str_resize = rb_str_resize,
|
.str_resize = rb_str_resize,
|
||||||
.str_new = rb_str_new,
|
.str_new = rb_str_new,
|
||||||
.str_new_cstr = rb_str_new_cstr,
|
.str_new_cstr = rb_str_new_cstr,
|
||||||
.setup_fake_str = setup_fake_str,
|
|
||||||
.fstring = rb_fstring,
|
.fstring = rb_fstring,
|
||||||
.is_ascii_string = is_ascii_string2,
|
.is_ascii_string = is_ascii_string2,
|
||||||
.enc_str_new = enc_str_new,
|
.enc_str_new = enc_str_new,
|
||||||
.enc_str_buf_cat = enc_str_buf_cat,
|
.enc_str_buf_cat = enc_str_buf_cat,
|
||||||
.enc_str_coderange = rb_enc_str_coderange,
|
|
||||||
.str_buf_append = rb_str_buf_append,
|
.str_buf_append = rb_str_buf_append,
|
||||||
.str_vcatf = rb_str_vcatf,
|
.str_vcatf = rb_str_vcatf,
|
||||||
.string_value_cstr = rb_string_value_cstr,
|
.string_value_cstr = rb_string_value_cstr,
|
||||||
@ -643,7 +629,6 @@ static const rb_parser_config_t rb_global_parser_config = {
|
|||||||
.encoding_set = encoding_set,
|
.encoding_set = encoding_set,
|
||||||
.encoding_is_ascii8bit = encoding_is_ascii8bit,
|
.encoding_is_ascii8bit = encoding_is_ascii8bit,
|
||||||
.usascii_encoding = usascii_encoding,
|
.usascii_encoding = usascii_encoding,
|
||||||
.enc_coderange_broken = ENC_CODERANGE_BROKEN,
|
|
||||||
|
|
||||||
.ractor_make_shareable = rb_ractor_make_shareable,
|
.ractor_make_shareable = rb_ractor_make_shareable,
|
||||||
|
|
||||||
|
@ -1277,7 +1277,7 @@ typedef struct rb_parser_config_struct {
|
|||||||
VALUE (*id2str)(ID id);
|
VALUE (*id2str)(ID id);
|
||||||
VALUE (*id2sym)(ID x);
|
VALUE (*id2sym)(ID x);
|
||||||
ID (*sym2id)(VALUE sym);
|
ID (*sym2id)(VALUE sym);
|
||||||
ID (*check_id_cstr)(const char *ptr, long len, rb_encoding *enc);
|
VALUE (*str_valid_encoding_p)(VALUE str);
|
||||||
|
|
||||||
/* String */
|
/* String */
|
||||||
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
|
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
|
||||||
@ -1294,12 +1294,10 @@ typedef struct rb_parser_config_struct {
|
|||||||
VALUE (*str_resize)(VALUE str, long len);
|
VALUE (*str_resize)(VALUE str, long len);
|
||||||
VALUE (*str_new)(const char *ptr, long len);
|
VALUE (*str_new)(const char *ptr, long len);
|
||||||
VALUE (*str_new_cstr)(const char *ptr);
|
VALUE (*str_new_cstr)(const char *ptr);
|
||||||
VALUE (*setup_fake_str)(struct RString *fake_str, const char *name, long len, rb_encoding *enc);
|
|
||||||
VALUE (*fstring)(VALUE);
|
VALUE (*fstring)(VALUE);
|
||||||
int (*is_ascii_string)(VALUE str);
|
int (*is_ascii_string)(VALUE str);
|
||||||
VALUE (*enc_str_new)(const char *ptr, long len, rb_encoding *enc);
|
VALUE (*enc_str_new)(const char *ptr, long len, rb_encoding *enc);
|
||||||
VALUE (*enc_str_buf_cat)(VALUE str, const char *ptr, long len, rb_encoding *enc);
|
VALUE (*enc_str_buf_cat)(VALUE str, const char *ptr, long len, rb_encoding *enc);
|
||||||
int (*enc_str_coderange)(VALUE str);
|
|
||||||
VALUE (*str_buf_append)(VALUE str, VALUE str2);
|
VALUE (*str_buf_append)(VALUE str, VALUE str2);
|
||||||
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0)
|
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0)
|
||||||
VALUE (*str_vcatf)(VALUE str, const char *fmt, va_list ap);
|
VALUE (*str_vcatf)(VALUE str, const char *fmt, va_list ap);
|
||||||
@ -1364,7 +1362,6 @@ typedef struct rb_parser_config_struct {
|
|||||||
void (*encoding_set)(VALUE obj, int encindex);
|
void (*encoding_set)(VALUE obj, int encindex);
|
||||||
int (*encoding_is_ascii8bit)(VALUE obj);
|
int (*encoding_is_ascii8bit)(VALUE obj);
|
||||||
rb_encoding *(*usascii_encoding)(void);
|
rb_encoding *(*usascii_encoding)(void);
|
||||||
int enc_coderange_broken;
|
|
||||||
|
|
||||||
/* Ractor */
|
/* Ractor */
|
||||||
VALUE (*ractor_make_shareable)(VALUE obj);
|
VALUE (*ractor_make_shareable)(VALUE obj);
|
||||||
|
2
string.c
2
string.c
@ -11030,7 +11030,7 @@ rb_str_b(VALUE str)
|
|||||||
* "\x80".force_encoding("UTF-8").valid_encoding? # => false
|
* "\x80".force_encoding("UTF-8").valid_encoding? # => false
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static VALUE
|
VALUE
|
||||||
rb_str_valid_encoding_p(VALUE str)
|
rb_str_valid_encoding_p(VALUE str)
|
||||||
{
|
{
|
||||||
int cr = rb_enc_str_coderange(str);
|
int cr = rb_enc_str_coderange(str);
|
||||||
|
@ -1365,13 +1365,17 @@ eom
|
|||||||
assert_valid_syntax 'p :foo, {proc do end => proc do end, b: proc do end}', bug13073
|
assert_valid_syntax 'p :foo, {proc do end => proc do end, b: proc do end}', bug13073
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_invalid_encoding_symbol
|
||||||
|
assert_syntax_error('{"\xC3": 1}', "invalid symbol")
|
||||||
|
end
|
||||||
|
|
||||||
def test_invalid_symbol_in_hash_memory_leak
|
def test_invalid_symbol_in_hash_memory_leak
|
||||||
assert_no_memory_leak([], "#{<<-'begin;'}", "#{<<-'end;'}", rss: true)
|
assert_no_memory_leak([], "#{<<-'begin;'}", "#{<<-'end;'}", rss: true)
|
||||||
str = '{"\xC3": 1}'.force_encoding("UTF-8")
|
str = '{"\xC3": 1}'.force_encoding("UTF-8")
|
||||||
code = proc do
|
code = proc do
|
||||||
eval(str)
|
eval(str)
|
||||||
raise "unreachable"
|
raise "unreachable"
|
||||||
rescue EncodingError
|
rescue SyntaxError
|
||||||
end
|
end
|
||||||
|
|
||||||
1_000.times(&code)
|
1_000.times(&code)
|
||||||
|
@ -173,7 +173,7 @@ struct rb_imemo_tmpbuf_struct {
|
|||||||
#define ID2SYM p->config->id2sym
|
#define ID2SYM p->config->id2sym
|
||||||
#undef SYM2ID
|
#undef SYM2ID
|
||||||
#define SYM2ID p->config->sym2id
|
#define SYM2ID p->config->sym2id
|
||||||
#define rb_check_id_cstr p->config->check_id_cstr
|
#define rb_str_valid_encoding_p p->config->str_valid_encoding_p
|
||||||
|
|
||||||
#define rb_str_catf p->config->str_catf
|
#define rb_str_catf p->config->str_catf
|
||||||
#undef rb_str_cat_cstr
|
#undef rb_str_cat_cstr
|
||||||
@ -192,12 +192,10 @@ struct rb_imemo_tmpbuf_struct {
|
|||||||
#define rb_str_new p->config->str_new
|
#define rb_str_new p->config->str_new
|
||||||
#undef rb_str_new_cstr
|
#undef rb_str_new_cstr
|
||||||
#define rb_str_new_cstr p->config->str_new_cstr
|
#define rb_str_new_cstr p->config->str_new_cstr
|
||||||
#define rb_setup_fake_str p->config->setup_fake_str
|
|
||||||
#define rb_fstring p->config->fstring
|
#define rb_fstring p->config->fstring
|
||||||
#define is_ascii_string p->config->is_ascii_string
|
#define is_ascii_string p->config->is_ascii_string
|
||||||
#define rb_enc_str_new p->config->enc_str_new
|
#define rb_enc_str_new p->config->enc_str_new
|
||||||
#define rb_enc_str_buf_cat p->config->enc_str_buf_cat
|
#define rb_enc_str_buf_cat p->config->enc_str_buf_cat
|
||||||
#define rb_enc_str_coderange p->config->enc_str_coderange
|
|
||||||
#define rb_str_buf_append p->config->str_buf_append
|
#define rb_str_buf_append p->config->str_buf_append
|
||||||
#define rb_str_vcatf p->config->str_vcatf
|
#define rb_str_vcatf p->config->str_vcatf
|
||||||
#undef StringValueCStr
|
#undef StringValueCStr
|
||||||
@ -262,7 +260,6 @@ struct rb_imemo_tmpbuf_struct {
|
|||||||
#define ENCODING_SET p->config->encoding_set
|
#define ENCODING_SET p->config->encoding_set
|
||||||
#define ENCODING_IS_ASCII8BIT p->config->encoding_is_ascii8bit
|
#define ENCODING_IS_ASCII8BIT p->config->encoding_is_ascii8bit
|
||||||
#define rb_usascii_encoding p->config->usascii_encoding
|
#define rb_usascii_encoding p->config->usascii_encoding
|
||||||
#define ENC_CODERANGE_BROKEN p->config->enc_coderange_broken
|
|
||||||
|
|
||||||
#define rb_ractor_make_shareable p->config->ractor_make_shareable
|
#define rb_ractor_make_shareable p->config->ractor_make_shareable
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user