Introduce NODE_REGX to manage regexp literal

This commit is contained in:
yui-knk 2024-02-10 10:05:18 +09:00 committed by Yuichiro Kaneko
parent 97d4363d3b
commit e7ab5d891c
10 changed files with 283 additions and 52 deletions

2
ast.c
View File

@ -567,6 +567,8 @@ node_children(rb_ast_t *ast, const NODE *node)
return rb_ary_new_from_args(1, rb_node_rational_literal_val(node));
case NODE_IMAGINARY:
return rb_ary_new_from_args(1, rb_node_imaginary_literal_val(node));
case NODE_REGX:
return rb_ary_new_from_args(1, rb_node_regx_string_val(node));
case NODE_ONCE:
return rb_ary_new_from_node_args(ast, 1, RNODE_ONCE(node)->nd_body);
case NODE_DSTR:

View File

@ -15931,6 +15931,7 @@ ruby_parser.$(OBJEXT): $(top_srcdir)/internal/fixnum.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/imemo.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/numeric.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/rational.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/re.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/ruby_parser.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/serial.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/static_assert.h

View File

@ -1931,6 +1931,9 @@ iseq_set_arguments_keywords(rb_iseq_t *iseq, LINK_ANCHOR *const optargs,
case NODE_SYM:
dv = rb_node_sym_string_val(val_node);
break;
case NODE_REGX:
dv = rb_node_regx_string_val(val_node);
break;
case NODE_LINE:
dv = rb_node_line_lineno_val(val_node);
break;
@ -4499,6 +4502,7 @@ compile_branch_condition(rb_iseq_t *iseq, LINK_ANCHOR *ret, const NODE *cond,
case NODE_IMAGINARY: /* NODE_IMAGINARY is always true */
case NODE_TRUE:
case NODE_STR:
case NODE_REGX:
case NODE_ZLIST:
case NODE_LAMBDA:
/* printf("useless condition eliminate (%s)\n", ruby_node_name(nd_type(cond))); */
@ -4702,6 +4706,7 @@ static_literal_node_p(const NODE *node, const rb_iseq_t *iseq, bool hash_key)
switch (nd_type(node)) {
case NODE_LIT:
case NODE_SYM:
case NODE_REGX:
case NODE_LINE:
case NODE_ENCODING:
case NODE_INTEGER:
@ -4740,6 +4745,8 @@ static_literal_value(const NODE *node, rb_iseq_t *iseq)
return Qfalse;
case NODE_SYM:
return rb_node_sym_string_val(node);
case NODE_REGX:
return rb_node_regx_string_val(node);
case NODE_LINE:
return rb_node_line_lineno_val(node);
case NODE_ENCODING:
@ -5785,6 +5792,7 @@ defined_expr0(rb_iseq_t *iseq, LINK_ANCHOR *const ret,
case NODE_STR:
case NODE_LIT:
case NODE_SYM:
case NODE_REGX:
case NODE_LINE:
case NODE_FILE:
case NODE_ENCODING:
@ -7212,6 +7220,7 @@ iseq_compile_pattern_each(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *c
}
case NODE_LIT:
case NODE_SYM:
case NODE_REGX:
case NODE_LINE:
case NODE_INTEGER:
case NODE_FLOAT:
@ -9637,7 +9646,7 @@ compile_match(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, i
INIT_ANCHOR(val);
switch ((int)type) {
case NODE_MATCH:
ADD_INSN1(recv, node, putobject, RNODE_MATCH(node)->nd_lit);
ADD_INSN1(recv, node, putobject, rb_node_regx_string_val(node));
ADD_INSN2(val, node, getspecial, INT2FIX(0),
INT2FIX(0));
break;
@ -9799,6 +9808,7 @@ compile_kw_arg(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node,
}
else if (nd_type_p(default_value, NODE_LIT) ||
nd_type_p(default_value, NODE_SYM) ||
nd_type_p(default_value, NODE_REGX) ||
nd_type_p(default_value, NODE_LINE) ||
nd_type_p(default_value, NODE_INTEGER) ||
nd_type_p(default_value, NODE_FLOAT) ||
@ -10385,6 +10395,14 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const no
case NODE_EVSTR:
CHECK(compile_evstr(iseq, ret, RNODE_EVSTR(node)->nd_body, popped));
break;
case NODE_REGX:{
if (!popped) {
VALUE lit = rb_node_regx_string_val(node);
ADD_INSN1(ret, node, putobject, lit);
RB_OBJ_WRITTEN(iseq, Qundef, lit);
}
break;
}
case NODE_DREGX:
compile_dregx(iseq, ret, node, popped);
break;

View File

@ -23,6 +23,7 @@ VALUE rb_str_new_parser_string(rb_parser_string_t *str);
VALUE rb_node_str_string_val(const NODE *);
VALUE rb_node_sym_string_val(const NODE *);
VALUE rb_node_dstr_string_val(const NODE *);
VALUE rb_node_regx_string_val(const NODE *);
VALUE rb_node_dregx_string_val(const NODE *);
VALUE rb_node_line_lineno_val(const NODE *);
VALUE rb_node_file_path_val(const NODE *);

View File

@ -379,6 +379,8 @@ class RbInspector(LLDBInterface):
self._append_expression("*(struct RNode_DXSTR *) %0#x" % val.GetValueAsUnsigned())
elif nd_type == self.ruby_globals["NODE_EVSTR"]:
self._append_expression("*(struct RNode_EVSTR *) %0#x" % val.GetValueAsUnsigned())
elif nd_type == self.ruby_globals["NODE_REGX"]:
self._append_expression("*(struct RNode_REGX *) %0#x" % val.GetValueAsUnsigned())
elif nd_type == self.ruby_globals["NODE_DREGX"]:
self._append_expression("*(struct RNode_DREGX *) %0#x" % val.GetValueAsUnsigned())
elif nd_type == self.ruby_globals["NODE_ONCE"]:

6
node.c
View File

@ -195,6 +195,10 @@ free_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
case NODE_SYM:
parser_string_free(ast, RNODE_SYM(node)->string);
break;
case NODE_REGX:
case NODE_MATCH:
parser_string_free(ast, RNODE_REGX(node)->string);
break;
case NODE_DSYM:
parser_string_free(ast, RNODE_DSYM(node)->string);
break;
@ -268,7 +272,6 @@ static bool
nodetype_markable_p(enum node_type type)
{
switch (type) {
case NODE_MATCH:
case NODE_LIT:
return true;
default:
@ -374,7 +377,6 @@ mark_and_move_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
#endif
switch (nd_type(node)) {
case NODE_MATCH:
case NODE_LIT:
rb_gc_mark_and_move(&RNODE_LIT(node)->nd_lit);
break;

View File

@ -678,7 +678,8 @@ dump_node(VALUE buf, VALUE indent, int comment, const NODE * node)
ANN("match expression (against $_ implicitly)");
ANN("format: [nd_lit] (in condition)");
ANN("example: if /foo/; foo; end");
F_LIT(nd_lit, RNODE_MATCH, "regexp");
LAST_NODE;
F_VALUE(string, rb_node_regx_string_val(node), "string");
return;
case NODE_MATCH2:
@ -750,6 +751,14 @@ dump_node(VALUE buf, VALUE indent, int comment, const NODE * node)
F_VALUE(val, rb_node_imaginary_literal_val(node), "val");
return;
case NODE_REGX:
ANN("regexp literal");
ANN("format: [string]");
ANN("example: /foo/");
LAST_NODE;
F_VALUE(string, rb_node_regx_string_val(node), "string");
return;
case NODE_ONCE:
ANN("once evaluation");
ANN("format: [nd_body]");

265
parse.y
View File

@ -88,6 +88,7 @@ hash_literal_key_p(VALUE k)
case NODE_IMAGINARY:
case NODE_STR:
case NODE_SYM:
case NODE_REGX:
case NODE_LINE:
case NODE_FILE:
case NODE_ENCODING:
@ -136,6 +137,13 @@ node_imaginary_cmp(rb_node_imaginary_t *n1, rb_node_imaginary_t *n2)
strcmp(n1->val, n2->val));
}
/*
 * Comparison callback for two NODE_REGX nodes used as literal hash keys.
 * Returns 0 when both the option bits and the source strings are equal,
 * and 1 otherwise.
 */
static int
rb_parser_regx_hash_cmp(rb_node_regx_t *n1, rb_node_regx_t *n2)
{
    /* Differing options make the nodes distinct regardless of the source. */
    if (n1->options != n2->options) {
        return 1;
    }
    /* Normalize the string comparison result to 0/1. */
    return rb_parser_string_hash_cmp(n1->string, n2->string) ? 1 : 0;
}
static int
node_integer_line_cmp(const NODE *node_i, const NODE *line)
{
@ -190,6 +198,8 @@ node_cdhash_cmp(VALUE val, VALUE lit)
return rb_parser_string_hash_cmp(RNODE_STR(node_val)->string, RNODE_STR(node_lit)->string);
case NODE_SYM:
return rb_parser_string_hash_cmp(RNODE_SYM(node_val)->string, RNODE_SYM(node_lit)->string);
case NODE_REGX:
return rb_parser_regx_hash_cmp(RNODE_REGX(node_val), RNODE_REGX(node_lit));
case NODE_LINE:
return node_val->nd_loc.beg_pos.lineno != node_lit->nd_loc.beg_pos.lineno;
case NODE_FILE:
@ -236,6 +246,8 @@ node_cdhash_hash(VALUE a)
return rb_parser_str_hash(RNODE_STR(node)->string);
case NODE_SYM:
return rb_parser_str_hash(RNODE_SYM(node)->string);
case NODE_REGX:
return rb_parser_str_hash(RNODE_REGX(node)->string);
case NODE_LINE:
/* Same with NODE_INTEGER FIXNUM case */
return (st_index_t)node->nd_loc.beg_pos.lineno;
@ -1211,6 +1223,7 @@ static rb_node_dstr_t *rb_node_dstr_new(struct parser_params *p, rb_parser_strin
static rb_node_xstr_t *rb_node_xstr_new(struct parser_params *p, rb_parser_string_t *string, const YYLTYPE *loc);
static rb_node_dxstr_t *rb_node_dxstr_new(struct parser_params *p, rb_parser_string_t *string, long nd_alen, NODE *nd_next, const YYLTYPE *loc);
static rb_node_evstr_t *rb_node_evstr_new(struct parser_params *p, NODE *nd_body, const YYLTYPE *loc);
static rb_node_regx_t *rb_node_regx_new(struct parser_params *p, rb_parser_string_t *string, int options, const YYLTYPE *loc);
static rb_node_once_t *rb_node_once_new(struct parser_params *p, NODE *nd_body, const YYLTYPE *loc);
static rb_node_args_t *rb_node_args_new(struct parser_params *p, const YYLTYPE *loc);
static rb_node_args_aux_t *rb_node_args_aux_new(struct parser_params *p, ID nd_pid, long nd_plen, const YYLTYPE *loc);
@ -1319,6 +1332,7 @@ static rb_node_error_t *rb_node_error_new(struct parser_params *p, const YYLTYPE
#define NEW_XSTR(s,loc) (NODE *)rb_node_xstr_new(p,s,loc)
#define NEW_DXSTR(s,l,n,loc) (NODE *)rb_node_dxstr_new(p,s,l,n,loc)
#define NEW_EVSTR(n,loc) (NODE *)rb_node_evstr_new(p,n,loc)
#define NEW_REGX(str,opts,loc) (NODE *)rb_node_regx_new(p,str,opts,loc)
#define NEW_ONCE(b,loc) (NODE *)rb_node_once_new(p,b,loc)
#define NEW_ARGS(loc) rb_node_args_new(p,loc)
#define NEW_ARGS_AUX(r,b,loc) rb_node_args_aux_new(p,r,b,loc)
@ -1567,8 +1581,8 @@ static NODE *match_op(struct parser_params*,NODE*,NODE*,const YYLTYPE*,const YYL
static rb_ast_id_table_t *local_tbl(struct parser_params*);
static VALUE reg_compile(struct parser_params*, VALUE, int);
static void reg_fragment_setenc(struct parser_params*, VALUE, int);
static VALUE reg_compile(struct parser_params*, rb_parser_string_t*, int);
static void reg_fragment_setenc(struct parser_params*, rb_parser_string_t*, int);
#define reg_fragment_check rb_parser_reg_fragment_check
int reg_fragment_check(struct parser_params*, rb_parser_string_t*, int);
@ -1592,7 +1606,7 @@ static int id_is_var(struct parser_params *p, ID id);
RUBY_SYMBOL_EXPORT_BEGIN
VALUE rb_parser_reg_compile(struct parser_params* p, VALUE str, int options);
int rb_reg_fragment_setenc(struct parser_params*, VALUE, int);
int rb_reg_fragment_setenc(struct parser_params*, rb_parser_string_t *, int);
enum lex_state_e rb_parser_trace_lex_state(struct parser_params *, enum lex_state_e, enum lex_state_e, int);
VALUE rb_parser_lex_state_name(struct parser_params *p, enum lex_state_e state);
void rb_parser_show_bitstack(struct parser_params *, stack_type, const char *, int);
@ -1647,6 +1661,9 @@ static void numparam_pop(struct parser_params *p, NODE *prev_inner);
#define idFWD_ALL idDot3
#define arg_FWD_BLOCK idFWD_BLOCK
#define RE_ONIG_OPTION_IGNORECASE 1
#define RE_ONIG_OPTION_EXTEND (RE_ONIG_OPTION_IGNORECASE<<1)
#define RE_ONIG_OPTION_MULTILINE (RE_ONIG_OPTION_EXTEND<<1)
#define RE_OPTION_ONCE (1<<16)
#define RE_OPTION_ENCODING_SHIFT 8
#define RE_OPTION_ENCODING(e) (((e)&0xff)<<RE_OPTION_ENCODING_SHIFT)
@ -2237,6 +2254,14 @@ rb_parser_str_get_encoding(rb_parser_string_t *str)
return str->enc;
}
#ifndef RIPPER
/*
 * True when the encoding recorded on the parser string is ASCII-8BIT.
 * The parser argument is not used by this helper.
 */
static bool
PARSER_ENCODING_IS_ASCII8BIT(struct parser_params *p, rb_parser_string_t *str)
{
    rb_encoding *const str_enc = rb_parser_str_get_encoding(str);
    return str_enc == rb_ascii8bit_encoding();
}
#endif
static int
PARSER_ENC_CODERANGE(rb_parser_string_t *str)
{
@ -2257,11 +2282,19 @@ PARSER_ENCODING_CODERANGE_SET(rb_parser_string_t *str, rb_encoding *enc, enum rb
}
static void
PARSER_ENCODING_CODERANGE_CLEAR(rb_parser_string_t *str)
PARSER_ENC_CODERANGE_CLEAR(rb_parser_string_t *str)
{
str->coderange = RB_PARSER_ENC_CODERANGE_UNKNOWN;
}
#ifndef RIPPER
/* True when the coderange recorded on the parser string is 7BIT. */
static bool
PARSER_ENC_CODERANGE_ASCIIONLY(rb_parser_string_t *str)
{
    const int coderange = PARSER_ENC_CODERANGE(str);
    return coderange == RB_PARSER_ENC_CODERANGE_7BIT;
}
#endif
static bool
PARSER_ENC_CODERANGE_CLEAN_P(int cr)
{
@ -2325,6 +2358,21 @@ rb_parser_enc_str_coderange(struct parser_params *p, rb_parser_string_t *str)
return cr;
}
#ifndef RIPPER
/*
 * Associate `enc` with the parser string `str` and return `str`.
 *
 * Nothing happens when the string already has that encoding.  Otherwise
 * the recorded coderange is kept only if the string is 7BIT and the new
 * encoding is ASCII compatible; in every other case it is cleared before
 * the encoding is switched.
 */
static rb_parser_string_t *
rb_parser_enc_associate(struct parser_params *p, rb_parser_string_t *str, rb_encoding *enc)
{
    if (rb_parser_str_get_encoding(str) != enc) {
        const bool coderange_survives =
            PARSER_ENC_CODERANGE_ASCIIONLY(str) && rb_enc_asciicompat(enc);

        if (!coderange_survives) {
            PARSER_ENC_CODERANGE_CLEAR(str);
        }
        rb_parser_string_set_encoding(str, enc);
    }
    return str;
}
#endif
static bool
rb_parser_is_ascii_string(struct parser_params *p, rb_parser_string_t *str)
{
@ -2394,7 +2442,7 @@ rb_parser_enc_compatible(struct parser_params *p, rb_parser_string_t *str1, rb_p
static void
rb_parser_str_modify(rb_parser_string_t *str)
{
PARSER_ENCODING_CODERANGE_CLEAR(str);
PARSER_ENC_CODERANGE_CLEAR(str);
}
static void
@ -2557,7 +2605,7 @@ rb_parser_str_resize(struct parser_params *p, rb_parser_string_t *str, long len)
long slen = PARSER_STRING_LEN(str);
if (slen > len && PARSER_ENC_CODERANGE(str) != RB_PARSER_ENC_CODERANGE_7BIT) {
PARSER_ENCODING_CODERANGE_CLEAR(str);
PARSER_ENC_CODERANGE_CLEAR(str);
}
{
@ -6828,6 +6876,7 @@ singleton : var_ref
case NODE_DSTR:
case NODE_XSTR:
case NODE_DXSTR:
case NODE_REGX:
case NODE_DREGX:
case NODE_LIT:
case NODE_SYM:
@ -8393,6 +8442,61 @@ tokadd_escape(struct parser_params *p)
return 0;
}
/*
 * Map a regexp option character to its option bit:
 * 'i' -> RE_ONIG_OPTION_IGNORECASE, 'x' -> RE_ONIG_OPTION_EXTEND,
 * 'm' -> RE_ONIG_OPTION_MULTILINE.  Any other character yields 0.
 */
static int
char_to_option(int c)
{
    switch (c) {
      case 'i':
        return RE_ONIG_OPTION_IGNORECASE;
      case 'x':
        return RE_ONIG_OPTION_EXTEND;
      case 'm':
        return RE_ONIG_OPTION_MULTILINE;
      default:
        return 0;
    }
}
/* Option flags reported through `*option` by char_to_option_kcode(). */
#define ARG_ENCODING_FIXED 16
#define ARG_ENCODING_NONE 32
/*
 * Parser-local kcode identifiers reported through `*kcode` by
 * char_to_option_kcode() and mapped to encodings by kcode_to_enc().
 */
#define ENC_ASCII8BIT 1
#define ENC_EUC_JP 2
#define ENC_Windows_31J 3
#define ENC_UTF8 4
/*
 * Classify a regexp option character.
 *
 * Encoding characters:
 *   'n'            -> *kcode = ENC_ASCII8BIT, *option = ARG_ENCODING_NONE,
 *                     returns ARG_ENCODING_NONE
 *   'e', 's', 'u'  -> *kcode = ENC_EUC_JP / ENC_Windows_31J / ENC_UTF8,
 *                     *option = ARG_ENCODING_FIXED, returns 1
 * Anything else    -> *kcode = -1, *option = char_to_option(c), and that
 *                     value is returned (0 for an unknown character).
 */
static int
char_to_option_kcode(int c, int *option, int *kcode)
{
    *option = 0;

    if (c == 'n') {
        *kcode = ENC_ASCII8BIT;
        *option = ARG_ENCODING_NONE;
        return ARG_ENCODING_NONE;
    }

    if (c == 'e') {
        *kcode = ENC_EUC_JP;
    }
    else if (c == 's') {
        *kcode = ENC_Windows_31J;
    }
    else if (c == 'u') {
        *kcode = ENC_UTF8;
    }
    else {
        /* Not an encoding character: fall back to the plain option bits. */
        *kcode = -1;
        *option = char_to_option(c);
        return *option;
    }

    *option = ARG_ENCODING_FIXED;
    return 1;
}
static int
regx_options(struct parser_params *p)
{
@ -8406,9 +8510,9 @@ regx_options(struct parser_params *p)
if (c == 'o') {
options |= RE_OPTION_ONCE;
}
else if (rb_char_to_option_kcode(c, &opt, &kc)) {
else if (char_to_option_kcode(c, &opt, &kc)) {
if (kc >= 0) {
if (kc != rb_ascii8bit_encindex()) kcode = c;
if (kc != ENC_ASCII8BIT) kcode = c;
kopt = opt;
}
else {
@ -12222,6 +12326,16 @@ rb_node_evstr_new(struct parser_params *p, NODE *nd_body, const YYLTYPE *loc)
return n;
}
/*
 * Allocate a NODE_REGX node at `loc` that refers to the parser string
 * `string`.  Only the bits of `options` selected by RE_OPTION_MASK are
 * stored on the node.
 */
static rb_node_regx_t *
rb_node_regx_new(struct parser_params *p, rb_parser_string_t *string, int options, const YYLTYPE *loc)
{
    const int masked_options = options & RE_OPTION_MASK;
    rb_node_regx_t *regx = NODE_NEWNODE(NODE_REGX, rb_node_regx_t, loc);

    regx->string = string;
    regx->options = masked_options;
    return regx;
}
static rb_node_call_t *
rb_node_call_new(struct parser_params *p, NODE *nd_recv, ID nd_mid, NODE *nd_args, const YYLTYPE *loc)
{
@ -12847,6 +12961,18 @@ str2dstr(struct parser_params *p, NODE *node)
return new_node;
}
/*
 * Build a new NODE_REGX from the string node `node`.
 *
 * The node flags are copied and the parser string is moved: the new node
 * takes the pointer and the source node's `string` is reset to 0, so only
 * the regexp node refers to it afterwards.  `options` is stored as given
 * (it is not masked with RE_OPTION_MASK here, unlike rb_node_regx_new()).
 */
static NODE *
str2regx(struct parser_params *p, NODE *node, int options)
{
    NODE *new_node = (NODE *)NODE_NEW_INTERNAL(NODE_REGX, rb_node_regx_t);
    rb_node_regx_t *regx = RNODE_REGX(new_node);

    nd_copy_flag(new_node, node);
    regx->string = RNODE_STR(node)->string;
    regx->options = options;
    /* Detach the string from the source node. */
    RNODE_STR(node)->string = 0;
    return new_node;
}
static NODE *
evstr2dstr(struct parser_params *p, NODE *node)
{
@ -12949,9 +13075,9 @@ match_op(struct parser_params *p, NODE *node1, NODE *node2, const YYLTYPE *op_lo
return match;
}
case NODE_LIT:
if (RB_TYPE_P(RNODE_LIT(n)->nd_lit, T_REGEXP)) {
const VALUE lit = RNODE_LIT(n)->nd_lit;
case NODE_REGX:
{
const VALUE lit = rb_node_regx_string_val(n);
NODE *match = NEW_MATCH2(node1, node2, loc);
RNODE_MATCH2(match)->nd_args = reg_named_capture_assign(p, lit, loc);
nd_set_line(match, line);
@ -12964,9 +13090,6 @@ match_op(struct parser_params *p, NODE *node1, NODE *node2, const YYLTYPE *op_lo
NODE *match3;
switch (nd_type(n)) {
case NODE_LIT:
if (!RB_TYPE_P(RNODE_LIT(n)->nd_lit, T_REGEXP)) break;
/* fallthru */
case NODE_DREGX:
match3 = NEW_MATCH3(node2, node1, loc);
return match3;
@ -13210,16 +13333,18 @@ new_regexp(struct parser_params *p, NODE *node, int options, const YYLTYPE *loc)
NODE *prev;
if (!node) {
node = NEW_LIT(reg_compile(p, STR_NEW0(), options), loc);
RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(node)->nd_lit);
/* Check string is valid regex */
rb_parser_string_t *str = STRING_NEW0();
reg_compile(p, str, options);
node = NEW_REGX(str, options, loc);
return node;
}
switch (nd_type(node)) {
case NODE_STR:
{
VALUE src = rb_node_str_string_val(node);
node = NEW_LIT(reg_compile(p, src, options), loc);
RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(node)->nd_lit);
/* Check string is valid regex */
reg_compile(p, RNODE_STR(node)->string, options);
node = str2regx(p, node, options);
}
break;
default:
@ -13255,9 +13380,8 @@ new_regexp(struct parser_params *p, NODE *node, int options, const YYLTYPE *loc)
}
}
if (!RNODE_DREGX(node)->nd_next) {
VALUE src = rb_node_dregx_string_val(node);
/* Check string is valid regex */
reg_compile(p, src, options);
reg_compile(p, RNODE_DREGX(node)->string, options);
}
if (options & RE_OPTION_ONCE) {
node = NEW_ONCE(node, loc);
@ -13916,6 +14040,8 @@ shareable_literal_value(struct parser_params *p, NODE *node)
return rb_node_imaginary_literal_val(node);
case NODE_ENCODING:
return rb_node_encoding_val(node);
case NODE_REGX:
return rb_node_regx_string_val(node);
case NODE_LIT:
return RNODE_LIT(node)->nd_lit;
default:
@ -13943,6 +14069,7 @@ shareable_literal_constant(struct parser_params *p, enum shareability shareable,
case NODE_NIL:
case NODE_LIT:
case NODE_SYM:
case NODE_REGX:
case NODE_LINE:
case NODE_INTEGER:
case NODE_FLOAT:
@ -14305,6 +14432,7 @@ void_expr(struct parser_params *p, NODE *node)
case NODE_IMAGINARY:
case NODE_STR:
case NODE_DSTR:
case NODE_REGX:
case NODE_DREGX:
useless = "a literal";
break;
@ -14441,6 +14569,7 @@ is_static_content(NODE *node)
} while ((node = RNODE_LIST(node)->nd_next) != 0);
case NODE_LIT:
case NODE_SYM:
case NODE_REGX:
case NODE_LINE:
case NODE_FILE:
case NODE_ENCODING:
@ -14537,6 +14666,11 @@ cond0(struct parser_params *p, NODE *node, enum cond_type type, const YYLTYPE *l
SWITCH_BY_COND_TYPE(type, warn, "string ");
break;
case NODE_REGX:
if (!e_option_supplied(p)) SWITCH_BY_COND_TYPE(type, warn, "regex ");
nd_set_type(node, NODE_MATCH);
break;
case NODE_DREGX:
if (!e_option_supplied(p)) SWITCH_BY_COND_TYPE(type, warning, "regex ");
@ -14573,11 +14707,7 @@ cond0(struct parser_params *p, NODE *node, enum cond_type type, const YYLTYPE *l
break;
case NODE_LIT:
if (RB_TYPE_P(RNODE_LIT(node)->nd_lit, T_REGEXP)) {
if (!e_option_supplied(p)) SWITCH_BY_COND_TYPE(type, warn, "regex ");
nd_set_type(node, NODE_MATCH);
}
else if (RNODE_LIT(node)->nd_lit == Qtrue ||
if (RNODE_LIT(node)->nd_lit == Qtrue ||
RNODE_LIT(node)->nd_lit == Qfalse) {
/* booleans are OK, e.g., while true */
}
@ -14963,6 +15093,7 @@ nd_type_st_key_enable_p(NODE *node)
case NODE_IMAGINARY:
case NODE_STR:
case NODE_SYM:
case NODE_REGX:
case NODE_LINE:
case NODE_FILE:
case NODE_ENCODING:
@ -14984,6 +15115,7 @@ nd_st_key(struct parser_params *p, NODE *node)
case NODE_RATIONAL:
case NODE_IMAGINARY:
case NODE_SYM:
case NODE_REGX:
case NODE_LINE:
case NODE_ENCODING:
case NODE_FILE:
@ -15012,6 +15144,8 @@ nd_value(struct parser_params *p, NODE *node)
return rb_node_imaginary_literal_val(node);
case NODE_SYM:
return rb_node_sym_string_val(node);
case NODE_REGX:
return rb_node_regx_string_val(node);
case NODE_LINE:
return rb_node_line_lineno_val(node);
case NODE_ENCODING:
@ -15634,43 +15768,83 @@ dvar_curr(struct parser_params *p, ID id)
}
static void
reg_fragment_enc_error(struct parser_params* p, VALUE str, int c)
reg_fragment_enc_error(struct parser_params* p, rb_parser_string_t *str, int c)
{
compile_error(p,
"regexp encoding option '%c' differs from source encoding '%s'",
c, rb_enc_name(rb_enc_get(str)));
c, rb_enc_name(rb_parser_str_get_encoding(str)));
}
#ifndef RIPPER
/*
 * Look up an encoding by name.  A name with no known encoding index is
 * reported through rb_bug().
 */
static rb_encoding *
find_enc(struct parser_params* p, const char *name)
{
    const int enc_index = rb_enc_find_index(name);

    if (enc_index < 0) rb_bug("unknown encoding name: %s", name);
    return rb_enc_from_index(enc_index);
}
/*
 * Translate a parser-local kcode identifier (ENC_*) into an encoding.
 * Returns NULL for any value that is not one of the known identifiers.
 */
static rb_encoding *
kcode_to_enc(struct parser_params* p, int kcode)
{
    switch (kcode) {
      case ENC_ASCII8BIT:
        return rb_ascii8bit_encoding();
      case ENC_EUC_JP:
        return find_enc(p, "EUC-JP");
      case ENC_Windows_31J:
        return find_enc(p, "Windows-31J");
      case ENC_UTF8:
        return rb_utf8_encoding();
      default:
        return NULL;
    }
}
int
rb_reg_fragment_setenc(struct parser_params* p, VALUE str, int options)
rb_reg_fragment_setenc(struct parser_params* p, rb_parser_string_t *str, int options)
{
int c = RE_OPTION_ENCODING_IDX(options);
if (c) {
int opt, idx;
rb_char_to_option_kcode(c, &opt, &idx);
if (idx != ENCODING_GET(str) &&
!is_ascii_string(str)) {
rb_encoding *enc;
char_to_option_kcode(c, &opt, &idx);
enc = kcode_to_enc(p, idx);
if (enc != rb_parser_str_get_encoding(str) &&
!rb_parser_is_ascii_string(p, str)) {
goto error;
}
ENCODING_SET(str, idx);
rb_parser_string_set_encoding(str, enc);
}
else if (RE_OPTION_ENCODING_NONE(options)) {
if (!ENCODING_IS_ASCII8BIT(str) &&
!is_ascii_string(str)) {
if (!PARSER_ENCODING_IS_ASCII8BIT(p, str) &&
!rb_parser_is_ascii_string(p, str)) {
c = 'n';
goto error;
}
rb_enc_associate(str, rb_ascii8bit_encoding());
rb_parser_enc_associate(p, str, rb_ascii8bit_encoding());
}
else if (rb_is_usascii_enc(p->enc)) {
if (!is_ascii_string(str)) {
if (!rb_parser_is_ascii_string(p, str)) {
/* raise in re.c */
rb_enc_associate(str, rb_usascii_encoding());
rb_parser_enc_associate(p, str, rb_usascii_encoding());
}
else {
rb_enc_associate(str, rb_ascii8bit_encoding());
rb_parser_enc_associate(p, str, rb_ascii8bit_encoding());
}
}
return 0;
@ -15681,7 +15855,7 @@ rb_reg_fragment_setenc(struct parser_params* p, VALUE str, int options)
#endif
static void
reg_fragment_setenc(struct parser_params* p, VALUE str, int options)
reg_fragment_setenc(struct parser_params* p, rb_parser_string_t *str, int options)
{
int c = rb_reg_fragment_setenc(p, str, options);
if (c) reg_fragment_enc_error(p, str, c);
@ -15692,10 +15866,9 @@ int
reg_fragment_check(struct parser_params* p, rb_parser_string_t *str, int options)
{
VALUE err, str2;
reg_fragment_setenc(p, str, options);
/* TODO */
str2 = rb_str_new_parser_string(str);
reg_fragment_setenc(p, str2, options);
str->enc = rb_enc_get(str2);
err = rb_reg_check_preprocess(str2);
if (err != Qnil) {
err = rb_obj_as_string(err);
@ -15769,10 +15942,12 @@ rb_reg_named_capture_assign_iter_impl(struct parser_params *p, const char *s, lo
#endif
static VALUE
parser_reg_compile(struct parser_params* p, VALUE str, int options)
parser_reg_compile(struct parser_params* p, rb_parser_string_t *str, int options)
{
VALUE str2;
reg_fragment_setenc(p, str, options);
return rb_parser_reg_compile(p, str, options);
str2 = rb_str_new_parser_string(str);
return rb_parser_reg_compile(p, str2, options);
}
#ifndef RIPPER
@ -15784,7 +15959,7 @@ rb_parser_reg_compile(struct parser_params* p, VALUE str, int options)
#endif
static VALUE
reg_compile(struct parser_params* p, VALUE str, int options)
reg_compile(struct parser_params* p, rb_parser_string_t *str, int options)
{
VALUE re;
VALUE err;

View File

@ -1,5 +1,6 @@
/* This is a wrapper for parse.y */
#include "internal/re.h"
#include "internal/ruby_parser.h"
#include "node.h"
@ -1010,6 +1011,16 @@ rb_node_dregx_string_val(const NODE *node)
return rb_str_new_parser_string(str);
}
/*
 * Produce the value for a NODE_REGX (or a node sharing its layout):
 * a Ruby String is built from the node's parser string (bytes, length and
 * encoding) and handed to rb_reg_compile() together with the node's options.
 */
VALUE
rb_node_regx_string_val(const NODE *node)
{
    const rb_node_regx_t *regx = RNODE_REGX(node);
    const rb_parser_string_t *src = regx->string;
    VALUE pattern = rb_enc_str_new(src->ptr, src->len, src->enc);

    return rb_reg_compile(pattern, regx->options, NULL, 0);
}
VALUE
rb_node_line_lineno_val(const NODE *node)
{

View File

@ -127,6 +127,7 @@ enum node_type {
NODE_XSTR,
NODE_DXSTR,
NODE_EVSTR,
NODE_REGX,
NODE_DREGX,
NODE_ONCE,
NODE_ARGS,
@ -612,11 +613,12 @@ typedef struct RNode_BACK_REF {
long nd_nth;
} rb_node_back_ref_t;
/* RNode_MATCH and RNode_LIT should be same structure */
/* RNode_MATCH and RNode_REGX should be the same structure */
typedef struct RNode_MATCH {
NODE node;
VALUE nd_lit;
struct rb_parser_string *string;
int options;
} rb_node_match_t;
typedef struct RNode_MATCH2 {
@ -719,6 +721,13 @@ typedef struct RNode_EVSTR {
struct RNode *nd_body;
} rb_node_evstr_t;
/* Node for a regexp literal (NODE_REGX). */
typedef struct RNode_REGX {
NODE node;
/* Regexp source held as a parser string. */
struct rb_parser_string *string;
/* Regexp option bits passed to rb_reg_compile() when the value is built. */
int options;
} rb_node_regx_t;
typedef struct RNode_DREGX {
NODE node;
@ -1093,6 +1102,7 @@ typedef struct RNode_ERROR {
#define RNODE_XSTR(node) ((struct RNode_XSTR *)(node))
#define RNODE_DXSTR(node) ((struct RNode_DXSTR *)(node))
#define RNODE_EVSTR(node) ((struct RNode_EVSTR *)(node))
#define RNODE_REGX(node) ((struct RNode_REGX *)(node))
#define RNODE_DREGX(node) ((struct RNode_DREGX *)(node))
#define RNODE_ONCE(node) ((struct RNode_ONCE *)(node))
#define RNODE_ARGS(node) ((struct RNode_ARGS *)(node))