diff --git a/compile.c b/compile.c index 3f894cbe69..826260d182 100644 --- a/compile.c +++ b/compile.c @@ -3820,6 +3820,24 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal } ELEM_REMOVE(&iobj->link); } + if (IS_NEXT_INSN_ID(&iobj->link, toregexp)) { /* the instruction after iobj is toregexp */ + INSN *next = (INSN *)iobj->link.next; + if (OPERAND_AT(next, 1) == INT2FIX(1)) { /* toregexp's count operand is 1, so iobj's operand is its only source: compile the regexp here */ + VALUE src = OPERAND_AT(iobj, 0); + int opt = (int)FIX2LONG(OPERAND_AT(next, 0)); /* toregexp's first operand: the regexp option flags */ + VALUE path = rb_iseq_path(iseq); + int line = iobj->insn_info.line_no; + VALUE errinfo = rb_errinfo(); /* remembered so it can be put back if compilation fails */ + VALUE re = rb_reg_compile(src, opt, RSTRING_PTR(path), line); + if (NIL_P(re)) { /* compilation failed: report the message taken from the current errinfo */ + VALUE message = rb_attr_get(rb_errinfo(), idMesg); + rb_set_errinfo(errinfo); /* restore the errinfo saved above */ + COMPILE_ERROR(iseq, line, "%" PRIsVALUE, message); /* NOTE(review): if COMPILE_ERROR does not leave this block, the write below stores a nil re -- confirm */ + } + RB_OBJ_WRITE(iseq, &OPERAND_AT(iobj, 0), re); /* iobj's operand becomes the compiled regexp */ + ELEM_REMOVE(iobj->link.next); /* unlink the toregexp instruction */ + } + } } if (IS_INSN_ID(iobj, concatstrings)) { @@ -4502,47 +4520,91 @@ all_string_result_p(const NODE *node) } } -static int -compile_dstr_fragments(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int *cntp) -{ - const struct RNode_LIST *list = RNODE_DSTR(node)->nd_next; - VALUE lit = rb_node_dstr_string_val(node); - LINK_ELEMENT *first_lit = 0; - int cnt = 0; +struct dstr_ctxt { /* state threaded through the dstr/dregx fragment helpers */ + rb_iseq_t *const iseq; + LINK_ANCHOR *const ret; /* anchor that receives the emitted instructions */ + VALUE lit; /* literal text accumulated but not yet emitted; Qnil when none */ + const NODE *lit_node; /* node passed with the first piece of the pending literal */ + int cnt; /* incremented once per flushed literal and once per compiled non-literal part */ + int dregx; /* non-zero: run rb_reg_check_preprocess on every literal piece */ +}; - debugp_param("nd_lit", lit); - if (!NIL_P(lit)) { - cnt++; - if (!RB_TYPE_P(lit, T_STRING)) { - COMPILE_ERROR(ERROR_ARGS "dstr: must be string: %s", - rb_builtin_type_name(TYPE(lit))); +static int +append_dstr_fragment(struct dstr_ctxt *args, const NODE *const node, rb_parser_string_t *str) +{ /* Adds str to the pending literal, starting a new one if none is pending. With args->dregx set, a piece rejected by rb_reg_check_preprocess is reported and COMPILE_NG is returned; otherwise returns COMPILE_OK. */ + VALUE s = rb_str_new_mutable_parser_string(str); + if (args->dregx) { + VALUE error = rb_reg_check_preprocess(s); + if (!NIL_P(error)) { + COMPILE_ERROR(args->iseq, nd_line(node), "%" PRIsVALUE, error); return COMPILE_NG; } + } + if (NIL_P(args->lit)) { /* nothing pending: this piece starts the literal */ + args->lit = s; + args->lit_node = node; + } + else { /* extend the pending literal */ + 
rb_str_buf_append(args->lit, s); + } + return COMPILE_OK; +} + +static void +flush_dstr_fragment(struct dstr_ctxt *args) +{ /* Emits the pending literal, if any, as one putobject of its rb_fstring() result, bumps cnt and clears the pending state. */ + if (!NIL_P(args->lit)) { + rb_iseq_t *iseq = args->iseq; /* presumably needed by the ADD_INSN1 macro -- TODO confirm */ + VALUE lit = args->lit; + args->lit = Qnil; lit = rb_fstring(lit); - ADD_INSN1(ret, node, putobject, lit); - RB_OBJ_WRITTEN(iseq, Qundef, lit); - if (RSTRING_LEN(lit) == 0) first_lit = LAST_ELEMENT(ret); + ADD_INSN1(args->ret, args->lit_node, putobject, lit); + RB_OBJ_WRITTEN(args->iseq, Qundef, lit); + args->cnt++; + } +} + +static int +compile_dstr_fragments_0(struct dstr_ctxt *args, const NODE *const node) +{ /* Walks node's own string and its nd_next list: NODE_STR pieces join the pending literal, nested NODE_DSTR parts recurse, and any other part flushes the literal and is compiled as its own operand. */ + const struct RNode_LIST *list = RNODE_DSTR(node)->nd_next; + rb_parser_string_t *str = RNODE_DSTR(node)->string; + + if (str) { + CHECK(append_dstr_fragment(args, node, str)); } while (list) { const NODE *const head = list->nd_head; if (nd_type_p(head, NODE_STR)) { - lit = rb_node_str_string_val(head); - ADD_INSN1(ret, head, putobject, lit); - RB_OBJ_WRITTEN(iseq, Qundef, lit); - lit = Qnil; + CHECK(append_dstr_fragment(args, node, RNODE_STR(head)->string)); /* NOTE(review): passes node where the removed code used head for putobject -- confirm */ + } + else if (nd_type_p(head, NODE_DSTR)) { + CHECK(compile_dstr_fragments_0(args, head)); } else { - CHECK(COMPILE(ret, "each string", head)); + flush_dstr_fragment(args); /* emit the pending literal before the non-literal part */ + rb_iseq_t *iseq = args->iseq; /* presumably needed by the COMPILE macro -- TODO confirm */ + CHECK(COMPILE(args->ret, "each string", head)); + args->cnt++; } - cnt++; list = (struct RNode_LIST *)list->nd_next; } - if (NIL_P(lit) && first_lit) { - ELEM_REMOVE(first_lit); - --cnt; - } - *cntp = cnt; + return COMPILE_OK; +} + +static int +compile_dstr_fragments(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int *cntp, int dregx) +{ /* Emits every fragment of node into ret and stores the number of emitted operands in *cntp; dregx is forwarded as the per-piece regexp check flag. Returns COMPILE_OK when it reaches the end. */ + struct dstr_ctxt args = { + .iseq = iseq, .ret = ret, + .lit = Qnil, .lit_node = NULL, + .cnt = 0, .dregx = dregx, + }; + CHECK(compile_dstr_fragments_0(&args, node)); + flush_dstr_fragment(&args); /* emit a trailing literal, if one is still pending */ + + *cntp = args.cnt; return COMPILE_OK; } @@ -4571,7 +4633,7 @@ compile_dstr(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node) RB_OBJ_WRITTEN(iseq, 
Qundef, lit); } else { - CHECK(compile_dstr_fragments(iseq, ret, node, &cnt)); + CHECK(compile_dstr_fragments(iseq, ret, node, &cnt, FALSE)); ADD_INSN1(ret, node, concatstrings, INT2FIX(cnt)); } return COMPILE_OK; @@ -4593,7 +4655,7 @@ compile_dregx(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, i return COMPILE_OK; } - CHECK(compile_dstr_fragments(iseq, ret, node, &cnt)); + CHECK(compile_dstr_fragments(iseq, ret, node, &cnt, TRUE)); ADD_INSN2(ret, node, toregexp, INT2FIX(cflag), INT2FIX(cnt)); if (popped) { diff --git a/parse.y b/parse.y index 12fac7d50e..c8629c779b 100644 --- a/parse.y +++ b/parse.y @@ -1480,9 +1480,6 @@ static rb_ast_id_table_t *local_tbl(struct parser_params*); static VALUE reg_compile(struct parser_params*, rb_parser_string_t*, int); static void reg_fragment_setenc(struct parser_params*, rb_parser_string_t*, int); -int rb_parser_reg_fragment_check(struct parser_params*, rb_parser_string_t*, int, rb_parser_reg_fragment_error_func); -static void reg_fragment_error(struct parser_params *, VALUE); -#define reg_fragment_check(p, str, option) rb_parser_reg_fragment_check(p, str, option, reg_fragment_error) static int literal_concat0(struct parser_params *p, rb_parser_string_t *head, rb_parser_string_t *tail); static NODE *heredoc_dedent(struct parser_params*,NODE*); @@ -13161,12 +13158,26 @@ symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol) return list_append(p, symbols, symbol); } +static void +dregex_fragment_setenc(struct parser_params *p, rb_node_dregx_t *const dreg, int options) +{ /* Calls reg_fragment_setenc on dreg->string (when set) and on each NODE_STR in dreg->nd_next, recursing into NODE_DSTR entries. */ + if (dreg->string) { + reg_fragment_setenc(p, dreg->string, options); + } + for (struct RNode_LIST *list = dreg->nd_next; list; list = RNODE_LIST(list->nd_next)) { + NODE *frag = list->nd_head; + if (nd_type_p(frag, NODE_STR)) { + reg_fragment_setenc(p, RNODE_STR(frag)->string, options); + } + else if (nd_type_p(frag, NODE_DSTR)) { + dregex_fragment_setenc(p, RNODE_DSTR(frag), options); /* NOTE(review): hands an RNODE_DSTR() value to a rb_node_dregx_t * parameter -- confirm the two node types are interchangeable here */ + } + } +} + static NODE * new_regexp(struct 
parser_params *p, NODE *node, int options, const YYLTYPE *loc) { - struct RNode_LIST *list; - NODE *prev; - if (!node) { /* Check string is valid regex */ rb_parser_string_t *str = STRING_NEW0(); @@ -13190,37 +13201,8 @@ new_regexp(struct parser_params *p, NODE *node, int options, const YYLTYPE *loc) nd_set_loc(node, loc); rb_node_dregx_t *const dreg = RNODE_DREGX(node); dreg->as.nd_cflag = options & RE_OPTION_MASK; - if (!dreg->nd_next) { - /* Check string is valid regex */ - reg_compile(p, dreg->string, options); - } - else if (dreg->string) { - reg_fragment_check(p, dreg->string, options); - } - prev = node; - for (list = dreg->nd_next; list; list = RNODE_LIST(list->nd_next)) { - NODE *frag = list->nd_head; - enum node_type type = nd_type(frag); - if (type == NODE_STR || (type == NODE_DSTR && !RNODE_DSTR(frag)->nd_next)) { - rb_parser_string_t *tail = RNODE_STR(frag)->string; - if (reg_fragment_check(p, tail, options) && prev && RNODE_DREGX(prev)->string) { - rb_parser_string_t *lit = prev == node ? 
dreg->string : RNODE_STR(RNODE_LIST(prev)->nd_head)->string; - if (!literal_concat0(p, lit, tail)) { - return NEW_NIL(loc); /* dummy node on error */ - } - rb_parser_str_resize(p, tail, 0); - RNODE_LIST(prev)->nd_next = list->nd_next; - rb_discard_node(p, list->nd_head); - rb_discard_node(p, (NODE *)list); - list = RNODE_LIST(prev); - } - else { - prev = (NODE *)list; - } - } - else { - prev = 0; - } + if (dreg->nd_next) { /* nd_next is set: only assign fragment encodings here */ + dregex_fragment_setenc(p, dreg, options); } if (options & RE_OPTION_ONCE) { node = NEW_ONCE(node, loc); @@ -15363,13 +15345,7 @@ rb_reg_fragment_setenc(struct parser_params* p, rb_parser_string_t *str, int opt rb_parser_enc_associate(p, str, rb_ascii8bit_encoding()); } else if (rb_is_usascii_enc(p->enc)) { - if (!rb_parser_is_ascii_string(p, str)) { - /* raise in re.c */ - rb_parser_enc_associate(p, str, rb_usascii_encoding()); - } - else { - rb_parser_enc_associate(p, str, rb_ascii8bit_encoding()); - } + rb_parser_enc_associate(p, str, rb_ascii8bit_encoding()); } return 0; @@ -15385,30 +15361,6 @@ reg_fragment_setenc(struct parser_params* p, rb_parser_string_t *str, int option if (c) reg_fragment_enc_error(p, str, c); } -static void -reg_fragment_error(struct parser_params* p, VALUE err) -{ - compile_error(p, "%"PRIsVALUE, err); -} - -#ifndef RIPPER -int -rb_parser_reg_fragment_check(struct parser_params* p, rb_parser_string_t *str, int options, rb_parser_reg_fragment_error_func error) -{ - VALUE err, str2; - reg_fragment_setenc(p, str, options); - /* TODO */ - str2 = rb_str_new_parser_string(str); - err = rb_reg_check_preprocess(str2); - if (err != Qnil) { - err = rb_obj_as_string(err); - error(p, err); - return 0; - } - return 1; -} -#endif - #ifndef UNIVERSAL_PARSER typedef struct { struct parser_params* parser; @@ -15507,7 +15459,7 @@ reg_compile(struct parser_params* p, rb_parser_string_t *str, int options) if (NIL_P(re)) { VALUE m = rb_attr_get(rb_errinfo(), idMesg); rb_set_errinfo(err); - reg_fragment_error(p, m); + compile_error(p, 
"%"PRIsVALUE, m); return Qnil; } return re; diff --git a/test/.excludes-parsey/TestM17N.rb b/test/.excludes-parsey/TestM17N.rb deleted file mode 100644 index 7f8c44d02a..0000000000 --- a/test/.excludes-parsey/TestM17N.rb +++ /dev/null @@ -1 +0,0 @@ -exclude(:test_regexp_usascii, "https://bugs.ruby-lang.org/issues/20504") diff --git a/test/.excludes-parsey/TestMixedUnicodeEscape.rb b/test/.excludes-parsey/TestMixedUnicodeEscape.rb deleted file mode 100644 index 7bf964ebf1..0000000000 --- a/test/.excludes-parsey/TestMixedUnicodeEscape.rb +++ /dev/null @@ -1 +0,0 @@ -exclude(:test_basic, "https://bugs.ruby-lang.org/issues/20504") diff --git a/test/.excludes-parsey/TestRubyLiteral.rb b/test/.excludes-parsey/TestRubyLiteral.rb deleted file mode 100644 index 853f23a3b9..0000000000 --- a/test/.excludes-parsey/TestRubyLiteral.rb +++ /dev/null @@ -1 +0,0 @@ -exclude(:test_dregexp, "https://bugs.ruby-lang.org/issues/20504") diff --git a/test/ripper/test_ripper.rb b/test/ripper/test_ripper.rb index 5ca79e136f..070023b536 100644 --- a/test/ripper/test_ripper.rb +++ b/test/ripper/test_ripper.rb @@ -164,7 +164,6 @@ end assert_equal([[1, 8], :on_tstring_end, "\"", state(:EXPR_END)], lex.shift) assert_equal([[1, 9], :on_embexpr_end, "}", state(:EXPR_END)], lex.shift) assert_equal([[1, 10], :on_regexp_end, "/", state(:EXPR_BEG)], lex.shift) - assert_equal([[1, 11], :compile_error, "", state(:EXPR_END), "invalid multibyte character: /\\xCD/"], lex.shift) assert_empty(lex) end