[Bug #20504] Move dynamic regexp concatenation to iseq compiler
This commit is contained in:
parent
77fe82286b
commit
6bbb470dc7
Notes:
git
2025-01-03 02:06:04 +00:00
120
compile.c
120
compile.c
@ -3820,6 +3820,24 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
|
||||
}
|
||||
ELEM_REMOVE(&iobj->link);
|
||||
}
|
||||
if (IS_NEXT_INSN_ID(&iobj->link, toregexp)) {
|
||||
INSN *next = (INSN *)iobj->link.next;
|
||||
if (OPERAND_AT(next, 1) == INT2FIX(1)) {
|
||||
VALUE src = OPERAND_AT(iobj, 0);
|
||||
int opt = (int)FIX2LONG(OPERAND_AT(next, 0));
|
||||
VALUE path = rb_iseq_path(iseq);
|
||||
int line = iobj->insn_info.line_no;
|
||||
VALUE errinfo = rb_errinfo();
|
||||
VALUE re = rb_reg_compile(src, opt, RSTRING_PTR(path), line);
|
||||
if (NIL_P(re)) {
|
||||
VALUE message = rb_attr_get(rb_errinfo(), idMesg);
|
||||
rb_set_errinfo(errinfo);
|
||||
COMPILE_ERROR(iseq, line, "%" PRIsVALUE, message);
|
||||
}
|
||||
RB_OBJ_WRITE(iseq, &OPERAND_AT(iobj, 0), re);
|
||||
ELEM_REMOVE(iobj->link.next);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (IS_INSN_ID(iobj, concatstrings)) {
|
||||
@ -4502,47 +4520,91 @@ all_string_result_p(const NODE *node)
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
compile_dstr_fragments(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int *cntp)
|
||||
{
|
||||
const struct RNode_LIST *list = RNODE_DSTR(node)->nd_next;
|
||||
VALUE lit = rb_node_dstr_string_val(node);
|
||||
LINK_ELEMENT *first_lit = 0;
|
||||
int cnt = 0;
|
||||
/*
 * State shared by the dstr fragment helpers (append_dstr_fragment,
 * flush_dstr_fragment, compile_dstr_fragments_0) while the elements of one
 * dynamic string / dynamic regexp node are compiled.
 */
struct dstr_ctxt {
|
||||
/* iseq passed in by compile_dstr_fragments; used for error reporting and
 * for the RB_OBJ_WRITTEN call in flush_dstr_fragment */
rb_iseq_t *const iseq;
|
||||
/* anchor that putobject instructions and compiled sub-expressions are added to */
LINK_ANCHOR *const ret;
|
||||
/* pending literal string accumulated from adjacent fragments; Qnil when none */
VALUE lit;
|
||||
/* node recorded when `lit` was started; used as the node of the emitted putobject */
const NODE *lit_node;
|
||||
/* number of elements emitted so far (flushed literals + compiled expressions) */
int cnt;
|
||||
/* non-zero: each literal fragment is checked with rb_reg_check_preprocess() */
int dregx;
|
||||
};
|
||||
|
||||
debugp_param("nd_lit", lit);
|
||||
if (!NIL_P(lit)) {
|
||||
cnt++;
|
||||
if (!RB_TYPE_P(lit, T_STRING)) {
|
||||
COMPILE_ERROR(ERROR_ARGS "dstr: must be string: %s",
|
||||
rb_builtin_type_name(TYPE(lit)));
|
||||
/*
 * Add one literal string fragment to the pending literal held in `args`.
 *
 * args: fragment-compilation state; `lit`/`lit_node` may be updated.
 * node: node used for the error line and recorded as `lit_node` when this
 *       fragment starts a new pending literal.
 * str:  parser string to append; converted to a fresh Ruby string with
 *       rb_str_new_mutable_parser_string().
 *
 * Returns COMPILE_OK on success, or COMPILE_NG after reporting a compile
 * error when `args->dregx` is set and rb_reg_check_preprocess() returns a
 * non-nil value for the fragment.
 */
static int
|
||||
append_dstr_fragment(struct dstr_ctxt *args, const NODE *const node, rb_parser_string_t *str)
|
||||
{
|
||||
VALUE s = rb_str_new_mutable_parser_string(str);
|
||||
/* regexp mode only: validate this fragment before merging it */
if (args->dregx) {
|
||||
VALUE error = rb_reg_check_preprocess(s);
|
||||
if (!NIL_P(error)) {
|
||||
COMPILE_ERROR(args->iseq, nd_line(node), "%" PRIsVALUE, error);
|
||||
return COMPILE_NG;
|
||||
}
|
||||
}
|
||||
/* no literal pending: this fragment starts one and fixes its node */
if (NIL_P(args->lit)) {
|
||||
args->lit = s;
|
||||
args->lit_node = node;
|
||||
}
|
||||
/* otherwise extend the pending literal in place */
else {
|
||||
rb_str_buf_append(args->lit, s);
|
||||
}
|
||||
return COMPILE_OK;
|
||||
}
|
||||
|
||||
/*
 * Emit the pending literal, if any, as a single putobject instruction.
 *
 * When `args->lit` is non-nil it is cleared, the string is passed through
 * rb_fstring(), a putobject for it is added to `args->ret` using
 * `args->lit_node` as the node, and `args->cnt` is incremented.
 * Does nothing when no literal is pending.
 *
 * NOTE(review): the three statements below that use bare `ret`, `node` and
 * `first_lit` reference names not declared in this function; they look like
 * removed-side lines of the diff that were interleaved here -- confirm
 * against the real compile.c.
 */
static void
|
||||
flush_dstr_fragment(struct dstr_ctxt *args)
|
||||
{
|
||||
if (!NIL_P(args->lit)) {
|
||||
/* presumably needed because ADD_INSN1 refers to a local named `iseq`
 * (macro defined outside this view) -- TODO confirm */
rb_iseq_t *iseq = args->iseq;
|
||||
VALUE lit = args->lit;
|
||||
/* reset first so the context no longer holds the literal being emitted */
args->lit = Qnil;
|
||||
lit = rb_fstring(lit);
|
||||
ADD_INSN1(ret, node, putobject, lit);
|
||||
RB_OBJ_WRITTEN(iseq, Qundef, lit);
|
||||
if (RSTRING_LEN(lit) == 0) first_lit = LAST_ELEMENT(ret);
|
||||
ADD_INSN1(args->ret, args->lit_node, putobject, lit);
|
||||
RB_OBJ_WRITTEN(args->iseq, Qundef, lit);
|
||||
/* the flushed literal counts as one emitted element */
args->cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Walk one DSTR-shaped node and feed its pieces into `args`.
 *
 * The node's own leading string (if present) and every NODE_STR element of
 * its nd_next list are merged into the pending literal through
 * append_dstr_fragment(); NODE_DSTR elements are handled by recursion; any
 * other element first flushes the pending literal, is then compiled into
 * `args->ret`, and is counted as one element.
 *
 * Returns COMPILE_OK when the walk completes. Each fallible step is wrapped
 * in CHECK (defined outside this view; presumably it returns early on
 * failure -- TODO confirm).
 *
 * NOTE(review): statements below that use bare `lit`, `ret`, `cnt`, `cntp`
 * or `first_lit` reference names not declared in this function; they look
 * like removed-side lines of the old compile_dstr_fragments interleaved by
 * the diff rendering -- confirm against the real compile.c.
 */
static int
|
||||
compile_dstr_fragments_0(struct dstr_ctxt *args, const NODE *const node)
|
||||
{
|
||||
const struct RNode_LIST *list = RNODE_DSTR(node)->nd_next;
|
||||
rb_parser_string_t *str = RNODE_DSTR(node)->string;
|
||||
|
||||
/* leading literal part of the node, when there is one */
if (str) {
|
||||
CHECK(append_dstr_fragment(args, node, str));
|
||||
}
|
||||
|
||||
while (list) {
|
||||
const NODE *const head = list->nd_head;
|
||||
if (nd_type_p(head, NODE_STR)) {
|
||||
lit = rb_node_str_string_val(head);
|
||||
ADD_INSN1(ret, head, putobject, lit);
|
||||
RB_OBJ_WRITTEN(iseq, Qundef, lit);
|
||||
lit = Qnil;
|
||||
/* note: the enclosing `node`, not `head`, is passed as the node argument */
CHECK(append_dstr_fragment(args, node, RNODE_STR(head)->string));
|
||||
}
|
||||
else if (nd_type_p(head, NODE_DSTR)) {
|
||||
/* nested dstr: recurse with the same context so literals keep merging */
CHECK(compile_dstr_fragments_0(args, head));
|
||||
}
|
||||
else {
|
||||
CHECK(COMPILE(ret, "each string", head));
|
||||
/* emit the pending literal before the non-literal element */
flush_dstr_fragment(args);
|
||||
/* presumably needed because COMPILE refers to a local named `iseq`
 * (macro defined outside this view) -- TODO confirm */
rb_iseq_t *iseq = args->iseq;
|
||||
CHECK(COMPILE(args->ret, "each string", head));
|
||||
args->cnt++;
|
||||
}
|
||||
cnt++;
|
||||
list = (struct RNode_LIST *)list->nd_next;
|
||||
}
|
||||
if (NIL_P(lit) && first_lit) {
|
||||
ELEM_REMOVE(first_lit);
|
||||
--cnt;
|
||||
}
|
||||
*cntp = cnt;
|
||||
return COMPILE_OK;
|
||||
}
|
||||
|
||||
/*
 * Compile the fragments of a dynamic string / dynamic regexp node.
 *
 * iseq:  instruction sequence being compiled.
 * ret:   anchor receiving the generated instructions.
 * node:  node whose fragments are walked by compile_dstr_fragments_0().
 * cntp:  out parameter; receives the number of elements emitted.
 * dregx: forwarded to the fragment context; when non-zero each literal
 *        fragment is checked with rb_reg_check_preprocess(). In the visible
 *        callers compile_dstr passes FALSE and compile_dregx passes TRUE.
 *
 * Returns COMPILE_OK after the walk succeeds and the trailing pending
 * literal has been flushed.
 */
static int
|
||||
compile_dstr_fragments(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int *cntp, int dregx)
|
||||
{
|
||||
/* start with no pending literal and nothing emitted */
struct dstr_ctxt args = {
|
||||
.iseq = iseq, .ret = ret,
|
||||
.lit = Qnil, .lit_node = NULL,
|
||||
.cnt = 0, .dregx = dregx,
|
||||
};
|
||||
CHECK(compile_dstr_fragments_0(&args, node));
|
||||
/* emit whatever literal is still pending after the last element */
flush_dstr_fragment(&args);
|
||||
|
||||
*cntp = args.cnt;
|
||||
|
||||
return COMPILE_OK;
|
||||
}
|
||||
@ -4571,7 +4633,7 @@ compile_dstr(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node)
|
||||
RB_OBJ_WRITTEN(iseq, Qundef, lit);
|
||||
}
|
||||
else {
|
||||
CHECK(compile_dstr_fragments(iseq, ret, node, &cnt));
|
||||
CHECK(compile_dstr_fragments(iseq, ret, node, &cnt, FALSE));
|
||||
ADD_INSN1(ret, node, concatstrings, INT2FIX(cnt));
|
||||
}
|
||||
return COMPILE_OK;
|
||||
@ -4593,7 +4655,7 @@ compile_dregx(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, i
|
||||
return COMPILE_OK;
|
||||
}
|
||||
|
||||
CHECK(compile_dstr_fragments(iseq, ret, node, &cnt));
|
||||
CHECK(compile_dstr_fragments(iseq, ret, node, &cnt, TRUE));
|
||||
ADD_INSN2(ret, node, toregexp, INT2FIX(cflag), INT2FIX(cnt));
|
||||
|
||||
if (popped) {
|
||||
|
90
parse.y
90
parse.y
@ -1480,9 +1480,6 @@ static rb_ast_id_table_t *local_tbl(struct parser_params*);
|
||||
|
||||
static VALUE reg_compile(struct parser_params*, rb_parser_string_t*, int);
|
||||
static void reg_fragment_setenc(struct parser_params*, rb_parser_string_t*, int);
|
||||
int rb_parser_reg_fragment_check(struct parser_params*, rb_parser_string_t*, int, rb_parser_reg_fragment_error_func);
|
||||
static void reg_fragment_error(struct parser_params *, VALUE);
|
||||
#define reg_fragment_check(p, str, option) rb_parser_reg_fragment_check(p, str, option, reg_fragment_error)
|
||||
|
||||
static int literal_concat0(struct parser_params *p, rb_parser_string_t *head, rb_parser_string_t *tail);
|
||||
static NODE *heredoc_dedent(struct parser_params*,NODE*);
|
||||
@ -13161,12 +13158,26 @@ symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol)
|
||||
return list_append(p, symbols, symbol);
|
||||
}
|
||||
|
||||
/*
 * Apply reg_fragment_setenc() to every literal string fragment of a dynamic
 * regexp node.
 *
 * p:       parser state, forwarded to reg_fragment_setenc().
 * dreg:    node whose leading string (if non-NULL) and nd_next list are visited.
 * options: regexp options, forwarded unchanged.
 *
 * NODE_STR list elements are processed directly, NODE_DSTR elements are
 * processed recursively, and elements of any other type are skipped.
 */
static void
|
||||
dregex_fragment_setenc(struct parser_params *p, rb_node_dregx_t *const dreg, int options)
|
||||
{
|
||||
if (dreg->string) {
|
||||
reg_fragment_setenc(p, dreg->string, options);
|
||||
}
|
||||
for (struct RNode_LIST *list = dreg->nd_next; list; list = RNODE_LIST(list->nd_next)) {
|
||||
NODE *frag = list->nd_head;
|
||||
if (nd_type_p(frag, NODE_STR)) {
|
||||
reg_fragment_setenc(p, RNODE_STR(frag)->string, options);
|
||||
}
|
||||
else if (nd_type_p(frag, NODE_DSTR)) {
|
||||
/* nested dstr fragment: visit its own string and list the same way */
dregex_fragment_setenc(p, RNODE_DSTR(frag), options);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static NODE *
|
||||
new_regexp(struct parser_params *p, NODE *node, int options, const YYLTYPE *loc)
|
||||
{
|
||||
struct RNode_LIST *list;
|
||||
NODE *prev;
|
||||
|
||||
if (!node) {
|
||||
/* Check string is valid regex */
|
||||
rb_parser_string_t *str = STRING_NEW0();
|
||||
@ -13190,37 +13201,8 @@ new_regexp(struct parser_params *p, NODE *node, int options, const YYLTYPE *loc)
|
||||
nd_set_loc(node, loc);
|
||||
rb_node_dregx_t *const dreg = RNODE_DREGX(node);
|
||||
dreg->as.nd_cflag = options & RE_OPTION_MASK;
|
||||
if (!dreg->nd_next) {
|
||||
/* Check string is valid regex */
|
||||
reg_compile(p, dreg->string, options);
|
||||
}
|
||||
else if (dreg->string) {
|
||||
reg_fragment_check(p, dreg->string, options);
|
||||
}
|
||||
prev = node;
|
||||
for (list = dreg->nd_next; list; list = RNODE_LIST(list->nd_next)) {
|
||||
NODE *frag = list->nd_head;
|
||||
enum node_type type = nd_type(frag);
|
||||
if (type == NODE_STR || (type == NODE_DSTR && !RNODE_DSTR(frag)->nd_next)) {
|
||||
rb_parser_string_t *tail = RNODE_STR(frag)->string;
|
||||
if (reg_fragment_check(p, tail, options) && prev && RNODE_DREGX(prev)->string) {
|
||||
rb_parser_string_t *lit = prev == node ? dreg->string : RNODE_STR(RNODE_LIST(prev)->nd_head)->string;
|
||||
if (!literal_concat0(p, lit, tail)) {
|
||||
return NEW_NIL(loc); /* dummy node on error */
|
||||
}
|
||||
rb_parser_str_resize(p, tail, 0);
|
||||
RNODE_LIST(prev)->nd_next = list->nd_next;
|
||||
rb_discard_node(p, list->nd_head);
|
||||
rb_discard_node(p, (NODE *)list);
|
||||
list = RNODE_LIST(prev);
|
||||
}
|
||||
else {
|
||||
prev = (NODE *)list;
|
||||
}
|
||||
}
|
||||
else {
|
||||
prev = 0;
|
||||
}
|
||||
if (dreg->nd_next) {
|
||||
dregex_fragment_setenc(p, dreg, options);
|
||||
}
|
||||
if (options & RE_OPTION_ONCE) {
|
||||
node = NEW_ONCE(node, loc);
|
||||
@ -15363,13 +15345,7 @@ rb_reg_fragment_setenc(struct parser_params* p, rb_parser_string_t *str, int opt
|
||||
rb_parser_enc_associate(p, str, rb_ascii8bit_encoding());
|
||||
}
|
||||
else if (rb_is_usascii_enc(p->enc)) {
|
||||
if (!rb_parser_is_ascii_string(p, str)) {
|
||||
/* raise in re.c */
|
||||
rb_parser_enc_associate(p, str, rb_usascii_encoding());
|
||||
}
|
||||
else {
|
||||
rb_parser_enc_associate(p, str, rb_ascii8bit_encoding());
|
||||
}
|
||||
rb_parser_enc_associate(p, str, rb_ascii8bit_encoding());
|
||||
}
|
||||
return 0;
|
||||
|
||||
@ -15385,30 +15361,6 @@ reg_fragment_setenc(struct parser_params* p, rb_parser_string_t *str, int option
|
||||
if (c) reg_fragment_enc_error(p, str, c);
|
||||
}
|
||||
|
||||
/*
 * Report `err` as a parser compile error, formatted with PRIsVALUE.
 * Used as the error callback of the reg_fragment_check() macro and called
 * directly from reg_compile() in the lines shown in this diff.
 */
static void
|
||||
reg_fragment_error(struct parser_params* p, VALUE err)
|
||||
{
|
||||
compile_error(p, "%"PRIsVALUE, err);
|
||||
}
|
||||
|
||||
#ifndef RIPPER
|
||||
/*
 * Check one literal regexp fragment.
 *
 * p:       parser state.
 * str:     fragment to check; reg_fragment_setenc() is applied to it first.
 * options: regexp options, forwarded to reg_fragment_setenc().
 * error:   callback invoked with the parser state and the error (converted
 *          with rb_obj_as_string()) when the check fails.
 *
 * Returns 1 when rb_reg_check_preprocess() yields nil for the fragment,
 * otherwise calls `error` and returns 0.
 */
int
|
||||
rb_parser_reg_fragment_check(struct parser_params* p, rb_parser_string_t *str, int options, rb_parser_reg_fragment_error_func error)
|
||||
{
|
||||
VALUE err, str2;
|
||||
reg_fragment_setenc(p, str, options);
|
||||
/* TODO */
|
||||
/* the check operates on a Ruby string copy of the parser string */
str2 = rb_str_new_parser_string(str);
|
||||
err = rb_reg_check_preprocess(str2);
|
||||
if (err != Qnil) {
|
||||
err = rb_obj_as_string(err);
|
||||
error(p, err);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef UNIVERSAL_PARSER
|
||||
typedef struct {
|
||||
struct parser_params* parser;
|
||||
@ -15507,7 +15459,7 @@ reg_compile(struct parser_params* p, rb_parser_string_t *str, int options)
|
||||
if (NIL_P(re)) {
|
||||
VALUE m = rb_attr_get(rb_errinfo(), idMesg);
|
||||
rb_set_errinfo(err);
|
||||
reg_fragment_error(p, m);
|
||||
compile_error(p, "%"PRIsVALUE, m);
|
||||
return Qnil;
|
||||
}
|
||||
return re;
|
||||
|
@ -1 +0,0 @@
|
||||
exclude(:test_regexp_usascii, "https://bugs.ruby-lang.org/issues/20504")
|
@ -1 +0,0 @@
|
||||
exclude(:test_basic, "https://bugs.ruby-lang.org/issues/20504")
|
@ -1 +0,0 @@
|
||||
exclude(:test_dregexp, "https://bugs.ruby-lang.org/issues/20504")
|
@ -164,7 +164,6 @@ end
|
||||
assert_equal([[1, 8], :on_tstring_end, "\"", state(:EXPR_END)], lex.shift)
|
||||
assert_equal([[1, 9], :on_embexpr_end, "}", state(:EXPR_END)], lex.shift)
|
||||
assert_equal([[1, 10], :on_regexp_end, "/", state(:EXPR_BEG)], lex.shift)
|
||||
assert_equal([[1, 11], :compile_error, "", state(:EXPR_END), "invalid multibyte character: /\\xCD/"], lex.shift)
|
||||
assert_empty(lex)
|
||||
end
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user