From 76c77b5eca3f02db894819823d3a1ab55455754d Mon Sep 17 00:00:00 2001 From: "Thomas E. Enebo" Date: Thu, 27 Jul 2023 14:46:19 -0400 Subject: [PATCH] [ruby/yarp] WIP - Introduce contextually parsing programs vs evals This is more or less the code I used in my POC in JRuby to parse evals. Evals depend on parent variable scopes and will produce a different syntax tree. Questions: 1. How does MRI compile evals currently? I cannot find anything. 2. This passes in a char * of data. It does not encode the variables we pass in because the system calling this already knows. Is this adequate though? 3. Can I get guidance on how best to test this? https://github.com/ruby/yarp/commit/f441b6fd2c --- yarp/extension.c | 10 +++++----- yarp/yarp.c | 42 ++++++++++++++++++++++++++++++++++++------ yarp/yarp.h | 4 ++-- 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/yarp/extension.c b/yarp/extension.c index 4e801b3b05..7ede50bb0f 100644 --- a/yarp/extension.c +++ b/yarp/extension.c @@ -194,7 +194,7 @@ dump_input(input_t *input, const char *filepath) { yp_parser_t parser; yp_parser_init(&parser, input->source, input->size, filepath); - yp_node_t *node = yp_parse(&parser); + yp_node_t *node = yp_parse(&parser, false); yp_serialize(&parser, node, &buffer); VALUE result = rb_str_new(buffer.value, buffer.length); @@ -378,7 +378,7 @@ lex_input(input_t *input, const char *filepath) { }; parser.lex_callback = &lex_callback; - yp_node_t *node = yp_parse(&parser); + yp_node_t *node = yp_parse(&parser, false); // Here we need to update the source range to have the correct newline // offsets. 
We do it here because we've already created the object and given @@ -439,7 +439,7 @@ parse_input(input_t *input, const char *filepath) { yp_parser_t parser; yp_parser_init(&parser, input->source, input->size, filepath); - yp_node_t *node = yp_parse(&parser); + yp_node_t *node = yp_parse(&parser, false); rb_encoding *encoding = rb_enc_find(parser.encoding.name); VALUE source = yp_source_new(&parser); @@ -582,7 +582,7 @@ memsize(VALUE self, VALUE string) { size_t length = RSTRING_LEN(string); yp_parser_init(&parser, RSTRING_PTR(string), length, NULL); - yp_node_t *node = yp_parse(&parser); + yp_node_t *node = yp_parse(&parser, false); yp_memsize_t memsize; yp_node_memsize(node, &memsize); @@ -608,7 +608,7 @@ profile_file(VALUE self, VALUE filepath) { yp_parser_t parser; yp_parser_init(&parser, input.source, input.size, checked); - yp_node_t *node = yp_parse(&parser); + yp_node_t *node = yp_parse(&parser, false); yp_node_destroy(&parser, node); yp_parser_free(&parser); diff --git a/yarp/yarp.c b/yarp/yarp.c index 2ae3b3ea28..50871beedf 100644 --- a/yarp/yarp.c +++ b/yarp/yarp.c @@ -12735,8 +12735,8 @@ parse_expression(yp_parser_t *parser, yp_binding_power_t binding_power, const ch } static yp_node_t * -parse_program(yp_parser_t *parser) { - yp_parser_scope_push(parser, true); +parse_program(yp_parser_t *parser, bool eval) { + yp_parser_scope_push(parser, !eval); parser_lex(parser); yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_MAIN); @@ -12756,6 +12756,34 @@ parse_program(yp_parser_t *parser) { return (yp_node_t *) yp_program_node_create(parser, &locals, statements); } +// Assume always a valid string since it is from trusted source (Ruby impl internals). 
+// Format: [num_scopes, (num_vars1, (var_char1*, 0)*)*]; each count is one byte (0-255), each name is NUL-terminated.
+static void
+yp_populate_eval_scopes(yp_parser_t *parser, const char *data) {
+    const char *p = data;
+    size_t number_of_scopes = (size_t) (unsigned char) *p; // plain char may be signed: go through unsigned char so counts >= 128 do not sign-extend
+
+    p++;
+    for (size_t scope_index = 0; scope_index < number_of_scopes; scope_index++) {
+        size_t number_of_variables = (size_t) (unsigned char) *p++; // same unsigned read for the per-scope variable count
+
+        yp_parser_scope_push(parser, scope_index == 0); // flag is true only for the first scope listed in data
+
+        for (size_t variable_index = 0; variable_index < number_of_variables; variable_index++) {
+            const char *eos = strchr(p, '\0'); // points at the terminator of the current name
+
+            yp_token_t lvar = (yp_token_t) {
+                .type = YP_TOKEN_IDENTIFIER,
+                .start = p,
+                .end = eos
+            };
+            yp_parser_local_add_token(parser, &lvar); // token borrows pointers into data; it is not copied here
+
+            p = ++eos; // step over the terminator to the next name or count byte
+        }
+    }
+}
+
 /******************************************************************************/
 /* External functions */
 /******************************************************************************/
@@ -12892,8 +12920,8 @@ yp_parser_free(yp_parser_t *parser) {
 
 // Parse the Ruby source associated with the given parser and return the tree.
 YP_EXPORTED_FUNCTION yp_node_t *
-yp_parse(yp_parser_t *parser) {
-    return parse_program(parser);
+yp_parse(yp_parser_t *parser, bool eval) {
+    return parse_program(parser, eval);
 }
 
 YP_EXPORTED_FUNCTION void
@@ -12910,11 +12938,13 @@ yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
 // Parse and serialize the AST represented by the given source to the given
 // buffer.
YP_EXPORTED_FUNCTION void -yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer) { +yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *parent_scopes) { + bool eval = parent_scopes != NULL; yp_parser_t parser; yp_parser_init(&parser, source, size, NULL); + if (eval) yp_populate_eval_scopes(&parser, parent_scopes); - yp_node_t *node = yp_parse(&parser); + yp_node_t *node = yp_parse(&parser, eval); yp_serialize(&parser, node, buffer); yp_node_destroy(&parser, node); diff --git a/yarp/yarp.h b/yarp/yarp.h index 4bbffdbb10..492038a6e2 100644 --- a/yarp/yarp.h +++ b/yarp/yarp.h @@ -51,7 +51,7 @@ YP_EXPORTED_FUNCTION void yp_parser_register_encoding_decode_callback(yp_parser_ YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser); // Parse the Ruby source associated with the given parser and return the tree. -YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser); +YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser, bool eval); // Pretty-prints the AST represented by the given node to the given buffer. YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer); @@ -61,7 +61,7 @@ YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_ // Parse and serialize the AST represented by the given source to the given // buffer. -YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer); +YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *parent_scopes); // Returns a string representation of the given token type. YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);