[ruby/yarp] WIP - Introduce contextually parsing programs vs evals

This is more or less the code I used in my JRuby proof of concept
to parse evals.  Evals depend on the variable scopes of their parent,
so they produce a different syntax tree than a top-level program does.

Questions:
  1. How does MRI compile evals currently?  I cannot find anything.
  2. This passes in a char * of data.  It does not encode the
     variables we pass in because the system calling this already
     knows.  Is this adequate though?
  3. Can I get guidance on how best to test this?

https://github.com/ruby/yarp/commit/f441b6fd2c
This commit is contained in:
Thomas E. Enebo 2023-07-27 14:46:19 -04:00 committed by Takashi Kokubun
parent 0f8091947d
commit 76c77b5eca
Notes: git 2023-08-17 00:48:17 +00:00
3 changed files with 43 additions and 13 deletions

View File

@ -194,7 +194,7 @@ dump_input(input_t *input, const char *filepath) {
yp_parser_t parser;
yp_parser_init(&parser, input->source, input->size, filepath);
yp_node_t *node = yp_parse(&parser);
yp_node_t *node = yp_parse(&parser, false);
yp_serialize(&parser, node, &buffer);
VALUE result = rb_str_new(buffer.value, buffer.length);
@ -378,7 +378,7 @@ lex_input(input_t *input, const char *filepath) {
};
parser.lex_callback = &lex_callback;
yp_node_t *node = yp_parse(&parser);
yp_node_t *node = yp_parse(&parser, false);
// Here we need to update the source range to have the correct newline
// offsets. We do it here because we've already created the object and given
@ -439,7 +439,7 @@ parse_input(input_t *input, const char *filepath) {
yp_parser_t parser;
yp_parser_init(&parser, input->source, input->size, filepath);
yp_node_t *node = yp_parse(&parser);
yp_node_t *node = yp_parse(&parser, false);
rb_encoding *encoding = rb_enc_find(parser.encoding.name);
VALUE source = yp_source_new(&parser);
@ -582,7 +582,7 @@ memsize(VALUE self, VALUE string) {
size_t length = RSTRING_LEN(string);
yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
yp_node_t *node = yp_parse(&parser);
yp_node_t *node = yp_parse(&parser, false);
yp_memsize_t memsize;
yp_node_memsize(node, &memsize);
@ -608,7 +608,7 @@ profile_file(VALUE self, VALUE filepath) {
yp_parser_t parser;
yp_parser_init(&parser, input.source, input.size, checked);
yp_node_t *node = yp_parse(&parser);
yp_node_t *node = yp_parse(&parser, false);
yp_node_destroy(&parser, node);
yp_parser_free(&parser);

View File

@ -12735,8 +12735,8 @@ parse_expression(yp_parser_t *parser, yp_binding_power_t binding_power, const ch
}
static yp_node_t *
parse_program(yp_parser_t *parser) {
yp_parser_scope_push(parser, true);
parse_program(yp_parser_t *parser, bool eval) {
yp_parser_scope_push(parser, !eval);
parser_lex(parser);
yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_MAIN);
@ -12756,6 +12756,34 @@ parse_program(yp_parser_t *parser) {
return (yp_node_t *) yp_program_node_create(parser, &locals, statements);
}
// Pre-populate the parser with the local variable scopes described by `data`
// so that an eval'd source can be parsed against them.
//
// `data` is assumed to always be valid since it comes from a trusted source
// (Ruby implementation internals); no bounds or consistency checks are done.
//
// Format: [num_scopes, (num_vars, (var_char*, 0)*)*]
//   - num_scopes: one byte, the number of scopes that follow
//   - num_vars:   one byte per scope, the number of names in that scope
//   - each name:  a NUL-terminated run of bytes
// Because each count is a single byte, at most 255 scopes and at most 255
// variables per scope can be described.
//
// One parser scope is pushed per encoded scope, in the order given. The second
// argument to yp_parser_scope_push is true only for the first scope. Every
// name is registered through yp_parser_local_add_token as an identifier token
// whose start/end point directly into `data`.
// NOTE(review): this presumably requires `data` to outlive any use of those
// token pointers by the parser -- confirm against yp_parser_local_add_token.
static void
yp_populate_eval_scopes(yp_parser_t *parser, const char *data) {
    const char *p = data;

    // Counts are raw bytes. Go through unsigned char before widening: plain
    // char may be signed, in which case a count >= 128 would sign-extend to a
    // huge size_t and the loops below would run off the end of the buffer.
    size_t number_of_scopes = (size_t) (unsigned char) *p++;

    for (size_t scope_index = 0; scope_index < number_of_scopes; scope_index++) {
        size_t number_of_variables = (size_t) (unsigned char) *p++;
        yp_parser_scope_push(parser, scope_index == 0);

        for (size_t variable_index = 0; variable_index < number_of_variables; variable_index++) {
            // Locate the NUL terminator that ends the current name.
            const char *eos = strchr(p, '\0');

            yp_token_t lvar = (yp_token_t) {
                .type = YP_TOKEN_IDENTIFIER,
                .start = p,
                .end = eos
            };
            yp_parser_local_add_token(parser, &lvar);

            // Step over the terminator to the next name (or next scope count).
            p = eos + 1;
        }
    }
}
/******************************************************************************/
/* External functions */
/******************************************************************************/
@ -12892,8 +12920,8 @@ yp_parser_free(yp_parser_t *parser) {
// Parse the Ruby source associated with the given parser and return the tree.
YP_EXPORTED_FUNCTION yp_node_t *
yp_parse(yp_parser_t *parser) {
return parse_program(parser);
yp_parse(yp_parser_t *parser, bool eval) {
return parse_program(parser, eval);
}
YP_EXPORTED_FUNCTION void
@ -12910,11 +12938,13 @@ yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
// Parse and serialize the AST represented by the given source to the given
// buffer.
YP_EXPORTED_FUNCTION void
yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer) {
yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *parent_scopes) {
bool eval = parent_scopes != NULL;
yp_parser_t parser;
yp_parser_init(&parser, source, size, NULL);
if (eval) yp_populate_eval_scopes(&parser, parent_scopes);
yp_node_t *node = yp_parse(&parser);
yp_node_t *node = yp_parse(&parser, eval);
yp_serialize(&parser, node, buffer);
yp_node_destroy(&parser, node);

View File

@ -51,7 +51,7 @@ YP_EXPORTED_FUNCTION void yp_parser_register_encoding_decode_callback(yp_parser_
YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
// Parse the Ruby source associated with the given parser and return the tree.
YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);
YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser, bool eval);
// Pretty-prints the AST represented by the given node to the given buffer.
YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
@ -61,7 +61,7 @@ YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_
// Parse and serialize the AST represented by the given source to the given
// buffer.
YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer);
YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *parent_scopes);
// Returns a string representation of the given token type.
YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);