[ruby/prism] Freeze AST option

To make it so that you can pass `freeze: true` to Prism parse
methods and get back a deeply-frozen AST that is Ractor-
shareable.

https://github.com/ruby/prism/commit/8e6a93b2d2
This commit is contained in:
Kevin Newton 2025-01-11 21:10:09 -05:00 committed by git
parent 1758137ead
commit 713f31872a
9 changed files with 539 additions and 192 deletions

View File

@ -59,11 +59,11 @@ module Prism
end
# :call-seq:
# Prism::load(source, serialized) -> ParseResult
# Prism::load(source, serialized, freeze) -> ParseResult
#
# Load the serialized AST using the source as a reference into a tree.
def self.load(source, serialized)
Serialize.load(source, serialized)
def self.load(source, serialized, freeze = false)
Serialize.load(source, serialized, freeze)
end
end

View File

@ -279,7 +279,7 @@ module Prism
# access to the IO object already through the closure of the lambda, we
# can pass a null pointer here and not worry.
LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options))
Prism.load(source, buffer.read)
Prism.load(source, buffer.read, options.fetch(:freeze, false))
end
end
@ -354,22 +354,37 @@ module Prism
def dump_common(string, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
buffer.read
dumped = buffer.read
dumped.freeze if options.fetch(:freeze, false)
dumped
end
end
def lex_common(string, code, options) # :nodoc:
serialized = LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
buffer.read
serialized =
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
buffer.read
end
freeze = options.fetch(:freeze, false)
source = Source.for(code)
result = Serialize.load_tokens(source, serialized, freeze)
if freeze
source.source.freeze
source.offsets.freeze
source.freeze
end
Serialize.load_tokens(Source.for(code), serialized)
result
end
def parse_common(string, code, options) # :nodoc:
serialized = dump_common(string, options)
Prism.load(code, serialized)
Prism.load(code, serialized, options.fetch(:freeze, false))
end
def parse_comments_common(string, code, options) # :nodoc:
@ -382,7 +397,14 @@ module Prism
loader.load_header
loader.load_encoding
loader.load_start_line
loader.load_comments
if (freeze = options.fetch(:freeze, false))
source.source.freeze
source.offsets.freeze
source.freeze
end
loader.load_comments(freeze)
end
end
@ -392,12 +414,35 @@ module Prism
source = Source.for(code)
loader = Serialize::Loader.new(source, buffer.read)
freeze = options.fetch(:freeze, false)
tokens = loader.load_tokens
node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes
tokens.each { |token,| token.value.force_encoding(loader.encoding) }
tokens = loader.load_tokens(false)
node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes(freeze)
ParseLexResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source)
tokens.each do |token,|
token.value.force_encoding(loader.encoding)
if freeze
token.value.freeze
token.location.freeze
token.freeze
end
end
value = [node, tokens]
result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, source)
if freeze
source.source.freeze
source.offsets.freeze
source.freeze
tokens.each(&:freeze)
tokens.freeze
value.freeze
result.freeze
end
result
end
end
@ -482,6 +527,9 @@ module Prism
template << "C"
values << (options.fetch(:partial_script, false) ? 1 : 0)
template << "C"
values << (options.fetch(:freeze, false) ? 1 : 0)
template << "L"
if (scopes = options[:scopes])
values << scopes.length

View File

@ -30,6 +30,7 @@ VALUE rb_cPrismDebugEncoding;
ID rb_id_option_command_line;
ID rb_id_option_encoding;
ID rb_id_option_filepath;
ID rb_id_option_freeze;
ID rb_id_option_frozen_string_literal;
ID rb_id_option_line;
ID rb_id_option_main_script;
@ -180,6 +181,8 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
if (!NIL_P(value)) pm_options_main_script_set(options, RTEST(value));
} else if (key_id == rb_id_option_partial_script) {
if (!NIL_P(value)) pm_options_partial_script_set(options, RTEST(value));
} else if (key_id == rb_id_option_freeze) {
if (!NIL_P(value)) pm_options_freeze_set(options, RTEST(value));
} else {
rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key);
}
@ -344,6 +347,7 @@ dump(int argc, VALUE *argv, VALUE self) {
#endif
VALUE value = dump_input(&input, &options);
if (options.freeze) rb_obj_freeze(value);
#ifdef PRISM_BUILD_DEBUG
xfree(dup);
@ -387,7 +391,7 @@ dump_file(int argc, VALUE *argv, VALUE self) {
* Extract the comments out of the parser into an array.
*/
static VALUE
parser_comments(pm_parser_t *parser, VALUE source) {
parser_comments(pm_parser_t *parser, VALUE source, bool freeze) {
VALUE comments = rb_ary_new_capa(parser->comment_list.size);
for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
@ -397,11 +401,19 @@ parser_comments(pm_parser_t *parser, VALUE source) {
LONG2FIX(comment->location.end - comment->location.start)
};
VALUE location = rb_class_new_instance(3, location_argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(location);
VALUE comment_argv[] = { location };
VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
VALUE value = rb_class_new_instance(1, comment_argv, type);
if (freeze) rb_obj_freeze(value);
rb_ary_push(comments, value);
}
if (freeze) rb_obj_freeze(comments);
return comments;
}
@ -409,7 +421,7 @@ parser_comments(pm_parser_t *parser, VALUE source) {
* Extract the magic comments out of the parser into an array.
*/
static VALUE
parser_magic_comments(pm_parser_t *parser, VALUE source) {
parser_magic_comments(pm_parser_t *parser, VALUE source, bool freeze) {
VALUE magic_comments = rb_ary_new_capa(parser->magic_comment_list.size);
for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
@ -419,20 +431,26 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
LONG2FIX(magic_comment->key_length)
};
VALUE key_loc = rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(key_loc);
VALUE value_loc_argv[] = {
source,
LONG2FIX(magic_comment->value_start - parser->start),
LONG2FIX(magic_comment->value_length)
};
VALUE magic_comment_argv[] = {
rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation),
rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation)
};
VALUE value_loc = rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(value_loc);
rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment));
VALUE magic_comment_argv[] = { key_loc, value_loc };
VALUE value = rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment);
if (freeze) rb_obj_freeze(value);
rb_ary_push(magic_comments, value);
}
if (freeze) rb_obj_freeze(magic_comments);
return magic_comments;
}
@ -441,7 +459,7 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
* exists.
*/
static VALUE
parser_data_loc(const pm_parser_t *parser, VALUE source) {
parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) {
if (parser->data_loc.end == NULL) {
return Qnil;
} else {
@ -451,7 +469,10 @@ parser_data_loc(const pm_parser_t *parser, VALUE source) {
LONG2FIX(parser->data_loc.end - parser->data_loc.start)
};
return rb_class_new_instance(3, argv, rb_cPrismLocation);
VALUE location = rb_class_new_instance(3, argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(location);
return location;
}
}
@ -459,7 +480,7 @@ parser_data_loc(const pm_parser_t *parser, VALUE source) {
* Extract the errors out of the parser into an array.
*/
static VALUE
parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
VALUE errors = rb_ary_new_capa(parser->error_list.size);
pm_diagnostic_t *error;
@ -470,6 +491,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
LONG2FIX(error->location.end - error->location.start)
};
VALUE location = rb_class_new_instance(3, location_argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(location);
VALUE level = Qnil;
switch (error->level) {
case PM_ERROR_LEVEL_SYNTAX:
@ -485,16 +509,19 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
}
VALUE error_argv[] = {
ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id))),
rb_enc_str_new_cstr(error->message, encoding),
rb_class_new_instance(3, location_argv, rb_cPrismLocation),
level
};
VALUE message = rb_enc_str_new_cstr(error->message, encoding);
if (freeze) rb_obj_freeze(message);
rb_ary_push(errors, rb_class_new_instance(4, error_argv, rb_cPrismParseError));
VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id)));
VALUE error_argv[] = { type, message, location, level };
VALUE value = rb_class_new_instance(4, error_argv, rb_cPrismParseError);
if (freeze) rb_obj_freeze(value);
rb_ary_push(errors, value);
}
if (freeze) rb_obj_freeze(errors);
return errors;
}
@ -502,7 +529,7 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
* Extract the warnings out of the parser into an array.
*/
static VALUE
parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
VALUE warnings = rb_ary_new_capa(parser->warning_list.size);
pm_diagnostic_t *warning;
@ -513,6 +540,9 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
LONG2FIX(warning->location.end - warning->location.start)
};
VALUE location = rb_class_new_instance(3, location_argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(location);
VALUE level = Qnil;
switch (warning->level) {
case PM_WARNING_LEVEL_DEFAULT:
@ -525,16 +555,19 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level);
}
VALUE warning_argv[] = {
ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id))),
rb_enc_str_new_cstr(warning->message, encoding),
rb_class_new_instance(3, location_argv, rb_cPrismLocation),
level
};
VALUE message = rb_enc_str_new_cstr(warning->message, encoding);
if (freeze) rb_obj_freeze(message);
rb_ary_push(warnings, rb_class_new_instance(4, warning_argv, rb_cPrismParseWarning));
VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id)));
VALUE warning_argv[] = { type, message, location, level };
VALUE value = rb_class_new_instance(4, warning_argv, rb_cPrismParseWarning);
if (freeze) rb_obj_freeze(value);
rb_ary_push(warnings, value);
}
if (freeze) rb_obj_freeze(warnings);
return warnings;
}
@ -542,18 +575,21 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
* Create a new parse result from the given parser, value, encoding, and source.
*/
static VALUE
parse_result_create(VALUE class, pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source) {
parse_result_create(VALUE class, pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source, bool freeze) {
VALUE result_argv[] = {
value,
parser_comments(parser, source),
parser_magic_comments(parser, source),
parser_data_loc(parser, source),
parser_errors(parser, encoding, source),
parser_warnings(parser, encoding, source),
parser_comments(parser, source, freeze),
parser_magic_comments(parser, source, freeze),
parser_data_loc(parser, source, freeze),
parser_errors(parser, encoding, source, freeze),
parser_warnings(parser, encoding, source, freeze),
source
};
return rb_class_new_instance(7, result_argv, class);
VALUE result = rb_class_new_instance(7, result_argv, class);
if (freeze) rb_obj_freeze(result);
return result;
}
/******************************************************************************/
@ -569,6 +605,7 @@ typedef struct {
VALUE source;
VALUE tokens;
rb_encoding *encoding;
bool freeze;
} parse_lex_data_t;
/**
@ -580,10 +617,13 @@ static void
parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
VALUE yields = rb_assoc_new(
pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source),
INT2FIX(parser->lex_state)
);
VALUE value = pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source, parse_lex_data->freeze);
VALUE yields = rb_assoc_new(value, INT2FIX(parser->lex_state));
if (parse_lex_data->freeze) {
rb_obj_freeze(value);
rb_obj_freeze(yields);
}
rb_ary_push(parse_lex_data->tokens, yields);
}
@ -603,14 +643,37 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
// one or two tokens, since the encoding can only change at the top of the
// file.
VALUE tokens = parse_lex_data->tokens;
VALUE next_tokens = rb_ary_new();
for (long index = 0; index < RARRAY_LEN(tokens); index++) {
VALUE yields = rb_ary_entry(tokens, index);
VALUE token = rb_ary_entry(yields, 0);
VALUE value = rb_ivar_get(token, rb_intern("@value"));
rb_enc_associate(value, parse_lex_data->encoding);
ENC_CODERANGE_CLEAR(value);
VALUE next_value = rb_str_dup(value);
rb_enc_associate(next_value, parse_lex_data->encoding);
if (parse_lex_data->freeze) rb_obj_freeze(next_value);
VALUE next_token_argv[] = {
parse_lex_data->source,
rb_ivar_get(token, rb_intern("@type")),
next_value,
rb_ivar_get(token, rb_intern("@location"))
};
VALUE next_token = rb_class_new_instance(4, next_token_argv, rb_cPrismToken);
VALUE next_yields = rb_assoc_new(next_token, rb_ary_entry(yields, 1));
if (parse_lex_data->freeze) {
rb_obj_freeze(next_token);
rb_obj_freeze(next_yields);
}
rb_ary_push(next_tokens, next_yields);
}
rb_ary_replace(parse_lex_data->tokens, next_tokens);
}
/**
@ -630,7 +693,8 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
parse_lex_data_t parse_lex_data = {
.source = source,
.tokens = rb_ary_new(),
.encoding = rb_utf8_encoding()
.encoding = rb_utf8_encoding(),
.freeze = options->freeze,
};
parse_lex_data_t *data = &parse_lex_data;
@ -653,14 +717,22 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
}
if (options->freeze) {
rb_obj_freeze(source_string);
rb_obj_freeze(offsets);
rb_obj_freeze(source);
rb_obj_freeze(parse_lex_data.tokens);
}
VALUE result;
if (return_nodes) {
VALUE value = rb_ary_new_capa(2);
rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source, options->freeze));
rb_ary_push(value, parse_lex_data.tokens);
result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source);
if (options->freeze) rb_obj_freeze(value);
result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source, options->freeze);
} else {
result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source);
result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source, options->freeze);
}
pm_node_destroy(&parser, node);
@ -726,9 +798,13 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
pm_node_t *node = pm_parse(&parser);
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
VALUE source = pm_source_new(&parser, encoding);
VALUE value = pm_ast_new(&parser, node, encoding, source);
VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source) ;
VALUE source = pm_source_new(&parser, encoding, options->freeze);
VALUE value = pm_ast_new(&parser, node, encoding, source, options->freeze);
VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options->freeze);
if (options->freeze) {
rb_obj_freeze(source);
}
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
@ -750,6 +826,8 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
* encoding or nil.
* * `filepath` - the filepath of the source being parsed. This should be a
* string or nil.
* * `freeze` - whether or not to deeply freeze the AST. This should be a
* boolean or nil.
* * `frozen_string_literal` - whether or not the frozen string literal pragma
* has been set. This should be a boolean or nil.
* * `line` - the line number that the parse starts on. This should be an
@ -769,12 +847,12 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
* parsed. This should be an array of arrays of symbols or nil. Scopes are
* ordered from the outermost scope to the innermost one.
* * `version` - the version of Ruby syntax that prism should used to parse Ruby
* code. By default prism assumes you want to parse with the latest version
* of Ruby syntax (which you can trigger with `nil` or `"latest"`). You
* may also restrict the syntax to a specific version of Ruby, e.g., with `"3.3.0"`.
* To parse with the same syntax version that the current Ruby is running
* use `version: RUBY_VERSION`. Raises ArgumentError if the version is not
* currently supported by Prism.
* code. By default prism assumes you want to parse with the latest
* version of Ruby syntax (which you can trigger with `nil` or
* `"latest"`). You may also restrict the syntax to a specific version of
* Ruby, e.g., with `"3.3.0"`. To parse with the same syntax version that
* the current Ruby is running use `version: RUBY_VERSION`. Raises
* ArgumentError if the version is not currently supported by Prism.
*/
static VALUE
parse(int argc, VALUE *argv, VALUE self) {
@ -922,9 +1000,9 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options);
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
VALUE source = pm_source_new(&parser, encoding);
VALUE value = pm_ast_new(&parser, node, encoding, source);
VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source);
VALUE source = pm_source_new(&parser, encoding, options.freeze);
VALUE value = pm_ast_new(&parser, node, encoding, source, options.freeze);
VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options.freeze);
pm_node_destroy(&parser, node);
pm_buffer_free(&buffer);
@ -944,8 +1022,8 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) {
pm_node_t *node = pm_parse(&parser);
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
VALUE source = pm_source_new(&parser, encoding);
VALUE comments = parser_comments(&parser, source);
VALUE source = pm_source_new(&parser, encoding, options->freeze);
VALUE comments = parser_comments(&parser, source, options->freeze);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
@ -1240,6 +1318,7 @@ Init_prism(void) {
rb_id_option_command_line = rb_intern_const("command_line");
rb_id_option_encoding = rb_intern_const("encoding");
rb_id_option_filepath = rb_intern_const("filepath");
rb_id_option_freeze = rb_intern_const("freeze");
rb_id_option_frozen_string_literal = rb_intern_const("frozen_string_literal");
rb_id_option_line = rb_intern_const("line");
rb_id_option_main_script = rb_intern_const("main_script");

View File

@ -7,9 +7,9 @@
#include <ruby/encoding.h>
#include "prism.h"
VALUE pm_source_new(const pm_parser_t *parser, rb_encoding *encoding);
VALUE pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source);
VALUE pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source);
VALUE pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze);
VALUE pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze);
VALUE pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze);
VALUE pm_integer_new(const pm_integer_t *integer);
void Init_prism_api_node(void);

View File

@ -139,6 +139,14 @@ pm_options_partial_script_set(pm_options_t *options, bool partial_script) {
options->partial_script = partial_script;
}
/**
* Set the freeze option on the given options struct.
*/
PRISM_EXPORTED_FUNCTION void
pm_options_freeze_set(pm_options_t *options, bool freeze) {
options->freeze = freeze;
}
// For some reason, GCC analyzer thinks we're leaking allocated scopes and
// locals here, even though we definitely aren't. This is a false positive.
// Ideally we wouldn't need to suppress this.
@ -274,6 +282,7 @@ pm_options_read(pm_options_t *options, const char *data) {
options->encoding_locked = ((uint8_t) *data++) > 0;
options->main_script = ((uint8_t) *data++) > 0;
options->partial_script = ((uint8_t) *data++) > 0;
options->freeze = ((uint8_t) *data++) > 0;
uint32_t scopes_count = pm_options_read_u32(data);
data += 4;

View File

@ -160,6 +160,13 @@ typedef struct pm_options {
* inside another script.
*/
bool partial_script;
/**
* Whether or not the parser should freeze the nodes that it creates. This
* makes it possible to have a deeply frozen AST that is safe to share
* between concurrency primitives.
*/
bool freeze;
} pm_options_t;
/**
@ -285,6 +292,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, b
*/
PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
/**
* Set the freeze option on the given options struct.
*
* @param options The options struct to set the freeze value on.
* @param freeze The freeze value to set.
*/
PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool freeze);
/**
* Allocate and zero out the scopes array on the given options struct.
*
@ -355,6 +370,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
* | `1` | encoding locked |
* | `1` | main script |
* | `1` | partial script |
* | `1` | freeze |
* | `4` | the number of scopes |
* | ... | the scopes |
*

View File

@ -12,24 +12,34 @@ static VALUE rb_cPrism<%= node.name %>;
<%- end -%>
static VALUE
pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
uint64_t value = ((((uint64_t) (start - parser->start)) << 32) | ((uint32_t) (end - start)));
return ULL2NUM(value);
pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source, bool freeze) {
if (freeze) {
VALUE location_argv[] = {
source,
LONG2FIX(start - parser->start),
LONG2FIX(end - start)
};
return rb_obj_freeze(rb_class_new_instance(3, location_argv, rb_cPrismLocation));
} else {
uint64_t value = ((((uint64_t) (start - parser->start)) << 32) | ((uint32_t) (end - start)));
return ULL2NUM(value);
}
}
VALUE
pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source) {
pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) {
ID type = rb_intern(pm_token_type_name(token->type));
VALUE location = pm_location_new(parser, token->start, token->end);
VALUE location = pm_location_new(parser, token->start, token->end, source, freeze);
VALUE argv[] = {
source,
ID2SYM(type),
rb_enc_str_new((const char *) token->start, token->end - token->start, encoding),
location
};
VALUE slice = rb_enc_str_new((const char *) token->start, token->end - token->start, encoding);
if (freeze) rb_obj_freeze(slice);
return rb_class_new_instance(4, argv, rb_cPrismToken);
VALUE argv[] = { source, ID2SYM(type), slice, location };
VALUE value = rb_class_new_instance(4, argv, rb_cPrismToken);
if (freeze) rb_obj_freeze(value);
return value;
}
static VALUE
@ -68,7 +78,7 @@ pm_integer_new(const pm_integer_t *integer) {
// Create a Prism::Source object from the given parser, after pm_parse() was called.
VALUE
pm_source_new(const pm_parser_t *parser, rb_encoding *encoding) {
pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze) {
VALUE source_string = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding);
VALUE offsets = rb_ary_new_capa(parser->newline_list.size);
@ -76,7 +86,15 @@ pm_source_new(const pm_parser_t *parser, rb_encoding *encoding) {
rb_ary_push(offsets, ULONG2NUM(parser->newline_list.offsets[index]));
}
return rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(parser->start_line), offsets);
if (freeze) {
rb_obj_freeze(source_string);
rb_obj_freeze(offsets);
}
VALUE source = rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(parser->start_line), offsets);
if (freeze) rb_obj_freeze(source);
return source;
}
typedef struct pm_node_stack_node {
@ -106,7 +124,7 @@ pm_node_stack_pop(pm_node_stack_node_t **stack) {
}
VALUE
pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source) {
pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) {
VALUE constants = rb_ary_new_capa(parser->constant_pool.size);
for (uint32_t index = 0; index < parser->constant_pool.size; index++) {
@ -182,7 +200,7 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
argv[1] = ULONG2NUM(node->node_id);
// location
argv[2] = pm_location_new(parser, node->location.start, node->location.end);
argv[2] = pm_location_new(parser, node->location.start, node->location.end, source, freeze);
// flags
argv[3] = ULONG2NUM(node->flags);
@ -199,6 +217,7 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
rb_ary_push(argv[<%= index %>], rb_ary_pop(value_stack));
}
if (freeze) rb_obj_freeze(argv[<%= index %>]);
<%- when Prism::Template::StringField -%>
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
argv[<%= index %>] = pm_string_new(&cast-><%= field.name %>, encoding);
@ -215,12 +234,13 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
assert(cast-><%= field.name %>.ids[index] != 0);
rb_ary_push(argv[<%= index %>], RARRAY_AREF(constants, cast-><%= field.name %>.ids[index] - 1));
}
if (freeze) rb_obj_freeze(argv[<%= index %>]);
<%- when Prism::Template::LocationField -%>
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end);
argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze);
<%- when Prism::Template::OptionalLocationField -%>
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end);
argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze);
<%- when Prism::Template::UInt8Field -%>
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
argv[<%= index %>] = UINT2NUM(cast-><%= field.name %>);
@ -238,7 +258,10 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
<%- end -%>
<%- end -%>
rb_ary_push(value_stack, rb_class_new_instance(<%= node.fields.length + 4 %>, argv, rb_cPrism<%= node.name %>));
VALUE value = rb_class_new_instance(<%= node.fields.length + 4 %>, argv, rb_cPrism<%= node.name %>);
if (freeze) rb_obj_freeze(value);
rb_ary_push(value_stack, value);
break;
}
<%- end -%>

View File

@ -17,12 +17,12 @@ module Prism
PATCH_VERSION = 0
# Deserialize the AST represented by the given string into a parse result.
def self.load(input, serialized)
def self.load(input, serialized, freeze)
input = input.dup
source = Source.for(input)
loader = Loader.new(source, serialized)
result = loader.load_result
result = loader.load_result(freeze)
input.force_encoding(loader.encoding)
@ -35,13 +35,20 @@ module Prism
input.force_encoding(Encoding::BINARY) unless input.valid_encoding?
end
if freeze
input.freeze
source.source.freeze
source.offsets.freeze
source.freeze
end
result
end
# Deserialize the tokens represented by the given string into a parse
# result.
def self.load_tokens(source, serialized)
Loader.new(source, serialized).load_tokens_result
def self.load_tokens(source, serialized, freeze)
Loader.new(source, serialized).load_tokens_result(freeze)
end
class Loader # :nodoc:
@ -111,20 +118,47 @@ module Prism
end
def load_start_line
source.instance_variable_set :@start_line, load_varsint
source.instance_variable_set(:@start_line, load_varsint)
end
def load_line_offsets
source.instance_variable_set :@offsets, Array.new(load_varuint) { load_varuint }
def load_line_offsets(freeze)
offsets = Array.new(load_varuint) { load_varuint }
offsets.freeze if freeze
source.instance_variable_set(:@offsets, offsets)
end
def load_comments
Array.new(load_varuint) do
case load_varuint
when 0 then InlineComment.new(load_location_object)
when 1 then EmbDocComment.new(load_location_object)
def load_comments(freeze)
comments =
Array.new(load_varuint) do
comment =
case load_varuint
when 0 then InlineComment.new(load_location_object(freeze))
when 1 then EmbDocComment.new(load_location_object(freeze))
end
comment.freeze if freeze
comment
end
end
comments.freeze if freeze
comments
end
def load_magic_comments(freeze)
magic_comments =
Array.new(load_varuint) do
magic_comment =
MagicComment.new(
load_location_object(freeze),
load_location_object(freeze)
)
magic_comment.freeze if freeze
magic_comment
end
magic_comments.freeze if freeze
magic_comments
end
DIAGNOSTIC_TYPES = [
@ -138,57 +172,155 @@ module Prism
private_constant :DIAGNOSTIC_TYPES
def load_metadata
comments = load_comments
magic_comments = Array.new(load_varuint) { MagicComment.new(load_location_object, load_location_object) }
data_loc = load_optional_location_object
errors = Array.new(load_varuint) { ParseError.new(DIAGNOSTIC_TYPES.fetch(load_varuint), load_embedded_string, load_location_object, load_error_level) }
warnings = Array.new(load_varuint) { ParseWarning.new(DIAGNOSTIC_TYPES.fetch(load_varuint), load_embedded_string, load_location_object, load_warning_level) }
[comments, magic_comments, data_loc, errors, warnings]
def load_error_level
level = io.getbyte
case level
when 0
:syntax
when 1
:argument
when 2
:load
else
raise "Unknown level: #{level}"
end
end
def load_tokens
def load_errors(freeze)
errors =
Array.new(load_varuint) do
error =
ParseError.new(
DIAGNOSTIC_TYPES.fetch(load_varuint),
load_embedded_string,
load_location_object(freeze),
load_error_level
)
error.freeze if freeze
error
end
errors.freeze if freeze
errors
end
def load_warning_level
level = io.getbyte
case level
when 0
:default
when 1
:verbose
else
raise "Unknown level: #{level}"
end
end
def load_warnings(freeze)
warnings =
Array.new(load_varuint) do
warning =
ParseWarning.new(
DIAGNOSTIC_TYPES.fetch(load_varuint),
load_embedded_string,
load_location_object(freeze),
load_warning_level
)
warning.freeze if freeze
warning
end
warnings.freeze if freeze
warnings
end
def load_metadata(freeze)
[
load_comments(freeze),
load_magic_comments(freeze),
load_optional_location_object(freeze),
load_errors(freeze),
load_warnings(freeze)
]
end
def load_tokens(freeze)
tokens = []
while type = TOKEN_TYPES.fetch(load_varuint)
while (type = TOKEN_TYPES.fetch(load_varuint))
start = load_varuint
length = load_varuint
lex_state = load_varuint
location = Location.new(@source, start, length)
tokens << [Token.new(source, type, location.slice, location), lex_state]
location.freeze if freeze
slice = location.slice
slice.freeze if freeze
token = Token.new(@source, type, slice, location)
token.freeze if freeze
tokens << [token, lex_state]
end
tokens.freeze if freeze
tokens
end
def load_tokens_result
tokens = load_tokens
def load_tokens_result(freeze)
tokens = load_tokens(false)
encoding = load_encoding
load_start_line
load_line_offsets
comments, magic_comments, data_loc, errors, warnings = load_metadata
tokens.each { |token,| token.value.force_encoding(encoding) }
load_line_offsets(freeze)
comments, magic_comments, data_loc, errors, warnings = load_metadata(freeze)
tokens.each do |token,|
token.value.force_encoding(encoding)
if freeze
token.value.freeze
token.location.freeze
token.freeze
end
end
raise "Expected to consume all bytes while deserializing" unless @io.eof?
LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, @source)
result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, @source)
if freeze
tokens.each(&:freeze)
tokens.freeze
result.freeze
end
result
end
def load_nodes
def load_nodes(freeze)
load_header
load_encoding
load_start_line
load_line_offsets
load_line_offsets(freeze)
comments, magic_comments, data_loc, errors, warnings = load_metadata
comments, magic_comments, data_loc, errors, warnings = load_metadata(freeze)
@constant_pool_offset = load_uint32
@constant_pool = Array.new(load_varuint, nil)
[load_node, comments, magic_comments, data_loc, errors, warnings]
[load_node(freeze), comments, magic_comments, data_loc, errors, warnings]
end
def load_result
node, comments, magic_comments, data_loc, errors, warnings = load_nodes
ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, @source)
def load_result(freeze)
node, comments, magic_comments, data_loc, errors, warnings = load_nodes(freeze)
result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, @source)
result.freeze if freeze
result
end
private
@ -233,10 +365,10 @@ module Prism
io.read(4).unpack1("L")
end
def load_optional_node
def load_optional_node(freeze)
if io.getbyte != 0
io.pos -= 1
load_node
load_node(freeze)
end
end
@ -245,8 +377,7 @@ module Prism
end
def load_string
type = io.getbyte
case type
case (type = io.getbyte)
when 1
input.byteslice(load_varuint, load_varuint).force_encoding(encoding).freeze
when 2
@ -256,20 +387,23 @@ module Prism
end
end
def load_location
def load_location_object(freeze)
location = Location.new(source, load_varuint, load_varuint)
location.freeze if freeze
location
end
def load_location(freeze)
return load_location_object(freeze) if freeze
(load_varuint << 32) | load_varuint
end
def load_location_object
Location.new(source, load_varuint, load_varuint)
def load_optional_location(freeze)
load_location(freeze) if io.getbyte != 0
end
def load_optional_location
load_location if io.getbyte != 0
end
def load_optional_location_object
load_location_object if io.getbyte != 0
def load_optional_location_object(freeze)
load_location_object(freeze) if io.getbyte != 0
end
def load_constant(index)
@ -302,41 +436,12 @@ module Prism
load_constant(index - 1) if index != 0
end
def load_error_level
level = io.getbyte
case level
when 0
:syntax
when 1
:argument
when 2
:load
else
raise "Unknown level: #{level}"
end
end
def load_warning_level
level = io.getbyte
case level
when 0
:default
when 1
:verbose
else
raise "Unknown level: #{level}"
end
end
if RUBY_ENGINE == "ruby"
def load_node
def load_node(freeze)
type = io.getbyte
node_id = load_varuint
location = load_location
case type
location = load_location(freeze)
value = case type
<%- nodes.each_with_index do |node, index| -%>
when <%= index + 1 %> then
<%- if node.needs_serialized_length? -%>
@ -344,15 +449,15 @@ module Prism
<%- end -%>
<%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
case field
when Prism::Template::NodeField then "load_node"
when Prism::Template::OptionalNodeField then "load_optional_node"
when Prism::Template::NodeField then "load_node(freeze)"
when Prism::Template::OptionalNodeField then "load_optional_node(freeze)"
when Prism::Template::StringField then "load_string"
when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node }"
when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(freeze) }.tap { |nodes| nodes.freeze if freeze }"
when Prism::Template::ConstantField then "load_required_constant"
when Prism::Template::OptionalConstantField then "load_optional_constant"
when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_required_constant }"
when Prism::Template::LocationField then "load_location"
when Prism::Template::OptionalLocationField then "load_optional_location"
when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_required_constant }.tap { |constants| constants.freeze if freeze }"
when Prism::Template::LocationField then "load_location(freeze)"
when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
when Prism::Template::UInt8Field then "io.getbyte"
when Prism::Template::UInt32Field then "load_varuint"
when Prism::Template::IntegerField then "load_integer"
@ -362,34 +467,37 @@ module Prism
}].join(", ") -%>)
<%- end -%>
end
value.freeze if freeze
value
end
else
def load_node
def load_node(freeze)
type = io.getbyte
@load_node_lambdas[type].call
@load_node_lambdas[type].call(freeze)
end
def define_load_node_lambdas
@load_node_lambdas = [
nil,
<%- nodes.each do |node| -%>
-> {
-> (freeze) {
node_id = load_varuint
location = load_location
location = load_location(freeze)
<%- if node.needs_serialized_length? -%>
load_uint32
<%- end -%>
<%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
value = <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
case field
when Prism::Template::NodeField then "load_node"
when Prism::Template::OptionalNodeField then "load_optional_node"
when Prism::Template::NodeField then "load_node(freeze)"
when Prism::Template::OptionalNodeField then "load_optional_node(freeze)"
when Prism::Template::StringField then "load_string"
when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node }"
when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(freeze) }"
when Prism::Template::ConstantField then "load_required_constant"
when Prism::Template::OptionalConstantField then "load_optional_constant"
when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_required_constant }"
when Prism::Template::LocationField then "load_location"
when Prism::Template::OptionalLocationField then "load_optional_location"
when Prism::Template::LocationField then "load_location(freeze)"
when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
when Prism::Template::UInt8Field then "io.getbyte"
when Prism::Template::UInt32Field then "load_varuint"
when Prism::Template::IntegerField then "load_integer"
@ -397,6 +505,8 @@ module Prism
else raise
end
}].join(", ") -%>)
value.freeze if freeze
value
},
<%- end -%>
]
@ -411,5 +521,7 @@ module Prism
<%= token.name.to_sym.inspect %>,
<%- end -%>
]
private_constant :TOKEN_TYPES
end
end

View File

@ -0,0 +1,60 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
class FreezeTest < TestCase
def test_parse
assert_frozen(Prism.parse("1 + 2; %i{foo} + %i{bar}", freeze: true))
end
def test_lex
assert_frozen(Prism.lex("1 + 2; %i{foo} + %i{bar}", freeze: true))
end
def test_parse_lex
assert_frozen(Prism.parse_lex("1 + 2; %i{foo} + %i{bar}", freeze: true))
assert_frozen(Prism.parse_lex("# encoding: euc-jp\n%i{foo}", freeze: true))
end
def test_parse_comments
assert_frozen(Prism.parse_comments("# comment", freeze: true))
end
def test_parse_stream
assert_frozen(Prism.parse_stream(StringIO.new("1 + 2; %i{foo} + %i{bar}"), freeze: true))
end
if !ENV["PRISM_BUILD_MINIMAL"]
def test_dump
assert_frozen(Prism.dump("1 + 2; %i{foo} + %i{bar}", freeze: true))
end
end
private
def assert_frozen_each(value)
assert_predicate value, :frozen?
value.instance_variables.each do |name|
case (child = value.instance_variable_get(name))
when Array
child.each { |item| assert_frozen_each(item) }
when Hash
child.each { |key, item| assert_frozen_each(key); assert_frozen_each(item) }
else
assert_frozen_each(child)
end
end
end
if defined?(Ractor.shareable?)
def assert_frozen(value)
assert_frozen_each(value)
assert Ractor.shareable?(value), -> { binding.irb }
end
else
alias assert_frozen assert_frozen_each
end
end
end