[ruby/prism] Refactor serializer
https://github.com/ruby/prism/commit/8ab2532f09
This commit is contained in:
parent 713f31872a
commit da93c9ae29
@@ -63,7 +63,7 @@ module Prism
#
# Load the serialized AST using the source as a reference into a tree.
#
def self.load(source, serialized, freeze = false)
Serialize.load(source, serialized, freeze)
Serialize.load_parse(source, serialized, freeze)
end
end

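For orientation, the public entry point touched by this hunk is exercised roughly like the following sketch (hedged: it assumes the prism gem is installed and that Prism.dump produces the serialized payload that Prism.load consumes):

    require "prism"

    source = "1 + 2"
    serialized = Prism.dump(source)          # binary serialization of the parsed AST
    result = Prism.load(source, serialized)  # after this commit, delegates to Serialize.load_parse
    puts result.value.class                  # => Prism::ProgramNode
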
@@ -15,7 +15,8 @@ module Prism
# must align with the build shared library from make/rake.
libprism_in_build = File.expand_path("../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
libprism_in_libdir = "#{RbConfig::CONFIG["libdir"]}/prism/libprism.#{RbConfig::CONFIG["SOEXT"]}"
if File.exist? libprism_in_build

if File.exist?(libprism_in_build)
INCLUDE_DIR = File.expand_path("../../include", __dir__)
ffi_lib libprism_in_build
else
@@ -363,86 +364,28 @@ module Prism
end

def lex_common(string, code, options) # :nodoc:
serialized =
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
buffer.read
end

freeze = options.fetch(:freeze, false)
source = Source.for(code)
result = Serialize.load_tokens(source, serialized, freeze)

if freeze
source.source.freeze
source.offsets.freeze
source.freeze
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false))
end

result
end

def parse_common(string, code, options) # :nodoc:
serialized = dump_common(string, options)
Prism.load(code, serialized, options.fetch(:freeze, false))
Serialize.load_parse(code, serialized, options.fetch(:freeze, false))
end

def parse_comments_common(string, code, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options))

source = Source.for(code)
loader = Serialize::Loader.new(source, buffer.read)

loader.load_header
loader.load_encoding
loader.load_start_line

if (freeze = options.fetch(:freeze, false))
source.source.freeze
source.offsets.freeze
source.freeze
end

loader.load_comments(freeze)
Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false))
end
end

def parse_lex_common(string, code, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options))

source = Source.for(code)
loader = Serialize::Loader.new(source, buffer.read)
freeze = options.fetch(:freeze, false)

tokens = loader.load_tokens(false)
node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes(freeze)

tokens.each do |token,|
token.value.force_encoding(loader.encoding)

if freeze
token.value.freeze
token.location.freeze
token.freeze
end
end

value = [node, tokens]
result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, source)

if freeze
source.source.freeze
source.offsets.freeze
source.freeze
tokens.each(&:freeze)
tokens.freeze
value.freeze
result.freeze
end

result
Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false))
end
end

@@ -48,6 +48,16 @@ module Prism
@offsets = offsets # set after parsing is done
end

# Replace the value of start_line with the given value.
def replace_start_line(start_line)
@start_line = start_line
end

# Replace the value of offsets with the given value.
def replace_offsets(offsets)
@offsets.replace(offsets)
end

# Returns the encoding of the source code, which is set by parameters to the
# parser or by the encoding magic comment.
def encoding
@@ -132,6 +142,13 @@ module Prism
code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
end

# Freeze this object and the objects it contains.
def deep_freeze
source.freeze
offsets.freeze
freeze
end

private

# Binary search through the offsets to find the line number for the given
@@ -854,5 +871,12 @@ module Prism
location
super
end

# Freeze this object and the objects it contains.
def deep_freeze
value.freeze
location.freeze
freeze
end
end
end

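These deep_freeze helpers back the freeze behavior that the loaders thread through. A hedged usage sketch, assuming the freeze: keyword seen in the options.fetch(:freeze, false) calls above is exposed on the public parse API:

    require "prism"

    result = Prism.parse("1 + 2", freeze: true)
    puts result.frozen?         # => true
    puts result.source.frozen?  # => true, via Source#deep_freeze
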
@@ -387,29 +387,55 @@ dump_file(int argc, VALUE *argv, VALUE self) {
/* Extracting values for the parse result */
/******************************************************************************/

/**
 * The same as rb_class_new_instance, but accepts an additional boolean to
 * indicate whether or not the resulting class instance should be frozen.
 */
static inline VALUE
rb_class_new_instance_freeze(int argc, const VALUE *argv, VALUE klass, bool freeze) {
VALUE value = rb_class_new_instance(argc, argv, klass);
if (freeze) rb_obj_freeze(value);
return value;
}

/**
 * Create a new Location instance from the given parser and bounds.
 */
static inline VALUE
parser_location(const pm_parser_t *parser, VALUE source, bool freeze, const uint8_t *start, size_t length) {
VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(length) };
return rb_class_new_instance_freeze(3, argv, rb_cPrismLocation, freeze);
}

/**
 * Create a new Location instance from the given parser and location.
 */
#define PARSER_LOCATION_LOC(parser, source, freeze, loc) \
parser_location(parser, source, freeze, loc.start, (size_t) (loc.end - loc.start))

/**
 * Build a new Comment instance from the given parser and comment.
 */
static inline VALUE
parser_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_comment_t *comment) {
VALUE argv[] = { PARSER_LOCATION_LOC(parser, source, freeze, comment->location) };
VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
return rb_class_new_instance_freeze(1, argv, type, freeze);
}

/**
 * Extract the comments out of the parser into an array.
 */
static VALUE
parser_comments(pm_parser_t *parser, VALUE source, bool freeze) {
parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
VALUE comments = rb_ary_new_capa(parser->comment_list.size);

for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
VALUE location_argv[] = {
source,
LONG2FIX(comment->location.start - parser->start),
LONG2FIX(comment->location.end - comment->location.start)
};

VALUE location = rb_class_new_instance(3, location_argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(location);

VALUE comment_argv[] = { location };
VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;

VALUE value = rb_class_new_instance(1, comment_argv, type);
if (freeze) rb_obj_freeze(value);

for (
const pm_comment_t *comment = (const pm_comment_t *) parser->comment_list.head;
comment != NULL;
comment = (const pm_comment_t *) comment->node.next
) {
VALUE value = parser_comment(parser, source, freeze, comment);
rb_ary_push(comments, value);
}

@@ -417,36 +443,30 @@ parser_comments(pm_parser_t *parser, VALUE source, bool freeze) {
return comments;
}

/**
 * Build a new MagicComment instance from the given parser and magic comment.
 */
static inline VALUE
parser_magic_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
VALUE key_loc = parser_location(parser, source, freeze, magic_comment->key_start, magic_comment->key_length);
VALUE value_loc = parser_location(parser, source, freeze, magic_comment->value_start, magic_comment->value_length);
VALUE argv[] = { key_loc, value_loc };
return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze);
}

/**
 * Extract the magic comments out of the parser into an array.
 */
static VALUE
parser_magic_comments(pm_parser_t *parser, VALUE source, bool freeze) {
parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
VALUE magic_comments = rb_ary_new_capa(parser->magic_comment_list.size);

for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
VALUE key_loc_argv[] = {
source,
LONG2FIX(magic_comment->key_start - parser->start),
LONG2FIX(magic_comment->key_length)
};

VALUE key_loc = rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(key_loc);

VALUE value_loc_argv[] = {
source,
LONG2FIX(magic_comment->value_start - parser->start),
LONG2FIX(magic_comment->value_length)
};

VALUE value_loc = rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(value_loc);

VALUE magic_comment_argv[] = { key_loc, value_loc };
VALUE value = rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment);
if (freeze) rb_obj_freeze(value);

for (
const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) parser->magic_comment_list.head;
magic_comment != NULL;
magic_comment = (const pm_magic_comment_t *) magic_comment->node.next
) {
VALUE value = parser_magic_comment(parser, source, freeze, magic_comment);
rb_ary_push(magic_comments, value);
}

@@ -463,16 +483,7 @@ parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) {
if (parser->data_loc.end == NULL) {
return Qnil;
} else {
VALUE argv[] = {
source,
LONG2FIX(parser->data_loc.start - parser->start),
LONG2FIX(parser->data_loc.end - parser->data_loc.start)
};

VALUE location = rb_class_new_instance(3, argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(location);

return location;
return PARSER_LOCATION_LOC(parser, source, freeze, parser->data_loc);
}
}

@@ -480,19 +491,17 @@ parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) {
 * Extract the errors out of the parser into an array.
 */
static VALUE
parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
VALUE errors = rb_ary_new_capa(parser->error_list.size);
pm_diagnostic_t *error;

for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
VALUE location_argv[] = {
source,
LONG2FIX(error->location.start - parser->start),
LONG2FIX(error->location.end - error->location.start)
};

VALUE location = rb_class_new_instance(3, location_argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(location);
for (
const pm_diagnostic_t *error = (const pm_diagnostic_t *) parser->error_list.head;
error != NULL;
error = (const pm_diagnostic_t *) error->node.next
) {
VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id)));
VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding));
VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, error->location);

VALUE level = Qnil;
switch (error->level) {
@@ -509,15 +518,8 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool fre
rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
}

VALUE message = rb_enc_str_new_cstr(error->message, encoding);
if (freeze) rb_obj_freeze(message);

VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id)));
VALUE error_argv[] = { type, message, location, level };

VALUE value = rb_class_new_instance(4, error_argv, rb_cPrismParseError);
if (freeze) rb_obj_freeze(value);

VALUE argv[] = { type, message, location, level };
VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseError, freeze);
rb_ary_push(errors, value);
}

@@ -529,19 +531,17 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool fre
 * Extract the warnings out of the parser into an array.
 */
static VALUE
parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
VALUE warnings = rb_ary_new_capa(parser->warning_list.size);
pm_diagnostic_t *warning;

for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) {
VALUE location_argv[] = {
source,
LONG2FIX(warning->location.start - parser->start),
LONG2FIX(warning->location.end - warning->location.start)
};

VALUE location = rb_class_new_instance(3, location_argv, rb_cPrismLocation);
if (freeze) rb_obj_freeze(location);
for (
const pm_diagnostic_t *warning = (const pm_diagnostic_t *) parser->warning_list.head;
warning != NULL;
warning = (const pm_diagnostic_t *) warning->node.next
) {
VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id)));
VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding));
VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, warning->location);

VALUE level = Qnil;
switch (warning->level) {
@@ -555,15 +555,8 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool f
rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level);
}

VALUE message = rb_enc_str_new_cstr(warning->message, encoding);
if (freeze) rb_obj_freeze(message);

VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id)));
VALUE warning_argv[] = { type, message, location, level };

VALUE value = rb_class_new_instance(4, warning_argv, rb_cPrismParseWarning);
if (freeze) rb_obj_freeze(value);

VALUE argv[] = { type, message, location, level };
VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseWarning, freeze);
rb_ary_push(warnings, value);
}

@@ -575,7 +568,7 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool f
 * Create a new parse result from the given parser, value, encoding, and source.
 */
static VALUE
parse_result_create(VALUE class, pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source, bool freeze) {
parse_result_create(VALUE class, const pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source, bool freeze) {
VALUE result_argv[] = {
value,
parser_comments(parser, source, freeze),
@@ -586,10 +579,7 @@ parse_result_create(VALUE class, pm_parser_t *parser, VALUE value, rb_encoding *
source
};

VALUE result = rb_class_new_instance(7, result_argv, class);
if (freeze) rb_obj_freeze(result);

return result;
return rb_class_new_instance_freeze(7, result_argv, class, freeze);
}

/******************************************************************************/

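The new rb_class_new_instance_freeze helper factors out the construct-then-optionally-freeze pattern the old code repeated inline. In Ruby terms the pattern is roughly the following (an illustrative sketch only, not code from this commit; new_instance_freeze is a hypothetical name):

    # Build an instance of klass and freeze it when requested.
    def new_instance_freeze(klass, *args, freeze:)
      value = klass.new(*args)
      value.freeze if freeze
      value
    end

    Point = Struct.new(:x, :y)
    puts new_instance_freeze(Point, 1, 2, freeze: true).frozen? # => true
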
@@ -16,15 +16,41 @@ module Prism
# strings.
PATCH_VERSION = 0

# Deserialize the AST represented by the given string into a parse result.
def self.load(input, serialized, freeze)
# Deserialize the dumped output from a request to parse or parse_file.
#
# The formatting of the source of this method is purposeful to illustrate
# the structure of the serialized data.
def self.load_parse(input, serialized, freeze)
input = input.dup
source = Source.for(input)

loader = Loader.new(source, serialized)
result = loader.load_result(freeze)

input.force_encoding(loader.encoding)
loader.load_header
encoding = loader.load_encoding
start_line = loader.load_varsint
offsets = loader.load_line_offsets(freeze)

source.replace_start_line(start_line)
source.replace_offsets(offsets)

comments = loader.load_comments(freeze)
magic_comments = loader.load_magic_comments(freeze)
data_loc = loader.load_optional_location_object(freeze)
errors = loader.load_errors(encoding, freeze)
warnings = loader.load_warnings(encoding, freeze)
cpool_base = loader.load_uint32
cpool_size = loader.load_varuint

constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size)

node = loader.load_node(constant_pool, encoding, freeze)
loader.load_constant_pool(constant_pool)
raise unless loader.eof?

result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, source)
result.freeze if freeze

input.force_encoding(encoding)

# This is an extremely niche use-case where the file was marked as binary
# but it contained UTF-8-encoded characters. In that case we will actually
@@ -37,94 +63,231 @@ module Prism

if freeze
input.freeze
source.source.freeze
source.offsets.freeze
source.freeze
source.deep_freeze
end

result
end

# Deserialize the tokens represented by the given string into a parse
# result.
def self.load_tokens(source, serialized, freeze)
Loader.new(source, serialized).load_tokens_result(freeze)
# Deserialize the dumped output from a request to lex or lex_file.
#
# The formatting of the source of this method is purposeful to illustrate
# the structure of the serialized data.
def self.load_lex(input, serialized, freeze)
source = Source.for(input)
loader = Loader.new(source, serialized)

tokens = loader.load_tokens
encoding = loader.load_encoding
start_line = loader.load_varsint
offsets = loader.load_line_offsets(freeze)

source.replace_start_line(start_line)
source.replace_offsets(offsets)

comments = loader.load_comments(freeze)
magic_comments = loader.load_magic_comments(freeze)
data_loc = loader.load_optional_location_object(freeze)
errors = loader.load_errors(encoding, freeze)
warnings = loader.load_warnings(encoding, freeze)
raise unless loader.eof?

result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, source)

tokens.each do |token|
token[0].value.force_encoding(encoding)

if freeze
token[0].deep_freeze
token.freeze
end
end

if freeze
source.deep_freeze
tokens.freeze
result.freeze
end

result
end

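Serialize.load_lex is the deserializer behind the lex APIs, and it pairs each Token with its raw lexer state. A hedged usage sketch through the public interface:

    require "prism"

    result = Prism.lex("a = 1")
    token, lex_state = result.value.first
    puts token.type       # => :IDENTIFIER
    puts lex_state.class  # => Integer, the raw lexer state bits
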
# Deserialize the dumped output from a request to parse_comments or
# parse_file_comments.
#
# The formatting of the source of this method is purposeful to illustrate
# the structure of the serialized data.
def self.load_parse_comments(input, serialized, freeze)
source = Source.for(input)
loader = Loader.new(source, serialized)

loader.load_header
loader.load_encoding
start_line = loader.load_varsint

source.replace_start_line(start_line)

result = loader.load_comments(freeze)
raise unless loader.eof?

source.deep_freeze if freeze
result
end

# Deserialize the dumped output from a request to parse_lex or
# parse_lex_file.
#
# The formatting of the source of this method is purposeful to illustrate
# the structure of the serialized data.
def self.load_parse_lex(input, serialized, freeze)
source = Source.for(input)
loader = Loader.new(source, serialized)

tokens = loader.load_tokens
loader.load_header
encoding = loader.load_encoding
start_line = loader.load_varsint
offsets = loader.load_line_offsets(freeze)

source.replace_start_line(start_line)
source.replace_offsets(offsets)

comments = loader.load_comments(freeze)
magic_comments = loader.load_magic_comments(freeze)
data_loc = loader.load_optional_location_object(freeze)
errors = loader.load_errors(encoding, freeze)
warnings = loader.load_warnings(encoding, freeze)
cpool_base = loader.load_uint32
cpool_size = loader.load_varuint

constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size)

node = loader.load_node(constant_pool, encoding, freeze)
loader.load_constant_pool(constant_pool)
raise unless loader.eof?

value = [node, tokens]
result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, source)

tokens.each do |token|
token[0].value.force_encoding(encoding)

if freeze
token[0].deep_freeze
token.freeze
end
end

if freeze
source.deep_freeze
tokens.freeze
value.freeze
result.freeze
end

result
end

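Serialize.load_parse_lex backs the parse_lex APIs, whose result value bundles the root node together with the token stream built above. A hedged sketch through the public interface:

    require "prism"

    result = Prism.parse_lex("a = 1")
    node, tokens = result.value
    puts node.class     # => Prism::ProgramNode
    puts tokens.length  # number of token/lex-state pairs
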
class ConstantPool # :nodoc:
attr_reader :size

def initialize(input, serialized, base, size)
@input = input
@serialized = serialized
@base = base
@size = size
@pool = Array.new(size, nil)
end

def get(index, encoding)
@pool[index] ||=
begin
offset = @base + index * 8
start = @serialized.unpack1("L", offset: offset)
length = @serialized.unpack1("L", offset: offset + 4)

if start.nobits?(1 << 31)
@input.byteslice(start, length).force_encoding(encoding).to_sym
else
@serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(encoding).to_sym
end
end
end
end

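Each constant pool entry is an eight-byte (start, length) pair, and the high bit of start selects whether the constant's bytes live in the original input or inline in the serialized blob. A self-contained sketch of that lookup rule (illustrative values only):

    FLAG = 1 << 31

    def constant_bytes(start, length, input, serialized)
      if start.nobits?(FLAG)
        input.byteslice(start, length)                    # bytes point into the source
      else
        serialized.byteslice(start & (FLAG - 1), length)  # bytes embedded in the dump
      end
    end

    puts constant_bytes(0, 3, "foo = 1", "")         # => "foo"
    puts constant_bytes(FLAG | 2, 3, "", "xxbarxx")  # => "bar"
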
if RUBY_ENGINE == "truffleruby"
# StringIO is synchronized and that adds a high overhead on TruffleRuby.
class FastStringIO # :nodoc:
attr_accessor :pos

def initialize(string)
@string = string
@pos = 0
end

def getbyte
byte = @string.getbyte(@pos)
@pos += 1
byte
end

def read(n)
slice = @string.byteslice(@pos, n)
@pos += n
slice
end

def eof?
@pos >= @string.bytesize
end
end
else
FastStringIO = ::StringIO # :nodoc:
end

class Loader # :nodoc:
if RUBY_ENGINE == "truffleruby"
# StringIO is synchronized and that adds a high overhead on TruffleRuby.
class FastStringIO # :nodoc:
attr_accessor :pos

def initialize(string)
@string = string
@pos = 0
end

def getbyte
byte = @string.getbyte(@pos)
@pos += 1
byte
end

def read(n)
slice = @string.byteslice(@pos, n)
@pos += n
slice
end

def eof?
@pos >= @string.bytesize
end
end
else
FastStringIO = ::StringIO
end
private_constant :FastStringIO

attr_reader :encoding, :input, :serialized, :io
attr_reader :constant_pool_offset, :constant_pool, :source
attr_reader :start_line
attr_reader :input, :io, :source

def initialize(source, serialized)
@encoding = Encoding::UTF_8

@input = source.source.dup
raise unless serialized.encoding == Encoding::BINARY
@serialized = serialized
@io = FastStringIO.new(serialized)

@constant_pool_offset = nil
@constant_pool = nil

@source = source
define_load_node_lambdas unless RUBY_ENGINE == "ruby"
define_load_node_lambdas if RUBY_ENGINE != "ruby"
end

def eof?
io.getbyte
io.eof?
end

def load_constant_pool(constant_pool)
trailer = 0

constant_pool.size.times do |index|
start, length = io.read(8).unpack("L2")
trailer += length if start.anybits?(1 << 31)
end

io.read(trailer)
end

def load_header
raise "Invalid serialization" if io.read(5) != "PRISM"
raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
only_semantic_fields = io.getbyte
unless only_semantic_fields == 0
raise "Invalid serialization (location fields must be included but are not)"
end
raise "Invalid serialization (location fields must be included but are not)" if io.getbyte != 0
end

def load_encoding
@encoding = Encoding.find(io.read(load_varuint))
@input = input.force_encoding(@encoding).freeze
@encoding
end

def load_start_line
source.instance_variable_set(:@start_line, load_varsint)
encoding = Encoding.find(io.read(load_varuint))
@input = input.force_encoding(encoding).freeze
encoding
end

def load_line_offsets(freeze)
offsets = Array.new(load_varuint) { load_varuint }
offsets.freeze if freeze
source.instance_variable_set(:@offsets, offsets)
offsets
end

def load_comments(freeze)
@@ -187,13 +350,13 @@ module Prism
end
end

def load_errors(freeze)
def load_errors(encoding, freeze)
errors =
Array.new(load_varuint) do
error =
ParseError.new(
DIAGNOSTIC_TYPES.fetch(load_varuint),
load_embedded_string,
load_embedded_string(encoding),
load_location_object(freeze),
load_error_level
)
@@ -219,13 +382,13 @@ module Prism
end
end

def load_warnings(freeze)
def load_warnings(encoding, freeze)
warnings =
Array.new(load_varuint) do
warning =
ParseWarning.new(
DIAGNOSTIC_TYPES.fetch(load_varuint),
load_embedded_string,
load_embedded_string(encoding),
load_location_object(freeze),
load_warning_level
)
@@ -238,17 +401,7 @@ module Prism
warnings
end

def load_metadata(freeze)
[
load_comments(freeze),
load_magic_comments(freeze),
load_optional_location_object(freeze),
load_errors(freeze),
load_warnings(freeze)
]
end

def load_tokens(freeze)
def load_tokens
tokens = []

while (type = TOKEN_TYPES.fetch(load_varuint))
@@ -257,74 +410,14 @@ module Prism
lex_state = load_varuint

location = Location.new(@source, start, length)
location.freeze if freeze

slice = location.slice
slice.freeze if freeze

token = Token.new(@source, type, slice, location)
token.freeze if freeze
token = Token.new(@source, type, location.slice, location)

tokens << [token, lex_state]
end

tokens.freeze if freeze
tokens
end

def load_tokens_result(freeze)
tokens = load_tokens(false)
encoding = load_encoding
load_start_line
load_line_offsets(freeze)
comments, magic_comments, data_loc, errors, warnings = load_metadata(freeze)

tokens.each do |token,|
token.value.force_encoding(encoding)

if freeze
token.value.freeze
token.location.freeze
token.freeze
end
end

raise "Expected to consume all bytes while deserializing" unless @io.eof?
result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, @source)

if freeze
tokens.each(&:freeze)
tokens.freeze
result.freeze
end

result
end

def load_nodes(freeze)
load_header
load_encoding
load_start_line
load_line_offsets(freeze)

comments, magic_comments, data_loc, errors, warnings = load_metadata(freeze)

@constant_pool_offset = load_uint32
@constant_pool = Array.new(load_varuint, nil)

[load_node(freeze), comments, magic_comments, data_loc, errors, warnings]
end

def load_result(freeze)
node, comments, magic_comments, data_loc, errors, warnings = load_nodes(freeze)
result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, @source)

result.freeze if freeze
result
end

private

# variable-length integer using https://en.wikipedia.org/wiki/LEB128
# This is also what protobuf uses: https://protobuf.dev/programming-guides/encoding/#varints
def load_varuint
@@ -365,23 +458,23 @@ module Prism
io.read(4).unpack1("L")
end

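load_varuint, whose body falls outside this hunk, reads the LEB128 variable-length integers referenced in the comment above. A self-contained decoding sketch, assuming the standard little-endian 7-bits-per-byte layout:

    # Decode an unsigned LEB128 integer from an IO-like object with #getbyte.
    def decode_varuint(io)
      result = 0
      shift = 0
      loop do
        byte = io.getbyte
        result |= (byte & 0x7f) << shift
        break unless byte.anybits?(0x80) # high bit set means another byte follows
        shift += 7
      end
      result
    end

    require "stringio"
    puts decode_varuint(StringIO.new("\xE5\x8E\x26".b)) # => 624485, the canonical LEB128 example
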
def load_optional_node(freeze)
def load_optional_node(constant_pool, encoding, freeze)
if io.getbyte != 0
io.pos -= 1
load_node(freeze)
load_node(constant_pool, encoding, freeze)
end
end

def load_embedded_string
def load_embedded_string(encoding)
io.read(load_varuint).force_encoding(encoding).freeze
end

def load_string
def load_string(encoding)
case (type = io.getbyte)
when 1
input.byteslice(load_varuint, load_varuint).force_encoding(encoding).freeze
when 2
load_embedded_string
load_embedded_string(encoding)
else
raise "Unknown serialized string type: #{type}"
end
@@ -406,38 +499,18 @@ module Prism
load_location_object(freeze) if io.getbyte != 0
end

def load_constant(index)
constant = constant_pool[index]

unless constant
offset = constant_pool_offset + index * 8
start = @serialized.unpack1("L", offset: offset)
length = @serialized.unpack1("L", offset: offset + 4)

constant =
if start.nobits?(1 << 31)
input.byteslice(start, length).force_encoding(@encoding).to_sym
else
@serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(@encoding).to_sym
end

constant_pool[index] = constant
end

constant
end

def load_required_constant
load_constant(load_varuint - 1)
end

def load_optional_constant
def load_constant(constant_pool, encoding)
index = load_varuint
load_constant(index - 1) if index != 0
constant_pool.get(index - 1, encoding)
end

def load_optional_constant(constant_pool, encoding)
index = load_varuint
constant_pool.get(index - 1, encoding) if index != 0
end

if RUBY_ENGINE == "ruby"
def load_node(freeze)
def load_node(constant_pool, encoding, freeze)
type = io.getbyte
node_id = load_varuint
location = load_location(freeze)
@@ -449,13 +522,13 @@ module Prism
<%- end -%>
<%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
case field
when Prism::Template::NodeField then "load_node(freeze)"
when Prism::Template::OptionalNodeField then "load_optional_node(freeze)"
when Prism::Template::StringField then "load_string"
when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(freeze) }.tap { |nodes| nodes.freeze if freeze }"
when Prism::Template::ConstantField then "load_required_constant"
when Prism::Template::OptionalConstantField then "load_optional_constant"
when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_required_constant }.tap { |constants| constants.freeze if freeze }"
when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)"
when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)"
when Prism::Template::StringField then "load_string(encoding)"
when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }.tap { |nodes| nodes.freeze if freeze }"
when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)"
when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)"
when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }.tap { |constants| constants.freeze if freeze }"
when Prism::Template::LocationField then "load_location(freeze)"
when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
when Prism::Template::UInt8Field then "io.getbyte"
@@ -472,16 +545,15 @@ module Prism
value
end
else
def load_node(freeze)
type = io.getbyte
@load_node_lambdas[type].call(freeze)
def load_node(constant_pool, encoding, freeze)
@load_node_lambdas[io.getbyte].call(constant_pool, encoding, freeze)
end

def define_load_node_lambdas
@load_node_lambdas = [
nil,
<%- nodes.each do |node| -%>
-> (freeze) {
-> (constant_pool, encoding, freeze) {
node_id = load_varuint
location = load_location(freeze)
<%- if node.needs_serialized_length? -%>
@@ -489,13 +561,13 @@ module Prism
<%- end -%>
value = <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
case field
when Prism::Template::NodeField then "load_node(freeze)"
when Prism::Template::OptionalNodeField then "load_optional_node(freeze)"
when Prism::Template::StringField then "load_string"
when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(freeze) }"
when Prism::Template::ConstantField then "load_required_constant"
when Prism::Template::OptionalConstantField then "load_optional_constant"
when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_required_constant }"
when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)"
when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)"
when Prism::Template::StringField then "load_string(encoding)"
when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }"
when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)"
when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)"
when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }"
when Prism::Template::LocationField then "load_location(freeze)"
when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
when Prism::Template::UInt8Field then "io.getbyte"
@@ -522,6 +594,9 @@ module Prism
<%- end -%>
]

private_constant :TOKEN_TYPES
private_constant :MAJOR_VERSION, :MINOR_VERSION, :PATCH_VERSION
private_constant :ConstantPool, :FastStringIO, :Loader, :TOKEN_TYPES
end

private_constant :Serialize
end