[ruby/prism] Provide the ability to dump AST to JSON from C

https://github.com/ruby/prism/commit/d3a149efc5
This commit is contained in:
Kevin Newton 2024-02-16 18:29:19 -05:00 committed by git
parent 075b6ac8ae
commit b56b8ec797
6 changed files with 181 additions and 59 deletions

View File

@ -8,6 +8,7 @@
#include "prism/defines.h"
#include "prism/parser.h"
#include "prism/util/pm_buffer.h"
/**
* Append a new node onto the end of the node list.

View File

@ -188,14 +188,13 @@ const char * pm_token_type_human(pm_token_type_t token_type);
PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize);
/**
* Visit each of the nodes in this subtree using the given visitor callback.
* Dump JSON to the given buffer.
*
* @param node The node to visit.
* @param visitor The visitor callback to use. It should return `true` if the
* visitor should continue visiting nodes, and `false` if it should stop.
* @param data The optional data to pass to the visitor.
* @param buffer The buffer to serialize to.
* @param parser The parser that parsed the node.
* @param node The node to serialize.
*/
PRISM_EXPORTED_FUNCTION void pm_node_visit(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node);
/**
* @mainpage

View File

@ -161,42 +161,110 @@ pm_node_type_to_str(pm_node_type_t node_type)
return "";
}
/**
 * Dump a constant to the buffer as a double-quoted JSON string.
 *
 * The constant is looked up in the parser's constant pool by its id, and its
 * bytes are appended with JSON escaping between a pair of double quotes.
 */
static void
pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) {
    const pm_constant_t *entry = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);

    // Opening quote, escaped contents, closing quote.
    pm_buffer_append_byte(buffer, '"');
    pm_buffer_append_source(buffer, entry->start, entry->length, PM_BUFFER_ESCAPING_JSON);
    pm_buffer_append_byte(buffer, '"');
}
/**
 * Dump a location to the buffer as a JSON object of the form
 * `{"start":N,"end":M}`.
 *
 * Both values are computed as the distance from the parser's `start` pointer to
 * the location's `start` and `end` pointers, narrowed to 32 bits.
 */
static void
pm_dump_json_location(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_location_t *location) {
    const uint8_t *base = parser->start;

    const uint32_t start_offset = (uint32_t) (location->start - base);
    const uint32_t end_offset = (uint32_t) (location->end - base);

    pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"end\":%" PRIu32 "}", start_offset, end_offset);
}
/**
* Visit each of the nodes in this subtree using the given visitor callback.
* Dump JSON to the given buffer.
*/
PRISM_EXPORTED_FUNCTION void
pm_node_visit(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
if (!visitor(node, data)) return;
pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
<%- nodes.each do |node| -%>
<%- if (fields = node.fields.select { |field| field.is_a?(Prism::NodeField) || field.is_a?(Prism::OptionalNodeField) || field.is_a?(Prism::NodeListField) }).any? -%>
case <%= node.type %>: {
const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
<%- fields.each do |field| -%>
pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>);
// Visit the <%= field.name %> field
const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
pm_dump_json_location(buffer, parser, &cast->base.location);
<%- node.fields.each_with_index do |field, index| -%>
// Dump the <%= field.name %> field
pm_buffer_append_byte(buffer, ',');
pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>);
<%- case field -%>
<%- when Prism::NodeField -%>
pm_node_visit((const pm_node_t *) cast-><%= field.name %>, visitor, data);
pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
<%- when Prism::OptionalNodeField -%>
if (cast-><%= field.name %> != NULL) {
pm_node_visit((const pm_node_t *) cast-><%= field.name %>, visitor, data);
pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
} else {
pm_buffer_append_string(buffer, "null", 4);
}
<%- when Prism::NodeListField -%>
const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
pm_buffer_append_byte(buffer, '[');
for (size_t index = 0; index < <%= field.name %>->size; index++) {
pm_node_visit(<%= field.name %>->nodes[index], visitor, data);
if (index != 0) pm_buffer_append_byte(buffer, ',');
pm_dump_json(buffer, parser, <%= field.name %>->nodes[index]);
}
pm_buffer_append_byte(buffer, ']');
<%- when Prism::StringField -%>
const pm_string_t *<%= field.name %> = &cast-><%= field.name %>;
pm_buffer_append_byte(buffer, '"');
pm_buffer_append_source(buffer, pm_string_source(<%= field.name %>), pm_string_length(<%= field.name %>), PM_BUFFER_ESCAPING_JSON);
pm_buffer_append_byte(buffer, '"');
<%- when Prism::ConstantField -%>
pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
<%- when Prism::OptionalConstantField -%>
if (cast-><%= field.name %> != PM_CONSTANT_ID_UNSET) {
pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
} else {
pm_buffer_append_string(buffer, "null", 4);
}
<%- when Prism::ConstantListField -%>
const pm_constant_id_list_t *<%= field.name %> = &cast-><%= field.name %>;
pm_buffer_append_byte(buffer, '[');
for (size_t index = 0; index < <%= field.name %>->size; index++) {
if (index != 0) pm_buffer_append_byte(buffer, ',');
pm_dump_json_constant(buffer, parser, <%= field.name %>->ids[index]);
}
pm_buffer_append_byte(buffer, ']');
<%- when Prism::LocationField -%>
pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
<%- when Prism::OptionalLocationField -%>
if (cast-><%= field.name %>.start != NULL) {
pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
} else {
pm_buffer_append_string(buffer, "null", 4);
}
<%- when Prism::UInt8Field -%>
pm_buffer_append_format(buffer, "%" PRIu8, cast-><%= field.name %>);
<%- when Prism::UInt32Field -%>
pm_buffer_append_format(buffer, "%" PRIu32, cast-><%= field.name %>);
<%- when Prism::FlagsField -%>
size_t flags = 0;
pm_buffer_append_byte(buffer, '[');
<%- found = flags.find { |flag| flag.name == field.kind }.tap { |found| raise "Expected to find #{field.kind}" unless found } -%>
<%- found.values.each_with_index do |value, index| -%>
if (PM_NODE_FLAG_P(cast, PM_<%= found.human.upcase %>_<%= value.name %>)) {
if (flags != 0) pm_buffer_append_byte(buffer, ',');
pm_buffer_append_string(buffer, "\"<%= value.name %>\"", <%= value.name.bytesize + 2 %>);
flags++;
}
<%- end -%>
pm_buffer_append_byte(buffer, ']');
<%- else -%>
<%- raise %>
<%- end -%>
<%- end -%>
pm_buffer_append_byte(buffer, '}');
break;
}
<%- else -%>
case <%= node.type %>:
break;
<%- end -%>
<%- end -%>
case PM_SCOPE_NODE:
break;

View File

@ -1,41 +1,6 @@
<%# encoding: ASCII -%>
#include "prism/prettyprint.h"
/**
 * Append a slice of source bytes to the output buffer, escaping anything that
 * is not a plain printable byte.
 *
 * - The control characters 0x07 through 0x0D are written with their named
 *   escapes (`\a`, `\b`, `\t`, `\n`, `\v`, `\f`, `\r`).
 * - `"` and `\` are prefixed with a backslash.
 * - `#` is prefixed with a backslash only when the next byte in the slice is
 *   `{`, `@`, or `$`.
 * - Every other byte below 0x20, and every byte at or above 0x7F, is written
 *   as `\xNN` using two uppercase hexadecimal digits.
 * - All remaining bytes are copied through unchanged.
 */
static void
prettyprint_source(pm_buffer_t *output_buffer, const uint8_t *source, size_t length) {
    for (size_t offset = 0; offset < length; offset++) {
        const uint8_t current = source[offset];

        switch (current) {
            case '\a':
                pm_buffer_append_string(output_buffer, "\\a", 2);
                break;
            case '\b':
                pm_buffer_append_string(output_buffer, "\\b", 2);
                break;
            case '\t':
                pm_buffer_append_string(output_buffer, "\\t", 2);
                break;
            case '\n':
                pm_buffer_append_string(output_buffer, "\\n", 2);
                break;
            case '\v':
                pm_buffer_append_string(output_buffer, "\\v", 2);
                break;
            case '\f':
                pm_buffer_append_string(output_buffer, "\\f", 2);
                break;
            case '\r':
                pm_buffer_append_string(output_buffer, "\\r", 2);
                break;
            case '"':
                pm_buffer_append_string(output_buffer, "\\\"", 2);
                break;
            case '\\':
                pm_buffer_append_string(output_buffer, "\\\\", 2);
                break;
            case '#': {
                // Only look ahead when there is a following byte in the slice.
                if (offset + 1 < length) {
                    const uint8_t following = source[offset + 1];

                    if (following == '{' || following == '@' || following == '$') {
                        pm_buffer_append_byte(output_buffer, '\\');
                    }
                }

                pm_buffer_append_byte(output_buffer, '#');
                break;
            }
            default:
                // 0x07 through 0x0D were all handled by the cases above, so
                // any byte below 0x20 that reaches this point is one of the
                // control characters without a named escape.
                if (current < 0x20 || current >= 0x7F) {
                    pm_buffer_append_format(output_buffer, "\\x%02X", current);
                } else {
                    pm_buffer_append_byte(output_buffer, current);
                }
                break;
        }
    }
}
static inline void
prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) {
pm_line_column_t start = pm_newline_list_line_column(&parser->newline_list, location->start, parser->start_line);
@ -93,7 +58,7 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
}
<%- when Prism::StringField -%>
pm_buffer_append_string(output_buffer, " \"", 2);
prettyprint_source(output_buffer, pm_string_source(&cast-><%= field.name %>), pm_string_length(&cast-><%= field.name %>));
pm_buffer_append_source(output_buffer, pm_string_source(&cast-><%= field.name %>), pm_string_length(&cast-><%= field.name %>), PM_BUFFER_ESCAPING_RUBY);
pm_buffer_append_string(output_buffer, "\"\n", 2);
<%- when Prism::NodeListField -%>
pm_buffer_append_format(output_buffer, " (length: %lu)\n", (unsigned long) (cast-><%= field.name %>.size));
@ -139,7 +104,7 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
pm_buffer_append_byte(output_buffer, ' ');
prettyprint_location(output_buffer, parser, location);
pm_buffer_append_string(output_buffer, " = \"", 4);
prettyprint_source(output_buffer, location->start, (size_t) (location->end - location->start));
pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY);
pm_buffer_append_string(output_buffer, "\"\n", 2);
<%- when Prism::OptionalLocationField -%>
pm_location_t *location = &cast-><%= field.name %>;
@ -149,7 +114,7 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
pm_buffer_append_byte(output_buffer, ' ');
prettyprint_location(output_buffer, parser, location);
pm_buffer_append_string(output_buffer, " = \"", 4);
prettyprint_source(output_buffer, location->start, (size_t) (location->end - location->start));
pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY);
pm_buffer_append_string(output_buffer, "\"\n", 2);
}
<%- when Prism::UInt8Field -%>

View File

@ -160,6 +160,76 @@ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
pm_buffer_append_varuint(buffer, unsigned_int);
}
/**
 * Append a single byte to the buffer as a numeric escape sequence: `\xNN` when
 * escaping for Ruby, and `\uNNNN` (zero-padded to four hexadecimal digits)
 * otherwise.
 */
static void
pm_buffer_append_source_numeric(pm_buffer_t *buffer, uint8_t byte, pm_buffer_escaping_t escaping) {
    if (escaping == PM_BUFFER_ESCAPING_RUBY) {
        pm_buffer_append_format(buffer, "\\x%02X", byte);
    } else {
        pm_buffer_append_format(buffer, "\\u%04X", byte);
    }
}

/**
 * Append a slice of source code to the buffer, escaping it according to the
 * requested escaping mode.
 *
 * In both modes `\b`, `\t`, `\n`, `\f`, `\r`, `"`, and `\` are written as
 * two-character backslash escapes. The modes differ as follows:
 *
 * - PM_BUFFER_ESCAPING_RUBY writes `\a` and `\v` as named escapes, writes the
 *   remaining control bytes and every byte at or above 0x7F as `\xNN`, and
 *   prefixes `#` with a backslash when the next byte in the slice is `{`, `@`,
 *   or `$`.
 * - Any other mode writes `\a`, `\v`, the remaining control bytes, and every
 *   byte at or above 0x7F as `\uNNNN`, and never escapes `#`.
 *
 * Bytes are escaped one at a time, so a multi-byte sequence in the source is
 * written as one numeric escape per byte.
 */
void
pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping) {
    for (size_t offset = 0; offset < length; offset++) {
        const uint8_t current = source[offset];

        switch (current) {
            case '\a':
            case '\v':
                // These two only have a named escape in the Ruby mode.
                if (escaping == PM_BUFFER_ESCAPING_RUBY) {
                    pm_buffer_append_string(buffer, (current == '\a') ? "\\a" : "\\v", 2);
                } else {
                    pm_buffer_append_source_numeric(buffer, current, escaping);
                }
                break;
            case '\b':
                pm_buffer_append_string(buffer, "\\b", 2);
                break;
            case '\t':
                pm_buffer_append_string(buffer, "\\t", 2);
                break;
            case '\n':
                pm_buffer_append_string(buffer, "\\n", 2);
                break;
            case '\f':
                pm_buffer_append_string(buffer, "\\f", 2);
                break;
            case '\r':
                pm_buffer_append_string(buffer, "\\r", 2);
                break;
            case '"':
                pm_buffer_append_string(buffer, "\\\"", 2);
                break;
            case '\\':
                pm_buffer_append_string(buffer, "\\\\", 2);
                break;
            case '#': {
                // Only the Ruby mode looks ahead, and only when there is a
                // following byte in the slice.
                if (escaping == PM_BUFFER_ESCAPING_RUBY && offset + 1 < length) {
                    const uint8_t following = source[offset + 1];

                    if (following == '{' || following == '@' || following == '$') {
                        pm_buffer_append_byte(buffer, '\\');
                    }
                }

                pm_buffer_append_byte(buffer, '#');
                break;
            }
            default:
                // 0x07 through 0x0D were all handled by the cases above, so
                // any byte below 0x20 that reaches this point is one of the
                // control characters without a named escape.
                if (current < 0x20 || current >= 0x7F) {
                    pm_buffer_append_source_numeric(buffer, current, escaping);
                } else {
                    pm_buffer_append_byte(buffer, current);
                }
                break;
        }
    }
}
/**
* Prepend the given string to the buffer.
*/

View File

@ -129,6 +129,25 @@ void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
*/
void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
/**
 * The escaping modes accepted by pm_buffer_append_source when it appends a
 * slice of Ruby source code to a buffer.
 */
typedef enum {
    /**
     * Escape for Ruby: control bytes without a named escape and bytes at or
     * above 0x7F become `\xNN`, and `#` is backslash-escaped when it is
     * followed by `{`, `@`, or `$`.
     */
    PM_BUFFER_ESCAPING_RUBY = 0,

    /**
     * Escape for JSON: control bytes without a two-character escape and bytes
     * at or above 0x7F become `\uNNNN`.
     */
    PM_BUFFER_ESCAPING_JSON = 1
} pm_buffer_escaping_t;
/**
* Append a slice of source code to the buffer.
*
* @param buffer The buffer to append to.
* @param source The source code to append.
* @param length The length of the source code to append.
* @param escaping The type of escaping to perform.
*/
void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping);
/**
* Prepend the given string to the buffer.
*