From 5299b4a362c000f13778a04acfcac5ec0cd33654 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 20 Nov 2023 21:43:13 -0500 Subject: [PATCH] [ruby/prism] Build the ability to format errors (https://github.com/ruby/prism/pull/1796) Previously, we only supported error messages that were constant strings. This works for the most part, but there are some times where we want to include some part of the source in the error message to make it better. For example, instead of "Token is reserved" it's better to write "_1 is reserved". To do this, we now support allocating error messages at runtime that are built around format strings. https://github.com/ruby/prism/commit/7e6aa17deb --- prism/diagnostic.c | 59 +++++++++++++++++++++++++++++++++++++-- prism/diagnostic.h | 23 ++++++++++++++- prism/prism.c | 31 ++++++++++++++------ test/prism/errors_test.rb | 28 +++++++++---------- 4 files changed, 115 insertions(+), 26 deletions(-) diff --git a/prism/diagnostic.c b/prism/diagnostic.c index b6a4e291a7..7da8806b37 100644 --- a/prism/diagnostic.c +++ b/prism/diagnostic.c @@ -202,7 +202,7 @@ static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = { [PM_ERR_PARAMETER_NAME_REPEAT] = "Repeated parameter name", [PM_ERR_PARAMETER_NO_DEFAULT] = "Expected a default value for the parameter", [PM_ERR_PARAMETER_NO_DEFAULT_KW] = "Expected a default value for the keyword parameter", - [PM_ERR_PARAMETER_NUMBERED_RESERVED] = "Token reserved for a numbered parameter", + [PM_ERR_PARAMETER_NUMBERED_RESERVED] = "%.2s is reserved for a numbered parameter", [PM_ERR_PARAMETER_ORDER] = "Unexpected parameter order", [PM_ERR_PARAMETER_SPLAT_MULTI] = "Unexpected multiple `*` splat parameters", [PM_ERR_PARAMETER_STAR] = "Unexpected parameter `*`", @@ -261,8 +261,10 @@ static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = { static const char* pm_diagnostic_message(pm_diagnostic_id_t diag_id) { assert(diag_id < PM_DIAGNOSTIC_ID_LEN); + const char *message = diagnostic_messages[diag_id]; assert(message); + return message; } @@ -274,7 +276,57 @@ pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t * pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) calloc(sizeof(pm_diagnostic_t), 1); if (diagnostic == NULL) return false; - *diagnostic = (pm_diagnostic_t) { .start = start, .end = end, .message = pm_diagnostic_message(diag_id) }; + *diagnostic = (pm_diagnostic_t) { + .start = start, + .end = end, + .message = pm_diagnostic_message(diag_id), + .owned = false + }; + + pm_list_append(list, (pm_list_node_t *) diagnostic); + return true; +} + +/** + * Append a diagnostic to the given list of diagnostics that is using a format + * string for its message. + */ +bool +pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...) { + va_list arguments; + va_start(arguments, diag_id); + + const char *format = pm_diagnostic_message(diag_id); + int result = vsnprintf(NULL, 0, format, arguments); + va_end(arguments); + + if (result < 0) { + return false; + } + + pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) calloc(sizeof(pm_diagnostic_t), 1); + if (diagnostic == NULL) { + return false; + } + + size_t length = (size_t) (result + 1); + char *message = (char *) malloc(length); + if (message == NULL) { + free(diagnostic); + return false; + } + + va_start(arguments, diag_id); + vsnprintf(message, length, format, arguments); + va_end(arguments); + + *diagnostic = (pm_diagnostic_t) { + .start = start, + .end = end, + .message = message, + .owned = true + }; + pm_list_append(list, (pm_list_node_t *) diagnostic); return true; } @@ -288,8 +340,9 @@ pm_diagnostic_list_free(pm_list_t *list) { for (node = list->head; node != NULL; node = next) { next = node->next; - pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) node; + + if (diagnostic->owned) free((void *) diagnostic->message); free(diagnostic); } } diff --git a/prism/diagnostic.h b/prism/diagnostic.h index 92bc88953e..33a70229c8 100644 --- a/prism/diagnostic.h +++ b/prism/diagnostic.h @@ -30,6 +30,13 @@ typedef struct { /** The message associated with the diagnostic. */ const char *message; + + /** + * Whether or not the memory related to the message of this diagnostic is + * owned by this diagnostic. If it is, it needs to be freed when the + * diagnostic is freed. + */ + bool owned; } pm_diagnostic_t; /** @@ -249,7 +256,8 @@ typedef enum { } pm_diagnostic_id_t; /** - * Append a diagnostic to the given list of diagnostics. + * Append a diagnostic to the given list of diagnostics that is using shared + * memory for its message. * * @param list The list to append to. * @param start The start of the diagnostic. @@ -259,6 +267,19 @@ typedef enum { */ bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id); +/** + * Append a diagnostic to the given list of diagnostics that is using a format + * string for its message. + * + * @param list The list to append to. + * @param start The start of the diagnostic. + * @param end The end of the diagnostic. + * @param diag_id The diagnostic ID. + * @param ... The arguments to the format string for the message. + * @return Whether the diagnostic was successfully appended. + */ +bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...); + /** * Deallocate the internal state of the given diagnostic list. * diff --git a/prism/prism.c b/prism/prism.c index 48b6311d2f..3b70ae1115 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -479,6 +479,11 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_ pm_diagnostic_list_append(&parser->error_list, start, end, diag_id); } +/** + * Append an error to the list of errors on the parser using a format string. + */ +#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__) + /** * Append an error to the list of errors on the parser using the location of the * current token. @@ -489,12 +494,10 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { } /** - * Append an error to the list of errors on the parser using the given location. + * Append an error to the list of errors on the parser using the given location + * using a format string. */ -static inline void -pm_parser_err_location(pm_parser_t *parser, const pm_location_t *location, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, location->start, location->end, diag_id); -} +#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (location)->start, (location)->end, diag_id, __VA_ARGS__) /** * Append an error to the list of errors on the parser using the location of the @@ -505,6 +508,12 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_ pm_parser_err(parser, node->location.start, node->location.end, diag_id); } +/** + * Append an error to the list of errors on the parser using the location of the + * given node and a format string. + */ +#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, node->location.start, node->location.end, diag_id, __VA_ARGS__) + /** * Append an error to the list of errors on the parser using the location of the * previous token. @@ -523,6 +532,12 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_ pm_parser_err(parser, token->start, token->end, diag_id); } +/** + * Append an error to the list of errors on the parser using the location of the + * given token and a format string. + */ +#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, token->start, token->end, diag_id, __VA_ARGS__) + /** * Append a warning to the list of warnings on the parser. */ @@ -4078,7 +4093,7 @@ pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) { static inline void pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { if (pm_token_is_numbered_parameter(start, end)) { - pm_parser_err(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED); + PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start); } } @@ -10576,7 +10591,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) { return target; case PM_LOCAL_VARIABLE_READ_NODE: if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) { - pm_parser_err_node(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED); + PM_PARSER_ERR_NODE_FORMAT(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start); } else { assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t)); target->type = PM_LOCAL_VARIABLE_TARGET_NODE; @@ -15905,7 +15920,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * if (pm_token_is_numbered_parameter(source, source + length)) { const pm_location_t *location = &call->receiver->location; - pm_parser_err_location(parser, location, PM_ERR_PARAMETER_NUMBERED_RESERVED); + PM_PARSER_ERR_LOCATION_FORMAT(parser, location, PM_ERR_PARAMETER_NUMBERED_RESERVED, location->start); } } diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index c6ef417e55..c6d35baed4 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -563,15 +563,15 @@ module Prism end RUBY assert_errors expected, source, [ - ["Token reserved for a numbered parameter", 8..10], - ["Token reserved for a numbered parameter", 14..16], - ["Token reserved for a numbered parameter", 20..22], - ["Token reserved for a numbered parameter", 26..28], - ["Token reserved for a numbered parameter", 32..34], - ["Token reserved for a numbered parameter", 40..42], - ["Token reserved for a numbered parameter", 46..48], - ["Token reserved for a numbered parameter", 52..54], - ["Token reserved for a numbered parameter", 58..60], + ["_1 is reserved for a numbered parameter", 8..10], + ["_2 is reserved for a numbered parameter", 14..16], + ["_3 is reserved for a numbered parameter", 20..22], + ["_4 is reserved for a numbered parameter", 26..28], + ["_5 is reserved for a numbered parameter", 32..34], + ["_6 is reserved for a numbered parameter", 40..42], + ["_7 is reserved for a numbered parameter", 46..48], + ["_8 is reserved for a numbered parameter", 52..54], + ["_9 is reserved for a numbered parameter", 58..60], ] end @@ -1253,18 +1253,18 @@ module Prism def test_writing_numbered_parameter assert_errors expression("-> { _1 = 0 }"), "-> { _1 = 0 }", [ - ["Token reserved for a numbered parameter", 5..7] + ["_1 is reserved for a numbered parameter", 5..7] ] end def test_targeting_numbered_parameter assert_errors expression("-> { _1, = 0 }"), "-> { _1, = 0 }", [ - ["Token reserved for a numbered parameter", 5..7] + ["_1 is reserved for a numbered parameter", 5..7] ] end def test_defining_numbered_parameter - error_messages = ["Token reserved for a numbered parameter"] + error_messages = ["_1 is reserved for a numbered parameter"] assert_error_messages "def _1; end", error_messages assert_error_messages "def self._1; end", error_messages @@ -1319,7 +1319,7 @@ module Prism def test_numbered_parameters_in_block_arguments source = "foo { |_1| }" assert_errors expression(source), source, [ - ["Token reserved for a numbered parameter", 7..9], + ["_1 is reserved for a numbered parameter", 7..9], ] end @@ -1405,7 +1405,7 @@ module Prism /(?<_1>)/ =~ a RUBY - message = "Token reserved for a numbered parameter" + message = "_1 is reserved for a numbered parameter" assert_errors expression(source), source, [ [message, 5..7], [message, 13..15],