diff --git a/prism/config.yml b/prism/config.yml index 51851bd288..5f0741bce3 100644 --- a/prism/config.yml +++ b/prism/config.yml @@ -247,6 +247,7 @@ warnings: - FLOAT_OUT_OF_RANGE - INTEGER_IN_FLIP_FLOP - INVALID_CHARACTER + - INVALID_NUMBERED_REFERENCE - KEYWORD_EOL - LITERAL_IN_CONDITION_DEFAULT - LITERAL_IN_CONDITION_VERBOSE @@ -2677,13 +2678,13 @@ nodes: - name: number type: uint32 comment: | - The (1-indexed, from the left) number of the capture group. Numbered references that would overflow a `uint32` result in a `number` of exactly `2**32 - 1`. + The (1-indexed, from the left) number of the capture group. Numbered references that are too large result in this value being `0`. $1 # number `1` $5432 # number `5432` - $4294967296 # number `4294967295` + $4294967296 # number `0` comment: | Represents reading a numbered reference to a capture in the previous match. diff --git a/prism/defines.h b/prism/defines.h index 5995d54cb8..00411a0eb6 100644 --- a/prism/defines.h +++ b/prism/defines.h @@ -10,6 +10,7 @@ #define PRISM_DEFINES_H #include +#include #include #include #include @@ -22,7 +23,6 @@ * some platforms they aren't included unless this is already defined. */ #define __STDC_FORMAT_MACROS - #include /** diff --git a/prism/prism.c b/prism/prism.c index a4f9c5dafa..a492eaa329 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -1203,40 +1203,6 @@ pm_node_flag_set_repeated_parameter(pm_node_t *node) { /* Node creation functions */ /******************************************************************************/ -/** - * Parse the decimal number represented by the range of bytes. returns - * UINT32_MAX if the number fails to parse. This function assumes that the range - * of bytes has already been validated to contain only decimal digits. 
- */ -static uint32_t -parse_decimal_number(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - ptrdiff_t diff = end - start; - assert(diff > 0 && ((unsigned long) diff < SIZE_MAX)); - size_t length = (size_t) diff; - - char *digits = xcalloc(length + 1, sizeof(char)); - memcpy(digits, start, length); - digits[length] = '\0'; - - char *endptr; - errno = 0; - unsigned long value = strtoul(digits, &endptr, 10); - - if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) { - pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL); - value = UINT32_MAX; - } - - xfree(digits); - - if (value > UINT32_MAX) { - pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL); - value = UINT32_MAX; - } - - return (uint32_t) value; -} - /** * When you have an encoding flag on a regular expression, it takes precedence * over all of the previously set encoding flags. So we need to mask off any @@ -5136,6 +5102,52 @@ pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *loc return node; } +/** + * The maximum numbered reference value is defined as the maximum value that an + * integer can hold minus 1 bit for CRuby instruction sequence operand tagging. + */ +#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1)) + +/** + * Parse the decimal number represented by the range of bytes. Returns + * 0 if the number fails to parse or if the number is greater than the maximum + * value representable by a numbered reference. This function assumes that the + * range of bytes has already been validated to contain only decimal digits. 
+ */ +static uint32_t +pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) { + const uint8_t *start = token->start + 1; + const uint8_t *end = token->end; + + ptrdiff_t diff = end - start; + assert(diff > 0 && ((unsigned long) diff < SIZE_MAX)); + size_t length = (size_t) diff; + + char *digits = xcalloc(length + 1, sizeof(char)); + memcpy(digits, start, length); + digits[length] = '\0'; + + char *endptr; + errno = 0; + unsigned long value = strtoul(digits, &endptr, 10); + + if ((digits == endptr) || (*endptr != '\0')) { + pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL); + value = 0; + } + + xfree(digits); + + if ((errno == ERANGE) || (value > NTH_REF_MAX)) { /* overflow in strtoul also means "too big": warn rather than error */ + PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start); + value = 0; + } + + return (uint32_t) value; +} + +#undef NTH_REF_MAX + /** * Allocate and initialize a new NthReferenceReadNode node. */ @@ -5149,7 +5161,7 @@ pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *na .type = PM_NUMBERED_REFERENCE_READ_NODE, .location = PM_LOCATION_TOKEN_VALUE(name), }, - .number = parse_decimal_number(parser, name->start + 1, name->end) + .number = pm_numbered_reference_read_node_number(parser, name) }; return node; diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb index 5ba4d62317..2a14c0dcb3 100644 --- a/prism/templates/src/diagnostic.c.erb +++ b/prism/templates/src/diagnostic.c.erb @@ -327,6 +327,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_WARN_FLOAT_OUT_OF_RANGE] = { "Float %.*s%s out of range", PM_WARNING_LEVEL_VERBOSE }, [PM_WARN_INTEGER_IN_FLIP_FLOP] = { "integer literal in flip-flop", PM_WARNING_LEVEL_DEFAULT }, [PM_WARN_INVALID_CHARACTER] = { "invalid character syntax; use %s%s%s", PM_WARNING_LEVEL_DEFAULT }, + [PM_WARN_INVALID_NUMBERED_REFERENCE] = { "'%.*s' is too big for a number 
variable, always nil", PM_WARNING_LEVEL_DEFAULT }, [PM_WARN_KEYWORD_EOL] = { "`%.*s` at the end of line without an expression", PM_WARNING_LEVEL_VERBOSE }, [PM_WARN_LITERAL_IN_CONDITION_DEFAULT] = { "%sliteral in %s", PM_WARNING_LEVEL_DEFAULT }, [PM_WARN_LITERAL_IN_CONDITION_VERBOSE] = { "%sliteral in %s", PM_WARNING_LEVEL_VERBOSE },