From cbf508da58d50f7448f10a1a6030b3a48dfec14d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 15 Aug 2024 12:24:49 -0400 Subject: [PATCH] [ruby/prism] Special error for too short unicode errors https://github.com/ruby/prism/commit/9f1f7d08bd --- prism/config.yml | 1 + prism/prism.c | 5 ++++- prism/templates/src/diagnostic.c.erb | 1 + test/prism/errors/regexp_unicode_too_short.txt | 3 +++ 4 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 test/prism/errors/regexp_unicode_too_short.txt diff --git a/prism/config.yml b/prism/config.yml index fed8265173..6ead7a9d95 100644 --- a/prism/config.yml +++ b/prism/config.yml @@ -83,6 +83,7 @@ errors: - ESCAPE_INVALID_UNICODE_CM_FLAGS - ESCAPE_INVALID_UNICODE_LITERAL - ESCAPE_INVALID_UNICODE_LONG + - ESCAPE_INVALID_UNICODE_SHORT - ESCAPE_INVALID_UNICODE_TERM - EXPECT_ARGUMENT - EXPECT_EOL_AFTER_STATEMENT diff --git a/prism/prism.c b/prism/prism.c index 30fb1ad298..a706881e3e 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -9771,7 +9771,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre } else { size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4)); - if (length == 4) { + if (length == 0) { + const uint8_t *start = parser->current.end - 2; + PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); + } else if (length == 4) { uint32_t value = escape_unicode(parser, parser->current.end, 4); if (flags & PM_ESCAPE_FLAG_REGEXP) { diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb index 5e6858ac61..d3fabb8449 100644 --- a/prism/templates/src/diagnostic.c.erb +++ b/prism/templates/src/diagnostic.c.erb @@ -167,6 +167,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = { "invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = { "invalid Unicode escape sequence; Multiple codepoints at single character literal are disallowed", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_ESCAPE_INVALID_UNICODE_SHORT] = { "too short escape sequence: %.*s", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = { "unterminated Unicode escape", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_ARGUMENT] = { "expected an argument", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = { "unexpected %s, expecting end-of-input", PM_ERROR_LEVEL_SYNTAX }, diff --git a/test/prism/errors/regexp_unicode_too_short.txt b/test/prism/errors/regexp_unicode_too_short.txt new file mode 100644 index 0000000000..a7638b2712 --- /dev/null +++ b/test/prism/errors/regexp_unicode_too_short.txt @@ -0,0 +1,3 @@ +/\u/ + ^~ too short escape sequence: \u +