From cbf508da58d50f7448f10a1a6030b3a48dfec14d Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Thu, 15 Aug 2024 12:24:49 -0400
Subject: [PATCH] [ruby/prism] Special error for too short unicode escapes

https://github.com/ruby/prism/commit/9f1f7d08bd
---
 prism/config.yml                               | 1 +
 prism/prism.c                                  | 5 ++++-
 prism/templates/src/diagnostic.c.erb           | 1 +
 test/prism/errors/regexp_unicode_too_short.txt | 3 +++
 4 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 test/prism/errors/regexp_unicode_too_short.txt

diff --git a/prism/config.yml b/prism/config.yml
index fed8265173..6ead7a9d95 100644
--- a/prism/config.yml
+++ b/prism/config.yml
@@ -83,6 +83,7 @@ errors:
   - ESCAPE_INVALID_UNICODE_CM_FLAGS
   - ESCAPE_INVALID_UNICODE_LITERAL
   - ESCAPE_INVALID_UNICODE_LONG
+  - ESCAPE_INVALID_UNICODE_SHORT
   - ESCAPE_INVALID_UNICODE_TERM
   - EXPECT_ARGUMENT
   - EXPECT_EOL_AFTER_STATEMENT
diff --git a/prism/prism.c b/prism/prism.c
index 30fb1ad298..a706881e3e 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -9771,7 +9771,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
             } else {
                 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
 
-                if (length == 4) {
+                if (length == 0) {
+                    const uint8_t *start = parser->current.end - 2;
+                    PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
+                } else if (length == 4) {
                     uint32_t value = escape_unicode(parser, parser->current.end, 4);
 
                     if (flags & PM_ESCAPE_FLAG_REGEXP) {
diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb
index 5e6858ac61..d3fabb8449 100644
--- a/prism/templates/src/diagnostic.c.erb
+++ b/prism/templates/src/diagnostic.c.erb
@@ -167,6 +167,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS]    = { "invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL]     = { "invalid Unicode escape sequence; Multiple codepoints at single character literal are disallowed", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ESCAPE_INVALID_UNICODE_LONG]        = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_UNICODE_SHORT]       = { "too short escape sequence: %.*s", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ESCAPE_INVALID_UNICODE_TERM]        = { "unterminated Unicode escape", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_EXPECT_ARGUMENT]                    = { "expected an argument", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_EXPECT_EOL_AFTER_STATEMENT]         = { "unexpected %s, expecting end-of-input", PM_ERROR_LEVEL_SYNTAX },
diff --git a/test/prism/errors/regexp_unicode_too_short.txt b/test/prism/errors/regexp_unicode_too_short.txt
new file mode 100644
index 0000000000..a7638b2712
--- /dev/null
+++ b/test/prism/errors/regexp_unicode_too_short.txt
@@ -0,0 +1,3 @@
+/\u/
+ ^~ too short escape sequence: \u
+