deps: update simdutf to 5.3.4

PR-URL: https://github.com/nodejs/node/pull/54312 Reviewed-By: Marco Ippolito <marcoippolito54@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
2024-08-12 20:54:02 -04:00 · 2024-08-12 20:54:02 -04:00 · a199c529bc
commit a199c529bc
parent 5f230d2cf4
2 changed files with 573 additions and 414 deletions
--- a/deps/simdutf/simdutf.cpp
+++ b/deps/simdutf/simdutf.cpp
--- a/deps/simdutf/simdutf.h
+++ b/deps/simdutf/simdutf.h
@ -1,4 +1,4 @@
-/* auto-generated on 2024-07-31 17:22:10 -0400. Do not edit! */
+/* auto-generated on 2024-08-09 09:52:29 -0400. Do not edit! */
 /* begin file include/simdutf.h */
 #ifndef SIMDUTF_H
 #define SIMDUTF_H
@ -433,8 +433,11 @@
  #define SIMDUTF_POP_DISABLE_WARNINGS __pragma(warning( pop ))
 #else // SIMDUTF_REGULAR_VISUAL_STUDIO
-
+#if defined(__OPTIMIZE__) || defined(NDEBUG)
  #define simdutf_really_inline inline __attribute__((always_inline))
 #else
  #define simdutf_really_inline inline
 #endif
  #define simdutf_never_inline inline __attribute__((noinline))
  #define simdutf_unused __attribute__((unused))
@ -594,7 +597,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
 #define SIMDUTF_SIMDUTF_VERSION_H
 /** The version of simdutf being used (major.minor.revision) */
-#define SIMDUTF_VERSION "5.3.1"
+#define SIMDUTF_VERSION "5.3.4"
 namespace simdutf {
 enum {
@ -609,7 +612,7 @@ enum {
  /**
   * The revision (major.minor.REVISION) of simdutf being used.
   */
-  SIMDUTF_VERSION_REVISION = 1
+  SIMDUTF_VERSION_REVISION = 4
 };
 } // namespace simdutf
@ -1314,7 +1317,7 @@ simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input,
    /**
   * Convert valid UTF-8 string into latin1 string.
   *
-   * This function assumes that the input string is valid UTF-8.
+   * This function assumes that the input string is valid UTF-8 and that it can be represented as Latin1.
   *
   * This function is not BOM-aware.
   *
@ -1387,7 +1390,8 @@ simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t le
 /**
 * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
   * the result is implementation defined.
 *
 * This function is not BOM-aware.
 *
@ -1400,7 +1404,8 @@ simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t le
 /**
 * Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
 * the result is implementation defined.
 *
 * This function is not BOM-aware.
 *
@ -1415,7 +1420,8 @@ simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t len
 *
 * This function is equivalent to count_utf8
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
 * the result is implementation defined.
 *
 * This function is not BOM-aware.
 *
@ -1628,7 +1634,7 @@ simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, s
 /**
 * Using native endianness, convert UTF-16 string into Latin1 string.
 *
- * This function assumes that the input string is valid UTF-8.
+ * This function assumes that the input string is valid UTF-16 and that it can be represented as Latin1.
 *
 * This function is not BOM-aware.
 *
@ -1642,7 +1648,7 @@ simdutf_warn_unused size_t convert_valid_utf16_to_latin1(const char16_t * input,
 /**
 * Convert valid UTF-16LE string into Latin1 string.
 *
- * This function assumes that the input string is valid UTF-16LE.
+ * This function assumes that the input string is valid UTF-16LE and that it can be represented as Latin1.
 *
 * This function is not BOM-aware.
 *
@ -1656,7 +1662,7 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * inpu
 /**
 * Convert valid UTF-16BE string into Latin1 string.
 *
- * This function assumes that the input string is valid UTF-16BE.
+ * This function assumes that the input string is valid UTF-16BE and that it can be represented as Latin1.
 *
 * This function is not BOM-aware.
 *
@ -1671,7 +1677,7 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * inpu
 /**
 * Convert valid UTF-16LE string into UTF-8 string.
 *
- * This function assumes that the input string is valid UTF-16LE.
+ * This function assumes that the input string is valid UTF-16LE.
 *
 * This function is not BOM-aware.
 *
@ -1833,7 +1839,8 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input
 /**
 * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
 * the result is implementation defined.
 *
 * This function is not BOM-aware.
 *
@ -1847,7 +1854,8 @@ simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
 * Using native endianness; Compute the number of bytes that this UTF-16
 * string would require in UTF-8 format.
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
 * the result is implementation defined.
 *
 * @param input         the UTF-16 string to convert
 * @param length        the length of the string in 2-byte code units (char16_t)
@ -1858,7 +1866,8 @@ simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t
 /**
 * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
 * the result is implementation defined.
 *
 * @param input         the UTF-16LE string to convert
 * @param length        the length of the string in 2-byte code units (char16_t)
@ -1869,7 +1878,8 @@ simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size
 /**
 * Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
 * the result is implementation defined.
 *
 * @param input         the UTF-16BE string to convert
 * @param length        the length of the string in 2-byte code units (char16_t)
@ -1986,7 +1996,7 @@ simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *
 /**
 * Convert valid UTF-32 string into Latin1 string.
 *
- * This function assumes that the input string is valid UTF-32.
+ * This function assumes that the input string is valid UTF-32 and that it can be represented as Latin1.
 *
 * This function is not BOM-aware.
 *
@ -2117,7 +2127,8 @@ void change_endianness_utf16(const char16_t * input, size_t length, char16_t * o
 /**
 * Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
 * the result is implementation defined.
 *
 * @param input         the UTF-32 string to convert
 * @param length        the length of the string in 4-byte code units (char32_t)
@ -2128,7 +2139,8 @@ simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t
 /**
 * Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
 * the result is implementation defined.
 *
 * @param input         the UTF-32 string to convert
 * @param length        the length of the string in 4-byte code units (char32_t)
@ -2142,7 +2154,8 @@ simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_
 *
 * This function is equivalent to count_utf16.
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
 * the result is implementation defined.
 *
 * This function is not BOM-aware.
 *
@ -2157,7 +2170,8 @@ simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t * input, size_
 *
 * This function is equivalent to count_utf16le.
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
 * the result is implementation defined.
 *
 * This function is not BOM-aware.
 *
@ -2172,7 +2186,8 @@ simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, siz
 *
 * This function is equivalent to count_utf16be.
 *
- * This function does not validate the input.
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
 * the result is implementation defined.
 *
 * This function is not BOM-aware.
 *
@ -2187,6 +2202,8 @@ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, siz
 * it is valid.
 *
 * This function assumes that the input string is valid UTF-16 (native endianness).
 * It is acceptable to pass invalid UTF-16 strings but in such cases
 * the result is implementation defined.
 *
 * This function is not BOM-aware.
 *
@ -2201,6 +2218,8 @@ simdutf_warn_unused size_t count_utf16(const char16_t * input, size_t length) no
 * it is valid.
 *
 * This function assumes that the input string is valid UTF-16LE.
 * It is acceptable to pass invalid UTF-16 strings but in such cases
 * the result is implementation defined.
 *
 * This function is not BOM-aware.
 *
@ -2215,6 +2234,8 @@ simdutf_warn_unused size_t count_utf16le(const char16_t * input, size_t length)
 * it is valid.
 *
 * This function assumes that the input string is valid UTF-16BE.
 * It is acceptable to pass invalid UTF-16 strings but in such cases
 * the result is implementation defined.
 *
 * This function is not BOM-aware.
 *
@ -2229,6 +2250,8 @@ simdutf_warn_unused size_t count_utf16be(const char16_t * input, size_t length)
 * it is valid.
 *
 * This function assumes that the input string is valid UTF-8.
 * It is acceptable to pass invalid UTF-8 strings but in such cases
 * the result is implementation defined.
 *
 * @param input         the UTF-8 string to process
 * @param length        the length of the string in bytes
@ -2739,10 +2762,10 @@ public:
   */
  simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) const noexcept = 0;
-    /**
+  /**
   * Convert valid UTF-8 string into latin1 string.
   *
-   * This function assumes that the input string is valid UTF-8.
+   * This function assumes that the input string is valid UTF-8 and that it can be represented as Latin1.
   *
   * This function is not BOM-aware.
   *
@ -2871,7 +2894,8 @@ public:
  /**
   * Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
   *
-   * This function does not validate the input.
+   * This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
   * the result is implementation defined.
   *
   * @param input         the UTF-8 string to process
   * @param length        the length of the string in bytes
@ -2882,7 +2906,8 @@ public:
   /**
   * Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
   *
-   * This function is equivalent to count_utf8.
+   * This function is equivalent to count_utf8. It is acceptable to pass invalid UTF-8 strings but in such cases
   * the result is implementation defined.
   *
   * This function does not validate the input.
   *
@ -2957,7 +2982,7 @@ public:
  /**
   * Convert valid UTF-16LE string into Latin1 string.
   *
-   * This function assumes that the input string is valid UTF-8.
+   * This function assumes that the input string is valid UTF-16LE and that it can be represented as Latin1.
   * This function is not BOM-aware.
   *
@ -2971,7 +2996,7 @@ public:
  /**
   * Convert valid UTF-16BE string into Latin1 string.
   *
-   * This function assumes that the input string is valid UTF-8.
+   * This function assumes that the input string is valid UTF-16BE and that it can be represented as Latin1.
   *
   * This function is not BOM-aware.
   *
@ -3161,7 +3186,8 @@ public:
  /**
   * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
   *
-   * This function does not validate the input.
+   * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
   * the result is implementation defined.
   *
   * This function is not BOM-aware.
   *
@ -3174,7 +3200,8 @@ public:
  /**
   * Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
   *
-   * This function does not validate the input.
+   * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
   * the result is implementation defined.
   *
   * This function is not BOM-aware.
   *
@ -3275,7 +3302,7 @@ public:
  simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
-    /**
+  /**
   * Return the number of bytes that this UTF-16 string would require in Latin1 format.
   *
   *
@ -3399,7 +3426,8 @@ public:
  /**
   * Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
   *
-   * This function does not validate the input.
+   * This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
   * the result is implementation defined.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
@ -3410,7 +3438,8 @@ public:
  /**
   * Compute the number of bytes that this UTF-32 string would require in Latin1 format.
   *
-   * This function does not validate the input.
+   * This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
   * the result is implementation defined.
   *
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @return the number of bytes required to encode the UTF-32 string as Latin1
@ -3420,7 +3449,8 @@ public:
  /**
   * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
   *
-   * This function does not validate the input.
+   * This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
   * the result is implementation defined.
   *
   * @param input         the UTF-8 string to convert
   * @param length        the length of the string in bytes
@ -3431,7 +3461,8 @@ public:
  /**
   * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
   *
-   * This function does not validate the input.
+   * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
   * the result is implementation defined.
   *
   * This function is not BOM-aware.
   *
@ -3444,7 +3475,8 @@ public:
  /**
   * Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
   *
-   * This function does not validate the input.
+   * This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
   * the result is implementation defined.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
@ -3453,11 +3485,9 @@ public:
  simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
-    /**
+  /**
   * Return the number of bytes that this UTF-32 string would require in Latin1 format.
   *
   * This function does not validate the input.
   *
   * @param input         the UTF-32 string to convert
   * @param length        the length of the string in 4-byte code units (char32_t)
   * @return the number of bytes required to encode the UTF-32 string as Latin1
@ -3469,7 +3499,8 @@ public:
   *
   * This function is equivalent to count_utf16le.
   *
-   * This function does not validate the input.
+   * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
   * the result is implementation defined.
   *
   * This function is not BOM-aware.
   *
@ -3484,7 +3515,8 @@ public:
   *
   * This function is equivalent to count_utf16be.
   *
-   * This function does not validate the input.
+   * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
   * the result is implementation defined.
   *
   * This function is not BOM-aware.
   *
@ -3499,6 +3531,8 @@ public:
   * it is valid.
   *
   * This function assumes that the input string is valid UTF-16LE.
   * It is acceptable to pass invalid UTF-16 strings but in such cases
   * the result is implementation defined.
   *
   * This function is not BOM-aware.
   *
@ -3513,6 +3547,8 @@ public:
   * it is valid.
   *
   * This function assumes that the input string is valid UTF-16BE.
   * It is acceptable to pass invalid UTF-16 strings but in such cases
   * the result is implementation defined.
   *
   * This function is not BOM-aware.
   *
@ -3528,6 +3564,8 @@ public:
   * it is valid.
   *
   * This function assumes that the input string is valid UTF-8.
   * It is acceptable to pass invalid UTF-8 strings but in such cases
   * the result is implementation defined.
   *
   * @param input         the UTF-8 string to process
   * @param length        the length of the string in bytes
@ -3538,7 +3576,8 @@ public:
  /**
   * Provide the maximal binary length in bytes given the base64 input.
   * In general, if the input contains ASCII spaces, the result will be less than
-   * the maximum length.
+   * the maximum length. It is acceptable to pass invalid base64 strings but in such cases
   * the result is implementation defined.
   *
   * @param input         the base64 input to process
   * @param length        the length of the base64 input in bytes
@ -3549,7 +3588,8 @@ public:
  /**
   * Provide the maximal binary length in bytes given the base64 input.
   * In general, if the input contains ASCII spaces, the result will be less than
-   * the maximum length.
+   * the maximum length. It is acceptable to pass invalid base64 strings but in such cases
   * the result is implementation defined.
   *
   * @param input         the base64 input to process, in ASCII stored as 16-bit units
   * @param length        the length of the base64 input in 16-bit units