[ruby/json] Introduce ARM Neon and SSE2 SIMD.

(https://github.com/ruby/json/pull/743) See the pull request for the long development history: https://github.com/ruby/json/pull/743 ``` == Encoding activitypub.json (52595 bytes) ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 2.913k i/100ms Calculating ------------------------------------- after 29.377k (± 2.0%) i/s (34.04 μs/i) - 148.563k in 5.059169s Comparison: before: 23314.1 i/s after: 29377.3 i/s - 1.26x faster == Encoding citm_catalog.json (500298 bytes) ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 152.000 i/100ms Calculating ------------------------------------- after 1.569k (± 0.8%) i/s (637.49 μs/i) - 7.904k in 5.039001s Comparison: before: 1485.6 i/s after: 1568.7 i/s - 1.06x faster == Encoding twitter.json (466906 bytes) ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 309.000 i/100ms Calculating ------------------------------------- after 3.115k (± 3.1%) i/s (321.01 μs/i) - 15.759k in 5.063776s Comparison: before: 2508.3 i/s after: 3115.2 i/s - 1.24x faster ``` https://github.com/ruby/json/commit/49003523da
2025-04-28 07:57:10 -05:00 · 2025-04-28 07:57:10 -05:00 · a3ec53bbb0
commit a3ec53bbb0
parent 7f0c6d30d3
4 changed files with 584 additions and 13 deletions
--- a/ext/json/generator/extconf.rb
+++ b/ext/json/generator/extconf.rb
@ -6,5 +6,36 @@ if RUBY_ENGINE == 'truffleruby'
 else
  append_cflags("-std=c99")
  $defs << "-DJSON_GENERATOR"
+
+  if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
+    if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/
+      # Try to compile a small program using NEON instructions
+      if have_header('arm_neon.h')
+        have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC')
+          #include <arm_neon.h>
+          int main() {
+              uint8x16_t test = vdupq_n_u8(32);
+              return 0;
+          }
+        SRC
+          $defs.push("-DENABLE_SIMD")
+      end
+    end
+
+    if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC', opt='-msse2')
+      #include <x86intrin.h>
+      int main() {
+          __m128i test = _mm_set1_epi8(32);
+          return 0;
+      }
+      SRC
+        $defs.push("-DENABLE_SIMD")
+    end
+
+    have_header('cpuid.h')
+  end
+
+  create_header
+
  create_makefile 'json/ext/generator'
 end
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@ -5,6 +5,8 @@
 #include <math.h>
 #include <ctype.h>

+#include "simd.h"
+
 /* ruby api and some helpers */

 typedef struct JSON_Generator_StateStruct {
@ -109,12 +111,40 @@ typedef struct _search_state {
    const char *end;
    const char *cursor;
    FBuffer *buffer;
+
+#ifdef ENABLE_SIMD
+    const char *chunk_base;
+    const char *chunk_end;
+    bool has_matches;
+
+#ifdef HAVE_SIMD_NEON
+    uint64_t matches_mask;
+#elif HAVE_SIMD_SSE2
+    int matches_mask;
+#else
+#error "Unknown SIMD Implementation."
+#endif /* HAVE_SIMD_NEON */
+#endif /* ENABLE_SIMD */ 
 } search_state;

-static inline void search_flush(search_state *search)
+#if (defined(__GNUC__ ) || defined(__clang__))
+#define FORCE_INLINE __attribute__((always_inline))
+#else
+#define FORCE_INLINE
+#endif
+
+static inline FORCE_INLINE void search_flush(search_state *search)
 {
-    fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
-    search->cursor = search->ptr;
+    // Do not remove this conditional without profiling, specifically escape-heavy text.
+    // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
+    // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
+    // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
+    // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
+    // nothing needs to be flushed, we can save a few memory references with this conditional.
+    if (search->ptr > search->cursor) {
+        fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
+        search->cursor = search->ptr;
+    }
 }

 static const unsigned char escape_table_basic[256] = {
@ -130,6 +160,8 @@ static const unsigned char escape_table_basic[256] = {
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 };

+static unsigned char (*search_escape_basic_impl)(search_state *);
+
 static inline unsigned char search_escape_basic(search_state *search)
 {
    while (search->ptr < search->end) {
@ -144,7 +176,8 @@ static inline unsigned char search_escape_basic(search_state *search)
    return 0;
 }

-static inline void escape_UTF8_char_basic(search_state *search) {
+static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
+{
    const unsigned char ch = (unsigned char)*search->ptr;
    switch (ch) {
        case '"':  fbuffer_append(search->buffer, "\\\"", 2); break;
@ -156,11 +189,15 @@ static inline void escape_UTF8_char_basic(search_state *search) {
        case '\r': fbuffer_append(search->buffer, "\\r", 2);  break;
        case '\t': fbuffer_append(search->buffer, "\\t", 2);  break;
        default: {
-            const char *hexdig = "0123456789abcdef";
-            char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
-            scratch[4] = hexdig[(ch >> 4) & 0xf];
-            scratch[5] = hexdig[ch & 0xf];
-            fbuffer_append(search->buffer, scratch, 6);
+            if (ch < ' ') {
+                const char *hexdig = "0123456789abcdef";
+                char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
+                scratch[4] = hexdig[(ch >> 4) & 0xf];
+                scratch[5] = hexdig[ch & 0xf];
+                fbuffer_append(search->buffer, scratch, 6);
+            } else {
+                fbuffer_append_char(search->buffer, ch);
+            }
            break;
        }
    }
@ -186,12 +223,13 @@ static inline void escape_UTF8_char_basic(search_state *search) {
 */
 static inline void convert_UTF8_to_JSON(search_state *search)
 {
-    while (search_escape_basic(search)) {
+    while (search_escape_basic_impl(search)) {
        escape_UTF8_char_basic(search);
    }
 }

-static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) {
+static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
+{
    const unsigned char ch = (unsigned char)*search->ptr;
    switch (ch_len) {
        case 1: {
@ -227,6 +265,285 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
    search->cursor = (search->ptr += ch_len);
 }

+#ifdef ENABLE_SIMD
+
+static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
+{
+    // Flush the buffer so that only the last 'len' characters of the input remain unflushed.
+    search_flush(search);
+
+    FBuffer *buf = search->buffer;
+    fbuffer_inc_capa(buf, vec_len); // make room for a whole vector so the padded scratch area below fits in the buffer
+
+    char *s = (buf->ptr + buf->len); // first unused byte of the output buffer
+
+    // Pad the buffer with dummy characters that won't need escaping.
+    // This seems wasteful at first sight, but memset of vector length is very fast.
+    memset(s, 'X', vec_len);
+
+    // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
+    // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
+    MEMCPY(s, search->ptr, char, len);
+
+    return s; // buf->len is not advanced for the copied bytes here; the caller does that when nothing needs escaping
+}
+
+#ifdef HAVE_SIMD_NEON
+
+static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
+{
+    uint64_t mask = search->matches_mask;
+    uint32_t index = trailing_zeros64(mask) >> 2; // neon_match_mask produces 4 mask bits per input byte, so bit position / 4 is the byte offset
+
+    // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
+    // If we want to use a similar approach for full escaping we'll need to ensure:
+    //     search->chunk_base + index >= search->ptr
+    // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
+    // is one byte after the previous match then:
+    //     search->chunk_base + index == search->ptr
+    search->ptr = search->chunk_base + index;
+    mask &= mask - 1; // clear the lowest set bit so the next call reports the following match
+    search->matches_mask = mask;
+    search_flush(search); // append the unescaped bytes that precede the match
+    return 1; // always non-zero: tells the caller there is a byte to escape at search->ptr
+}
+
+// Packs a 16-byte comparison result into a 64-bit mask. See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
+static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
+{
+    const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4); // shift each 16-bit lane right by 4 and narrow to 8 bits: 4 bits remain per input byte
+    const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
+    return mask & 0x8888888888888888ull; // keep one bit per 4-bit group so every matching byte contributes exactly one set bit
+}
+
+static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr) // returns a non-zero mask when any of the 16 bytes at ptr needs escaping
+{
+    uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr); // reads 16 bytes; the caller must guarantee they are readable
+
+    const uint8x16_t lower_bound = vdupq_n_u8(' '); // bytes below 0x20 must be escaped
+    const uint8x16_t backslash   = vdupq_n_u8('\\');
+    const uint8x16_t dblquote    = vdupq_n_u8('\"');
+
+    uint8x16_t too_low       = vcltq_u8(chunk, lower_bound); // unsigned compare: chunk < ' '
+    uint8x16_t has_backslash = vceqq_u8(chunk, backslash);
+    uint8x16_t has_dblquote  = vceqq_u8(chunk, dblquote);
+    uint8x16_t needs_escape  = vorrq_u8(too_low, vorrq_u8(has_backslash, has_dblquote));
+
+    return neon_match_mask(needs_escape);
+}
+
+static inline unsigned char search_escape_basic_neon(search_state *search)
+{
+    if (RB_UNLIKELY(search->has_matches)) {
+        // There are more matches if search->matches_mask > 0.
+        if (search->matches_mask > 0) {
+            return neon_next_match(search);
+        } else {
+            // neon_next_match will only advance search->ptr up to the last matching character.
+            // Skip over any characters in the last chunk that occur after the last match.
+            search->has_matches = false;
+            search->ptr = search->chunk_end;
+        }
+    }
+
+    /*
+    * The code below implements a SIMD-based algorithm to determine if N bytes at a time
+    * need to be escaped.
+    *
+    * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
+    *
+    * The explanation will be limited to the first 8 bytes of the string for simplicity. However
+    * the vector instructions may work on larger vectors.
+    *
+    * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
+    *
+    * lower_bound: [20 20 20 20 20 20 20 20]
+    * backslash:   [5C 5C 5C 5C 5C 5C 5C 5C]
+    * dblquote:    [22 22 22 22 22 22 22 22]
+    *
+    * Next we load the first chunk of the ptr:
+    * [22 54 65 5C 73 74 69 6E] ("  T  e  \  s  t  i  n)
+    *
+    * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
+    * as no bytes are less than 32 (0x20):
+    * [0 0 0 0 0 0 0 0]
+    *
+    * Next, we check if any byte in chunk is equal to a backslash:
+    * [0 0 0 FF 0 0 0 0]
+    *
+    * Finally we check if any byte in chunk is equal to a double quote:
+    * [FF 0 0 0 0 0 0 0]
+    *
+    * Now we have three vectors where each byte indicates if the corresponding byte in chunk
+    * needs to be escaped. We combine these vectors with a series of logical OR instructions.
+    * This is the needs_escape vector and it is equal to:
+    * [FF 0 0 FF 0 0 0 0]
+    *
+    * Next we narrow the needs_escape vector into a 64-bit mask (see neon_match_mask) in which every
+    * input byte is represented by 4 bits, keeping a single set bit for each byte that needs escaping.
+    *
+    * If the mask is 0 then no bytes need to be escaped and we can continue to the next chunk. If the
+    * mask is not 0 then we have at least one byte that needs to be escaped; neon_next_match walks
+    * the set bits of the mask one match at a time.
+    */
+    while (search->ptr + sizeof(uint8x16_t) <= search->end) {
+        uint64_t mask = neon_rules_update(search->ptr);
+
+        if (!mask) {
+            search->ptr += sizeof(uint8x16_t);
+            continue;
+        }
+        search->matches_mask = mask;
+        search->has_matches = true;
+        search->chunk_base = search->ptr;
+        search->chunk_end = search->ptr + sizeof(uint8x16_t);
+        return neon_next_match(search);
+    }
+
+    // There are fewer than 16 bytes left.
+    unsigned long remaining = (search->end - search->ptr);
+    if (remaining >= SIMD_MINIMUM_THRESHOLD) {
+        char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining); // padded 16-byte copy of the tail inside the output buffer
+
+        uint64_t mask = neon_rules_update(s);
+
+        if (!mask) {
+            // Nothing to escape, ensure search_flush doesn't do anything by setting
+            // search->cursor to search->ptr.
+            search->buffer->len += remaining; // commit the bytes copy_remaining_bytes already placed in the output buffer
+            search->ptr = search->end;
+            search->cursor = search->end;
+            return 0;
+        }
+
+        search->matches_mask = mask;
+        search->has_matches = true;
+        search->chunk_end = search->end; // the tail chunk is shorter than a vector, so it ends at the end of the input
+        search->chunk_base = search->ptr;
+        return neon_next_match(search);
+    }
+
+    if (search->ptr < search->end) {
+        return search_escape_basic(search); // tail shorter than SIMD_MINIMUM_THRESHOLD: use the scalar search
+    }
+
+    search_flush(search);
+    return 0;
+}
+#endif /* HAVE_SIMD_NEON */
+
+#ifdef HAVE_SIMD_SSE2
+
+#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a) // unsigned a >= b: max(a, b) equals a; note 'a' is evaluated twice
+#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a) // unsigned a <= b
+#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1)) // unsigned a > b: bitwise NOT of (a <= b)
+#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a) // unsigned a < b
+
+static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
+{
+    int mask = search->matches_mask;
+    int index = trailing_zeros(mask); // the mask comes from _mm_movemask_epi8 (one bit per byte), so the bit position is the byte offset
+
+    // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
+    // If we want to use a similar approach for full escaping we'll need to ensure:
+    //     search->chunk_base + index >= search->ptr
+    // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
+    // is one byte after the previous match then:
+    //     search->chunk_base + index == search->ptr
+    search->ptr = search->chunk_base + index;
+    mask &= mask - 1; // clear the lowest set bit so the next call reports the following match
+    search->matches_mask = mask;
+    search_flush(search); // append the unescaped bytes that precede the match
+    return 1; // always non-zero: tells the caller there is a byte to escape at search->ptr
+}
+
+#if defined(__clang__) || defined(__GNUC__)
+#define TARGET_SSE2 __attribute__((target("sse2")))
+#else
+#define TARGET_SSE2
+#endif
+
+static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr) // returns a non-zero mask when any of the 16 bytes at ptr needs escaping
+{
+    __m128i chunk         = _mm_loadu_si128((__m128i const*)ptr); // unaligned 16-byte load; the caller must guarantee the bytes are readable
+
+    const __m128i lower_bound = _mm_set1_epi8(' '); // bytes below 0x20 must be escaped
+    const __m128i backslash   = _mm_set1_epi8('\\');
+    const __m128i dblquote    = _mm_set1_epi8('\"');
+
+    __m128i too_low       = _mm_cmplt_epu8(chunk, lower_bound); // unsigned compare via the helper macros defined above
+    __m128i has_backslash = _mm_cmpeq_epi8(chunk, backslash);
+    __m128i has_dblquote  = _mm_cmpeq_epi8(chunk, dblquote);
+    __m128i needs_escape  = _mm_or_si128(too_low, _mm_or_si128(has_backslash, has_dblquote));
+    return _mm_movemask_epi8(needs_escape); // one mask bit per input byte
+}
+
+static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
+{
+    if (RB_UNLIKELY(search->has_matches)) {
+        // There are more matches if search->matches_mask > 0.
+        if (search->matches_mask > 0) {
+            return sse2_next_match(search);
+        } else {
+            // sse2_next_match will only advance search->ptr up to the last matching character.
+            // Skip over any characters in the last chunk that occur after the last match.
+            search->has_matches = false;
+            if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) { // the last chunk was the (possibly short) tail of the input
+                search->ptr = search->end;
+            } else {
+                search->ptr = search->chunk_base + sizeof(__m128i);
+            }
+        }
+    }
+
+    while (search->ptr + sizeof(__m128i) <= search->end) { // scan full 16-byte chunks
+        int needs_escape_mask = sse2_update(search->ptr);
+
+        if (needs_escape_mask == 0) {
+            search->ptr += sizeof(__m128i);
+            continue;
+        }
+
+        search->has_matches = true;
+        search->matches_mask = needs_escape_mask;
+        search->chunk_base = search->ptr;
+        return sse2_next_match(search);
+    }
+
+    // There are fewer than 16 bytes left.
+    unsigned long remaining = (search->end - search->ptr);
+    if (remaining >= SIMD_MINIMUM_THRESHOLD) {
+        char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining); // padded 16-byte copy of the tail inside the output buffer
+
+        int needs_escape_mask = sse2_update(s);
+
+        if (needs_escape_mask == 0) {
+            // Nothing to escape, ensure search_flush doesn't do anything by setting
+            // search->cursor to search->ptr.
+            search->buffer->len += remaining; // commit the bytes copy_remaining_bytes already placed in the output buffer
+            search->ptr = search->end;
+            search->cursor = search->end;
+            return 0;
+        }
+
+        search->has_matches = true;
+        search->matches_mask = needs_escape_mask;
+        search->chunk_base = search->ptr;
+        return sse2_next_match(search);
+    }
+
+    if (search->ptr < search->end) {
+        return search_escape_basic(search); // tail shorter than SIMD_MINIMUM_THRESHOLD: use the scalar search
+    }
+
+    search_flush(search);
+    return 0;
+}
+
+#endif /* HAVE_SIMD_SSE2 */
+
+#endif /* ENABLE_SIMD */
+
 static const unsigned char script_safe_escape_table[256] = {
    // ASCII Control Characters
     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@ -990,6 +1307,12 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
    search.cursor = search.ptr;
    search.end = search.ptr + len;

+#ifdef ENABLE_SIMD
+    search.matches_mask = 0;
+    search.has_matches = false;
+    search.chunk_base = NULL;
+#endif /* ENABLE_SIMD */
+
    switch(rb_enc_str_coderange(obj)) {
        case ENC_CODERANGE_7BIT:
        case ENC_CODERANGE_VALID:
@ -1853,4 +2176,23 @@ void Init_generator(void)
    binary_encindex = rb_ascii8bit_encindex();

    rb_require("json/ext/generator/state");
+
+
+    switch(find_simd_implementation()) {
+#ifdef ENABLE_SIMD
+#ifdef HAVE_SIMD_NEON
+        case SIMD_NEON:
+            search_escape_basic_impl = search_escape_basic_neon;
+            break;
+#endif /* HAVE_SIMD_NEON */
+#ifdef HAVE_SIMD_SSE2
+        case SIMD_SSE2:
+            search_escape_basic_impl = search_escape_basic_sse2;
+            break;
+#endif /* HAVE_SIMD_SSE2 */
+#endif /* ENABLE_SIMD */
+        default:
+            search_escape_basic_impl = search_escape_basic;
+            break;
+    }
 }
--- a/ext/json/generator/simd.h
+++ b/ext/json/generator/simd.h
@ -0,0 +1,112 @@
+#include "extconf.h"
+
+typedef enum { // result type of find_simd_implementation()
+    SIMD_NONE, // no SIMD implementation selected
+    SIMD_NEON, // ARM NEON
+    SIMD_SSE2  // x86-64 SSE2
+} SIMD_Implementation;
+
+#ifdef ENABLE_SIMD
+
+#ifdef __clang__
+  #if __has_builtin(__builtin_ctzll)
+    #define HAVE_BUILTIN_CTZLL 1
+  #else
+    #define HAVE_BUILTIN_CTZLL 0
+  #endif
+#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+  #define HAVE_BUILTIN_CTZLL 1
+#else
+  #define HAVE_BUILTIN_CTZLL 0
+#endif
+
+static inline uint32_t trailing_zeros64(uint64_t input) { // number of zero bits below the lowest set bit of input
+#if HAVE_BUILTIN_CTZLL
+  return __builtin_ctzll(input); // NOTE(review): the builtin's result is undefined for 0 per the GCC documentation; callers should pass a non-zero mask
+#else
+  uint32_t trailing_zeros = 0;
+  uint64_t temp = input;
+  while ((temp & 1) == 0 && temp > 0) { // the 'temp > 0' test stops the loop for a zero input, which yields 0
+    trailing_zeros++;
+    temp >>= 1;
+  }
+  return trailing_zeros;
+#endif
+}
+
+static inline int trailing_zeros(int input) { // number of zero bits below the lowest set bit of input
+  #if HAVE_BUILTIN_CTZLL
+    return __builtin_ctz(input); // the 32-bit builtin is gated by the same HAVE_BUILTIN_CTZLL check as the 64-bit one
+  #else
+    int trailing_zeros = 0;
+    int temp = input;
+    while ((temp & 1) == 0 && temp > 0) { // the 'temp > 0' test stops the loop for a zero input, which yields 0
+      trailing_zeros++;
+      temp >>= 1;
+    }
+    return trailing_zeros;
+  #endif
+}
+
+#define SIMD_MINIMUM_THRESHOLD 6
+
+#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
+#include <arm_neon.h>
+
+#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
+static SIMD_Implementation find_simd_implementation(void) {
+    return SIMD_NEON; // no runtime detection: this section is only compiled when the NEON/AArch64 macros are defined
+}
+
+#define HAVE_SIMD_NEON 1
+
+// Loads the 64 bytes starting at `table` into four consecutive 16-byte NEON vectors.
+// Declared static inline because the definition lives in a header: with external linkage,
+// every translation unit that includes simd.h would emit its own conflicting definition.
+static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
+  uint8x16x4_t tab;
+  tab.val[0] = vld1q_u8(table);
+  tab.val[1] = vld1q_u8(table+16);
+  tab.val[2] = vld1q_u8(table+32);
+  tab.val[3] = vld1q_u8(table+48);
+  return tab;
+}
+
+#endif /* ARM Neon Support.*/
+
+#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+
+#ifdef HAVE_X86INTRIN_H
+#include <x86intrin.h>
+
+#define HAVE_SIMD_SSE2 1
+
+// find_simd_implementation is defined below whether or not <cpuid.h> is available, so the
+// marker must be set unconditionally; otherwise the generic fallback at the end of this
+// header would be compiled as well and the function would be defined twice.
+#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
+
+#ifdef HAVE_CPUID_H
+#include <cpuid.h>
+#endif /* HAVE_CPUID_H */
+
+// Selects the SIMD implementation at runtime: SIMD_SSE2 when the compiler offers
+// __builtin_cpu_supports and the CPU reports SSE2, SIMD_NONE otherwise.
+static SIMD_Implementation find_simd_implementation(void) {
+
+#if defined(__GNUC__ ) || defined(__clang__)
+#ifdef __GNUC__ 
+    __builtin_cpu_init();
+#endif /* __GNUC__  */
+
+    // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
+    if (__builtin_cpu_supports("sse2")) {
+        return SIMD_SSE2;
+    }
+#endif /* __GNUC__ || __clang__*/
+
+    return SIMD_NONE;
+}
+
+#endif /* HAVE_X86INTRIN_H */
+#endif /* X86_64 Support */
+
+#endif /* ENABLE_SIMD */
+
+#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
+static SIMD_Implementation find_simd_implementation(void) { // fallback used when no section above defined FIND_SIMD_IMPLEMENTATION_DEFINED
+    return SIMD_NONE;
+}
+#endif
--- a/test/json/json_generator_test.rb
+++ b/test/json/json_generator_test.rb
@ -410,18 +410,34 @@ class JSONGeneratorTest < Test::Unit::TestCase
    json = '["\\\\.(?i:gif|jpe?g|png)$"]'
    assert_equal json, generate(data)
    #
-    data = [ '\\"' ]
-    json = '["\\\\\""]'
+    data = [ '\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$' ]
+    json = '["\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$"]'
+    assert_equal json, generate(data)
+    #
+    data = [ '\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"' ]
+    json = '["\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\""]'
    assert_equal json, generate(data)
    #
    data = [ '/' ]
    json = '["/"]'
    assert_equal json, generate(data)
    #
+    data = [ '////////////////////////////////////////////////////////////////////////////////////' ]
+    json = '["////////////////////////////////////////////////////////////////////////////////////"]'
+    assert_equal json, generate(data)
+    #
    data = [ '/' ]
    json = '["\/"]'
    assert_equal json, generate(data, :script_safe => true)
    #
+    data = [ '///////////' ]
+    json = '["\/\/\/\/\/\/\/\/\/\/\/"]'
+    assert_equal json, generate(data, :script_safe => true)
+    #
+    data = [ '///////////////////////////////////////////////////////' ]
+    json = '["\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/"]'
+    assert_equal json, generate(data, :script_safe => true)
+    #
    data = [ "\u2028\u2029" ]
    json = '["\u2028\u2029"]'
    assert_equal json, generate(data, :script_safe => true)
@ -438,6 +454,10 @@ class JSONGeneratorTest < Test::Unit::TestCase
    json = '["\""]'
    assert_equal json, generate(data)
    #
+    data = ['"""""""""""""""""""""""""']
+    json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]'
+    assert_equal json, generate(data)
+    #
    data = ["'"]
    json = '["\\\'"]'
    assert_equal '["\'"]', generate(data)
@ -445,6 +465,72 @@ class JSONGeneratorTest < Test::Unit::TestCase
    data = ["倩", "瀨"]
    json = '["倩","瀨"]'
    assert_equal json, generate(data, script_safe: true)
+    #
+    data = '["This is a "test" of the emergency broadcast system."]'
+    json = "\"[\\\"This is a \\\"test\\\" of the emergency broadcast system.\\\"]\""
+    assert_equal json, generate(data)
+    #
+    data = '\tThis is a test of the emergency broadcast system.'
+    json = "\"\\\\tThis is a test of the emergency broadcast system.\""
+    assert_equal json, generate(data)
+    #
+    data = 'This\tis a test of the emergency broadcast system.'
+    json = "\"This\\\\tis a test of the emergency broadcast system.\""
+    assert_equal json, generate(data)
+    #
+    data = 'This is\ta test of the emergency broadcast system.'
+    json = "\"This is\\\\ta test of the emergency broadcast system.\""
+    assert_equal json, generate(data)
+    #
+    data = 'This is a test of the emergency broadcast\tsystem.'
+    json = "\"This is a test of the emergency broadcast\\\\tsystem.\""
+    assert_equal json, generate(data)
+    #
+    data = 'This is a test of the emergency broadcast\tsystem.\n'
+    json = "\"This is a test of the emergency broadcast\\\\tsystem.\\\\n\""
+    assert_equal json, generate(data)
+    data = '"' * 15
+    json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\""
+    assert_equal json, generate(data)
+    data = "\"\"\"\"\"\"\"\"\"\"\"\"\"\"a"
+    json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"a\""
+    assert_equal json, generate(data)
+    data = "\u0001\u0001\u0001\u0001"
+    json = "\"\\u0001\\u0001\\u0001\\u0001\""
+    assert_equal json, generate(data)
+    data = "\u0001a\u0001a\u0001a\u0001a"
+    json = "\"\\u0001a\\u0001a\\u0001a\\u0001a\""
+    assert_equal json, generate(data)
+    data = "\u0001aa\u0001aa"
+    json = "\"\\u0001aa\\u0001aa\""
+    assert_equal json, generate(data)
+    data = "\u0001aa\u0001aa\u0001aa"
+    json = "\"\\u0001aa\\u0001aa\\u0001aa\""
+    assert_equal json, generate(data)
+    data = "\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa"
+    json = "\"\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\""
+    assert_equal json, generate(data)
+    data = "\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002"
+    json = "\"\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\""
+    assert_equal json, generate(data)
+    data = "ab\u0002c"
+    json = "\"ab\\u0002c\""
+    assert_equal json, generate(data)
+    data = "ab\u0002cab\u0002cab\u0002cab\u0002c"
+    json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002c\""
+    assert_equal json, generate(data)
+    data = "ab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002c"
+    json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002c\""
+    assert_equal json, generate(data)
+    data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f"
+    json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\""
+    assert_equal json, generate(data)
+    data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b"
+    json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\""
+    assert_equal json, generate(data)
+    data = "a\n\t\f\b\n\t\f\b\n\t\f\b\n\t"
+    json = "\"a\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\""
+    assert_equal json, generate(data)
  end

  def test_string_subclass