[ruby/json] Introduce ARM Neon and SSE2 SIMD.

(https://github.com/ruby/json/pull/743) See the pull request for the long development history: https://github.com/ruby/json/pull/743 ``` == Encoding activitypub.json (52595 bytes) ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 2.913k i/100ms Calculating ------------------------------------- after 29.377k (± 2.0%) i/s (34.04 μs/i) - 148.563k in 5.059169s Comparison: before: 23314.1 i/s after: 29377.3 i/s - 1.26x faster == Encoding citm_catalog.json (500298 bytes) ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 152.000 i/100ms Calculating ------------------------------------- after 1.569k (± 0.8%) i/s (637.49 μs/i) - 7.904k in 5.039001s Comparison: before: 1485.6 i/s after: 1568.7 i/s - 1.06x faster == Encoding twitter.json (466906 bytes) ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 309.000 i/100ms Calculating ------------------------------------- after 3.115k (± 3.1%) i/s (321.01 μs/i) - 15.759k in 5.063776s Comparison: before: 2508.3 i/s after: 3115.2 i/s - 1.24x faster ``` https://github.com/ruby/json/commit/49003523da
2025-04-28 07:57:10 -05:00 · 2025-04-28 07:57:10 -05:00 · a3ec53bbb0
commit a3ec53bbb0
parent 7f0c6d30d3
4 changed files with 584 additions and 13 deletions
--- a/ext/json/generator/extconf.rb
+++ b/ext/json/generator/extconf.rb
@ -6,5 +6,36 @@ if RUBY_ENGINE == 'truffleruby'
 else
  append_cflags("-std=c99")
  $defs << "-DJSON_GENERATOR"
  # SIMD escaping is on by default; setting JSON_DISABLE_SIMD in the environment flips the default to off.
  if enable_config('generator-use-simd', !ENV["JSON_DISABLE_SIMD"])
    if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/
      # Only define ENABLE_SIMD when the header, the vector type AND a small
      # NEON program are all usable. Previously the probe result was discarded
      # and the define was pushed as soon as arm_neon.h was found.
      if have_header('arm_neon.h') &&
         have_type('uint8x16_t', ['arm_neon.h']) &&
         try_compile(<<~'SRC')
           #include <arm_neon.h>
           int main() {
               uint8x16_t test = vdupq_n_u8(32);
               return 0;
           }
         SRC
        $defs.push("-DENABLE_SIMD")
      end
    end
    # Same three-step probe for SSE2; the test program is compiled with -msse2.
    if have_header('x86intrin.h') && have_type('__m128i', ['x86intrin.h']) && try_compile(<<~'SRC', '-msse2')
      #include <x86intrin.h>
      int main() {
          __m128i test = _mm_set1_epi8(32);
          return 0;
      }
    SRC
      $defs.push("-DENABLE_SIMD")
    end
    # Records HAVE_CPUID_H, which simd.h checks before including <cpuid.h>.
    have_header('cpuid.h')
  end
  create_header
  create_makefile 'json/ext/generator'
 end
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@ -5,6 +5,8 @@
 #include <math.h>
 #include <ctype.h>
 #include "simd.h"
 /* ruby api and some helpers */
 typedef struct JSON_Generator_StateStruct {
@ -109,12 +111,40 @@ typedef struct _search_state {
    const char *end;
    const char *cursor;
    FBuffer *buffer;
 #ifdef ENABLE_SIMD
    const char *chunk_base;
    const char *chunk_end;
    bool has_matches;
 #ifdef HAVE_SIMD_NEON
    uint64_t matches_mask;
 #elif HAVE_SIMD_SSE2
    int matches_mask;
 #else
 #error "Unknown SIMD Implementation."
 #endif /* HAVE_SIMD_NEON */
 #endif /* ENABLE_SIMD */ 
 } search_state;
-static inline void search_flush(search_state *search)
+#if (defined(__GNUC__ ) || defined(__clang__))
 #define FORCE_INLINE __attribute__((always_inline))
 #else
 #define FORCE_INLINE
 #endif
 static inline FORCE_INLINE void search_flush(search_state *search)
 {
-    fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
+    // Do not remove this conditional without profiling, specifically escape-heavy text.
-    search->cursor = search->ptr;
+    // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
    // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
    // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
    // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
    // nothing needs to be flushed, we can save a few memory references with this conditional.
    if (search->ptr > search->cursor) {
        fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
        search->cursor = search->ptr;
    }
 }
 static const unsigned char escape_table_basic[256] = {
@ -130,6 +160,8 @@ static const unsigned char escape_table_basic[256] = {
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 };
 /* Escape-search routine called by convert_UTF8_to_JSON. Init_generator points it at the
  * NEON, SSE2 or scalar (search_escape_basic) implementation. */
 static unsigned char (*search_escape_basic_impl)(search_state *);
 static inline unsigned char search_escape_basic(search_state *search)
 {
    while (search->ptr < search->end) {
@ -144,7 +176,8 @@ static inline unsigned char search_escape_basic(search_state *search)
    return 0;
 }
-static inline void escape_UTF8_char_basic(search_state *search) {
+static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
 {
    const unsigned char ch = (unsigned char)*search->ptr;
    switch (ch) {
        case '"':  fbuffer_append(search->buffer, "\\\"", 2); break;
@ -156,11 +189,15 @@ static inline void escape_UTF8_char_basic(search_state *search) {
        case '\r': fbuffer_append(search->buffer, "\\r", 2);  break;
        case '\t': fbuffer_append(search->buffer, "\\t", 2);  break;
        default: {
-            const char *hexdig = "0123456789abcdef";
+            if (ch < ' ') {
-            char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
+                const char *hexdig = "0123456789abcdef";
-            scratch[4] = hexdig[(ch >> 4) & 0xf];
+                char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
-            scratch[5] = hexdig[ch & 0xf];
+                scratch[4] = hexdig[(ch >> 4) & 0xf];
-            fbuffer_append(search->buffer, scratch, 6);
+                scratch[5] = hexdig[ch & 0xf];
                fbuffer_append(search->buffer, scratch, 6);
            } else {
                fbuffer_append_char(search->buffer, ch);
            }
            break;
        }
    }
@ -186,12 +223,13 @@ static inline void escape_UTF8_char_basic(search_state *search) {
 */
 static inline void convert_UTF8_to_JSON(search_state *search)
 {
-    while (search_escape_basic(search)) {
+    while (search_escape_basic_impl(search)) {
        escape_UTF8_char_basic(search);
    }
 }
-static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) {
+static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
 {
    const unsigned char ch = (unsigned char)*search->ptr;
    switch (ch_len) {
        case 1: {
@ -227,6 +265,285 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
    search->cursor = (search->ptr += ch_len);
 }
 #ifdef ENABLE_SIMD
 /*
  * Stage the last 'len' input bytes at the tail of the output buffer so a full vector of
  * 'vec_len' bytes can be loaded from them.
  *
  * Returns a pointer to the staged bytes. The buffer length is NOT advanced; the caller
  * decides whether the staged copy becomes output.
  */
 static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
 {
    // Emit everything scanned so far; only the trailing 'len' characters remain pending.
    search_flush(search);
    FBuffer *out = search->buffer;
    fbuffer_inc_capa(out, vec_len);
    char *tail = out->ptr + out->len;
    // Fill a whole vector with a filler character that never needs escaping.
    // This looks wasteful, but a vector-sized memset is very cheap.
    memset(tail, 'X', vec_len);
    // Optimistic copy: if nothing needs escaping the bytes already sit in their final place,
    // otherwise the region merely served as scratch space for the vector load.
    MEMCPY(tail, search->ptr, char, len);
    return tail;
 }
 #ifdef HAVE_SIMD_NEON
 /*
  * Move search->ptr to the next pending match of the current chunk, drop that match from
  * search->matches_mask and flush the unescaped bytes that precede it. Always returns 1.
  */
 static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
 {
    const uint64_t pending = search->matches_mask;
    // The mask keeps one bit per nibble, so dividing the bit position by 4 yields the byte offset.
    //
    // This relies on escape_UTF8_char_basic advancing search->ptr by at most one character.
    // A variant for full escaping would have to guarantee:
    //     search->chunk_base + offset >= search->ptr
    // With single-character advances, a match directly following the previous one gives:
    //     search->chunk_base + offset == search->ptr
    search->ptr = search->chunk_base + (trailing_zeros64(pending) >> 2);
    // Clear the lowest set bit: that match is now being handled.
    search->matches_mask = pending & (pending - 1);
    search_flush(search);
    return 1;
 }
 // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
 //
 // Condense a 16-byte comparison result into a 64-bit integer with one flag bit per input byte.
 static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
 {
    // Shift every 16-bit lane right by 4 and narrow it to 8 bits: each input byte becomes one nibble.
    const uint8x8_t nibbles = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
    // Keep only the top bit of each nibble so a match is represented by exactly one bit.
    return vget_lane_u64(vreinterpret_u64_u8(nibbles), 0) & 0x8888888888888888ull;
 }
 /*
  * Load 16 bytes from 'ptr' and return the neon_match_mask of those that need escaping:
  * bytes below ' ' (0x20), backslashes and double quotes.
  */
 static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
 {
    const uint8x16_t bytes = vld1q_u8((const unsigned char *)ptr);
    const uint8x16_t below_space  = vcltq_u8(bytes, vdupq_n_u8(' '));
    const uint8x16_t is_backslash = vceqq_u8(bytes, vdupq_n_u8('\\'));
    const uint8x16_t is_dblquote  = vceqq_u8(bytes, vdupq_n_u8('\"'));
    const uint8x16_t quote_or_backslash = vorrq_u8(is_backslash, is_dblquote);
    return neon_match_mask(vorrq_u8(below_space, quote_or_backslash));
 }
 /*
  * NEON implementation of the basic escape search.
  *
  * Scans the input 16 bytes at a time. Returns 1 with search->ptr positioned on a byte that
  * needs escaping (the bytes before it have been flushed to the output buffer). Returns 0 from
  * the vector paths once the remaining input has been written out. Tails shorter than
  * SIMD_MINIMUM_THRESHOLD are delegated to search_escape_basic.
  *
  * State kept between calls: has_matches/matches_mask hold the not yet reported matches of
  * the chunk starting at chunk_base and ending at chunk_end.
  */
 static inline unsigned char search_escape_basic_neon(search_state *search)
 {
    if (RB_UNLIKELY(search->has_matches)) {
        // There are more matches if search->matches_mask > 0.
        if (search->matches_mask > 0) {
            return neon_next_match(search);
        } else {
            // neon_next_match will only advance search->ptr up to the last matching character.
            // Skip over any characters in the last chunk that occur after the last match.
            search->has_matches = false;
            search->ptr = search->chunk_end;
        }
    }
    /*
    * The code below implements a SIMD-based algorithm to determine if N bytes at a time
    * need to be escaped.
    *
    * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
    *
    * The explanation will be limited to the first 8 bytes of the string for simplicity. However
    * the vector instructions may work on larger vectors.
    *
    * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
    *
    * lower_bound: [20 20 20 20 20 20 20 20]
    * backslash:   [5C 5C 5C 5C 5C 5C 5C 5C]
    * dblquote:    [22 22 22 22 22 22 22 22]
    *
    * Next we load the first chunk of the ptr:
    * [22 54 65 5C 73 74 69 6E] ("  T  e  \  s  t  i  n)
    *
    * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
    * as no bytes are less than 32 (0x20):
    * [0 0 0 0 0 0 0 0]
    *
    * Next, we check if any byte in chunk is equal to a backslash:
    * [0 0 0 FF 0 0 0 0]
    *
    * Finally we check if any byte in chunk is equal to a double quote:
    * [FF 0 0 0 0 0 0 0]
    *
    * Now we have three vectors where each byte indicates if the corresponding byte in chunk
    * needs to be escaped. We combine these vectors with a series of logical OR instructions.
    * This is the needs_escape vector and it is equal to:
    * [FF 0 0 FF 0 0 0 0]
    *
    * Finally neon_match_mask condenses needs_escape into a 64-bit mask that holds one bit per
    * input byte (bit 4*i+3 for byte i). If the mask is 0 then no bytes need to be escaped and we
    * can continue to the next chunk. If the mask is not 0 then we have at least one byte that
    * needs to be escaped; neon_next_match reports the matches one at a time.
    */
    while (search->ptr + sizeof(uint8x16_t) <= search->end) {
        uint64_t mask = neon_rules_update(search->ptr);
        if (!mask) {
            search->ptr += sizeof(uint8x16_t);
            continue;
        }
        // Remember the chunk so the following calls can report its remaining matches.
        search->matches_mask = mask;
        search->has_matches = true;
        search->chunk_base = search->ptr;
        search->chunk_end = search->ptr + sizeof(uint8x16_t);
        return neon_next_match(search);
    }
    // There are fewer than 16 bytes left.
    unsigned long remaining = (search->end - search->ptr);
    if (remaining >= SIMD_MINIMUM_THRESHOLD) {
        // Scan a padded copy of the tail that was staged inside the output buffer.
        char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
        uint64_t mask = neon_rules_update(s);
        if (!mask) {
            // Nothing to escape: the staged copy is the output, so just account for it.
            // Ensure search_flush doesn't do anything by setting
            // search->cursor to search->ptr.
            search->buffer->len += remaining;
            search->ptr = search->end;
            search->cursor = search->end;
            return 0;
        }
        // Match offsets from the staged copy are applied to the original input,
        // hence chunk_base is search->ptr and the chunk ends at search->end.
        search->matches_mask = mask;
        search->has_matches = true;
        search->chunk_end = search->end;
        search->chunk_base = search->ptr;
        return neon_next_match(search);
    }
    // Tail shorter than SIMD_MINIMUM_THRESHOLD: use the scalar search.
    if (search->ptr < search->end) {
        return search_escape_basic(search);
    }
    search_flush(search);
    return 0;
 }
 #endif /* HAVE_SIMD_NEON */
 #ifdef HAVE_SIMD_SSE2
 /*
  * Unsigned 8-bit comparisons built from _mm_max_epu8 and _mm_cmpeq_epi8:
  * a >= b exactly when max(a, b) == a; the other three are derived from it.
  * Note: the arguments are expanded more than once, so pass side-effect free expressions.
  */
 #define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
 #define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
 #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
 #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
 /*
  * Move search->ptr to the next pending match of the current chunk, drop that match from
  * search->matches_mask and flush the unescaped bytes that precede it. Always returns 1.
  */
 static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
 {
    const int pending = search->matches_mask;
    // One mask bit per byte: the position of the lowest set bit is the byte offset.
    //
    // This relies on escape_UTF8_char_basic advancing search->ptr by at most one character.
    // A variant for full escaping would have to guarantee:
    //     search->chunk_base + offset >= search->ptr
    // With single-character advances, a match directly following the previous one gives:
    //     search->chunk_base + offset == search->ptr
    search->ptr = search->chunk_base + trailing_zeros(pending);
    // Clear the lowest set bit: that match is now being handled.
    search->matches_mask = pending & (pending - 1);
    search_flush(search);
    return 1;
 }
 #if defined(__clang__) || defined(__GNUC__)
 #define TARGET_SSE2 __attribute__((target("sse2")))
 #else
 #define TARGET_SSE2
 #endif
 static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
 {
    __m128i chunk         = _mm_loadu_si128((__m128i const*)ptr);
    const __m128i lower_bound = _mm_set1_epi8(' '); 
    const __m128i backslash   = _mm_set1_epi8('\\');
    const __m128i dblquote    = _mm_set1_epi8('\"');
    __m128i too_low       = _mm_cmplt_epu8(chunk, lower_bound);
    __m128i has_backslash = _mm_cmpeq_epi8(chunk, backslash);
    __m128i has_dblquote  = _mm_cmpeq_epi8(chunk, dblquote);
    __m128i needs_escape  = _mm_or_si128(too_low, _mm_or_si128(has_backslash, has_dblquote));
    return _mm_movemask_epi8(needs_escape);
 }
 /*
  * SSE2 implementation of the basic escape search.
  *
  * Scans the input 16 bytes at a time. Returns 1 with search->ptr positioned on a byte that
  * needs escaping (the bytes before it have been flushed to the output buffer). Returns 0 from
  * the vector paths once the remaining input has been written out. Tails shorter than
  * SIMD_MINIMUM_THRESHOLD are delegated to search_escape_basic.
  *
  * State kept between calls: has_matches/matches_mask hold the not yet reported matches of
  * the chunk starting at chunk_base.
  */
 static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
 {
    if (RB_UNLIKELY(search->has_matches)) {
        // There are more matches if search->matches_mask > 0.
        if (search->matches_mask > 0) {
            return sse2_next_match(search);
        } else {
            // sse2_next_match will only advance search->ptr up to the last matching character.
            // Skip over any characters in the last chunk that occur after the last match.
            search->has_matches = false;
            // The last chunk may have been a padded tail shorter than a vector, so never step past the end.
            if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
                search->ptr = search->end;
            } else {
                search->ptr = search->chunk_base + sizeof(__m128i);
            }
        }
    }
    // Full 16-byte chunks: skip those without matches, report the first match otherwise.
    while (search->ptr + sizeof(__m128i) <= search->end) {
        int needs_escape_mask = sse2_update(search->ptr);
        if (needs_escape_mask == 0) {
            search->ptr += sizeof(__m128i);
            continue;
        }
        // Remember the chunk so the following calls can report its remaining matches.
        search->has_matches = true;
        search->matches_mask = needs_escape_mask;
        search->chunk_base = search->ptr;
        return sse2_next_match(search);
    }
    // There are fewer than 16 bytes left.
    unsigned long remaining = (search->end - search->ptr);
    if (remaining >= SIMD_MINIMUM_THRESHOLD) {
        // Scan a padded copy of the tail that was staged inside the output buffer.
        char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
        int needs_escape_mask = sse2_update(s);
        if (needs_escape_mask == 0) {
            // Nothing to escape: the staged copy is the output, so just account for it.
            // Ensure search_flush doesn't do anything by setting
            // search->cursor to search->ptr.
            search->buffer->len += remaining;
            search->ptr = search->end;
            search->cursor = search->end;
            return 0;
        }
        // Match offsets from the staged copy are applied to the original input,
        // hence chunk_base is search->ptr.
        search->has_matches = true;
        search->matches_mask = needs_escape_mask;
        search->chunk_base = search->ptr;
        return sse2_next_match(search);
    }
    // Tail shorter than SIMD_MINIMUM_THRESHOLD: use the scalar search.
    if (search->ptr < search->end) {
        return search_escape_basic(search);
    }
    search_flush(search);
    return 0;
 }
 #endif /* HAVE_SIMD_SSE2 */
 #endif /* ENABLE_SIMD */
 static const unsigned char script_safe_escape_table[256] = {
    // ASCII Control Characters
     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@ -990,6 +1307,12 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
    search.cursor = search.ptr;
    search.end = search.ptr + len;
 #ifdef ENABLE_SIMD
    search.matches_mask = 0;
    search.has_matches = false;
    search.chunk_base = NULL;
 #endif /* ENABLE_SIMD */
    switch(rb_enc_str_coderange(obj)) {
        case ENC_CODERANGE_7BIT:
        case ENC_CODERANGE_VALID:
@ -1853,4 +2176,23 @@ void Init_generator(void)
    binary_encindex = rb_ascii8bit_encindex();
    rb_require("json/ext/generator/state");
    switch(find_simd_implementation()) {
 #ifdef ENABLE_SIMD
 #ifdef HAVE_SIMD_NEON
        case SIMD_NEON:
            search_escape_basic_impl = search_escape_basic_neon;
            break;
 #endif /* HAVE_SIMD_NEON */
 #ifdef HAVE_SIMD_SSE2
        case SIMD_SSE2:
            search_escape_basic_impl = search_escape_basic_sse2;
            break;
 #endif /* HAVE_SIMD_SSE2 */
 #endif /* ENABLE_SIMD */
        default:
            search_escape_basic_impl = search_escape_basic;
            break;
    }
 }
--- a/ext/json/generator/simd.h
+++ b/ext/json/generator/simd.h
@ -0,0 +1,112 @@
 #include "extconf.h"
 /* SIMD back ends that find_simd_implementation() can report. */
 typedef enum {
    SIMD_NONE, /* no SIMD support: callers use the scalar code */
    SIMD_NEON, /* ARM NEON */
    SIMD_SSE2  /* x86-64 SSE2 */
 } SIMD_Implementation;
 #ifdef ENABLE_SIMD
 /*
  * HAVE_BUILTIN_CTZLL is 1 when the compiler's count-trailing-zeros builtins are used:
  * probed with __has_builtin on clang, assumed for GCC >= 4.3, 0 for any other compiler.
  * Both trailing_zeros64 and trailing_zeros are gated on this macro.
  */
 #ifdef __clang__
  #if __has_builtin(__builtin_ctzll)
    #define HAVE_BUILTIN_CTZLL 1
  #else
    #define HAVE_BUILTIN_CTZLL 0
  #endif
 #elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
  #define HAVE_BUILTIN_CTZLL 1
 #else
  #define HAVE_BUILTIN_CTZLL 0
 #endif
 /*
  * Number of consecutive zero bits at the low end of a 64-bit value.
  * Callers are expected to pass a non-zero value (the portable fallback returns 0 for 0).
  */
 static inline uint32_t trailing_zeros64(uint64_t input) {
 #if HAVE_BUILTIN_CTZLL
  return __builtin_ctzll(input);
 #else
  // Portable fallback: shift the value right until its lowest bit is set.
  uint32_t count = 0;
  for (uint64_t rest = input; rest > 0 && (rest & 1) == 0; rest >>= 1) {
    count++;
  }
  return count;
 #endif
 }
 /*
  * Number of consecutive zero bits at the low end of an int-sized bit mask.
  *
  * The value is treated as an unsigned bit pattern, so a mask whose sign bit is set is
  * handled the same way by the builtin and by the portable fallback (the fallback used to
  * return 0 for every negative input). Callers are expected to pass a non-zero value: the
  * builtin is undefined for 0, the fallback returns 0.
  */
 static inline int trailing_zeros(int input) {
  const unsigned int bits = (unsigned int)input;
  #if HAVE_BUILTIN_CTZLL
    return __builtin_ctz(bits);
  #else
    // Portable fallback: shift the value right until its lowest bit is set.
    int count = 0;
    for (unsigned int rest = bits; rest != 0 && (rest & 1u) == 0; rest >>= 1) {
      count++;
    }
    return count;
  #endif
 }
 /* Input tails shorter than this many bytes are searched with the scalar code instead of a padded vector load. */
 #define SIMD_MINIMUM_THRESHOLD 6
 #if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
 #include <arm_neon.h>
 #define FIND_SIMD_IMPLEMENTATION_DEFINED 1
 /* This definition is only compiled when the NEON target macros are set, so NEON is reported without any runtime detection. */
 static SIMD_Implementation find_simd_implementation(void) {
    return SIMD_NEON;
 }
 #define HAVE_SIMD_NEON 1
 /*
  * Load 64 consecutive bytes starting at 'table' into four 16-byte NEON registers.
  *
  * Declared static inline because this header defines the function body: an external
  * definition would be emitted by every translation unit that includes the header.
  */
 static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
  uint8x16x4_t tab;
  tab.val[0] = vld1q_u8(table);
  tab.val[1] = vld1q_u8(table+16);
  tab.val[2] = vld1q_u8(table+32);
  tab.val[3] = vld1q_u8(table+48);
  return tab;
 }
 #endif /* ARM Neon Support.*/
 #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
 #ifdef HAVE_X86INTRIN_H
 #include <x86intrin.h>
 #define HAVE_SIMD_SSE2 1
 #ifdef HAVE_CPUID_H
 #include <cpuid.h>
 #endif /* HAVE_CPUID_H */
 /*
  * find_simd_implementation is defined below whether or not <cpuid.h> exists, so the marker
  * that suppresses the SIMD_NONE fallback definition must be set unconditionally as well;
  * otherwise the function would be defined twice when <cpuid.h> is missing.
  */
 #define FIND_SIMD_IMPLEMENTATION_DEFINED 1
 /*
  * Runtime detection for x86-64: returns SIMD_SSE2 when the CPU reports SSE2 support
  * (GCC/Clang builtins only), SIMD_NONE otherwise.
  */
 static SIMD_Implementation find_simd_implementation(void) {
 #if defined(__GNUC__ ) || defined(__clang__)
 #ifdef __GNUC__
    __builtin_cpu_init();
 #endif /* __GNUC__  */
    // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
    if (__builtin_cpu_supports("sse2")) {
        return SIMD_SSE2;
    }
 #endif /* __GNUC__ || __clang__*/
    return SIMD_NONE;
 }
 #endif /* HAVE_X86INTRIN_H */
 #endif /* X86_64 Support */
 #endif /* ENABLE_SIMD */
 /* Fallback used when no platform-specific section above defined FIND_SIMD_IMPLEMENTATION_DEFINED: no SIMD is reported. */
 #ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
 static SIMD_Implementation find_simd_implementation(void) {
    return SIMD_NONE;
 }
 #endif
--- a/test/json/json_generator_test.rb
+++ b/test/json/json_generator_test.rb
@ -410,18 +410,34 @@ class JSONGeneratorTest < Test::Unit::TestCase
    json = '["\\\\.(?i:gif|jpe?g|png)$"]'
    assert_equal json, generate(data)
    #
-    data = [ '\\"' ]
+    data = [ '\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$' ]
-    json = '["\\\\\""]'
+    json = '["\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$"]'
    assert_equal json, generate(data)
    #
    data = [ '\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"' ]
    json = '["\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\""]'
    assert_equal json, generate(data)
    #
    data = [ '/' ]
    json = '["/"]'
    assert_equal json, generate(data)
    #
    data = [ '////////////////////////////////////////////////////////////////////////////////////' ]
    json = '["////////////////////////////////////////////////////////////////////////////////////"]'
    assert_equal json, generate(data)
    #
    data = [ '/' ]
    json = '["\/"]'
    assert_equal json, generate(data, :script_safe => true)
    #
    data = [ '///////////' ]
    json = '["\/\/\/\/\/\/\/\/\/\/\/"]'
    assert_equal json, generate(data, :script_safe => true)
    #
    data = [ '///////////////////////////////////////////////////////' ]
    json = '["\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/"]'
    assert_equal json, generate(data, :script_safe => true)
    #
    data = [ "\u2028\u2029" ]
    json = '["\u2028\u2029"]'
    assert_equal json, generate(data, :script_safe => true)
@ -438,6 +454,10 @@ class JSONGeneratorTest < Test::Unit::TestCase
    json = '["\""]'
    assert_equal json, generate(data)
    #
    data = ['"""""""""""""""""""""""""']
    json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]'
    assert_equal json, generate(data)
    #
    data = ["'"]
    json = '["\\\'"]'
    assert_equal '["\'"]', generate(data)
@ -445,6 +465,72 @@ class JSONGeneratorTest < Test::Unit::TestCase
    data = ["倩", "瀨"]
    json = '["倩","瀨"]'
    assert_equal json, generate(data, script_safe: true)
    #
    data = '["This is a "test" of the emergency broadcast system."]'
    json = "\"[\\\"This is a \\\"test\\\" of the emergency broadcast system.\\\"]\""
    assert_equal json, generate(data)
    #
    data = '\tThis is a test of the emergency broadcast system.'
    json = "\"\\\\tThis is a test of the emergency broadcast system.\""
    assert_equal json, generate(data)
    #
    data = 'This\tis a test of the emergency broadcast system.'
    json = "\"This\\\\tis a test of the emergency broadcast system.\""
    assert_equal json, generate(data)
    #
    data = 'This is\ta test of the emergency broadcast system.'
    json = "\"This is\\\\ta test of the emergency broadcast system.\""
    assert_equal json, generate(data)
    #
    data = 'This is a test of the emergency broadcast\tsystem.'
    json = "\"This is a test of the emergency broadcast\\\\tsystem.\""
    assert_equal json, generate(data)
    #
    data = 'This is a test of the emergency broadcast\tsystem.\n'
    json = "\"This is a test of the emergency broadcast\\\\tsystem.\\\\n\""
    assert_equal json, generate(data)
    data = '"' * 15
    json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\""
    assert_equal json, generate(data)
    data = "\"\"\"\"\"\"\"\"\"\"\"\"\"\"a"
    json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"a\""
    assert_equal json, generate(data)
    data = "\u0001\u0001\u0001\u0001"
    json = "\"\\u0001\\u0001\\u0001\\u0001\""
    assert_equal json, generate(data)
    data = "\u0001a\u0001a\u0001a\u0001a"
    json = "\"\\u0001a\\u0001a\\u0001a\\u0001a\""
    assert_equal json, generate(data)
    data = "\u0001aa\u0001aa"
    json = "\"\\u0001aa\\u0001aa\""
    assert_equal json, generate(data)
    data = "\u0001aa\u0001aa\u0001aa"
    json = "\"\\u0001aa\\u0001aa\\u0001aa\""
    assert_equal json, generate(data)
    data = "\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa"
    json = "\"\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\""
    assert_equal json, generate(data)
    data = "\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002"
    json = "\"\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\""
    assert_equal json, generate(data)
    data = "ab\u0002c"
    json = "\"ab\\u0002c\""
    assert_equal json, generate(data)
    data = "ab\u0002cab\u0002cab\u0002cab\u0002c"
    json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002c\""
    assert_equal json, generate(data)
    data = "ab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002c"
    json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002c\""
    assert_equal json, generate(data)
    data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f"
    json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\""
    assert_equal json, generate(data)
    data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b"
    json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\""
    assert_equal json, generate(data)
    data = "a\n\t\f\b\n\t\f\b\n\t\f\b\n\t"
    json = "\"a\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\""
    assert_equal json, generate(data)
  end
  def test_string_subclass