[ruby/json] Introduce ARM Neon and SSE2 SIMD.

(https://github.com/ruby/json/pull/743)

See the pull request for the long development history: https://github.com/ruby/json/pull/743

```
== Encoding activitypub.json (52595 bytes)
ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
               after     2.913k i/100ms
Calculating -------------------------------------
               after     29.377k (± 2.0%) i/s   (34.04 μs/i) -    148.563k in   5.059169s

Comparison:
              before:    23314.1 i/s
               after:    29377.3 i/s - 1.26x  faster

== Encoding citm_catalog.json (500298 bytes)
ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
               after   152.000 i/100ms
Calculating -------------------------------------
               after      1.569k (± 0.8%) i/s  (637.49 μs/i) -      7.904k in   5.039001s

Comparison:
              before:     1485.6 i/s
               after:     1568.7 i/s - 1.06x  faster

== Encoding twitter.json (466906 bytes)
ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
               after   309.000 i/100ms
Calculating -------------------------------------
               after      3.115k (± 3.1%) i/s  (321.01 μs/i) -     15.759k in   5.063776s

Comparison:
              before:     2508.3 i/s
               after:     3115.2 i/s - 1.24x  faster
```

https://github.com/ruby/json/commit/49003523da
This commit is contained in:
Scott Myron 2025-04-28 07:57:10 -05:00 committed by Jean Boussier
parent 7f0c6d30d3
commit a3ec53bbb0
4 changed files with 584 additions and 13 deletions

View File

@ -6,5 +6,36 @@ if RUBY_ENGINE == 'truffleruby'
else
append_cflags("-std=c99")
$defs << "-DJSON_GENERATOR"
if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
  if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/
    # Enable SIMD only when the header, the vector type and a small program using
    # NEON instructions are all usable. The define must depend on the outcome of
    # the whole probe, not just on the presence of arm_neon.h.
    if have_header('arm_neon.h') && have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC')
      #include <arm_neon.h>
      int main() {
          uint8x16_t test = vdupq_n_u8(32);
          return 0;
      }
    SRC
      $defs.push("-DENABLE_SIMD")
    end
  end

  # Same probe for x86: header, vector type, and a small SSE2 program built with -msse2.
  if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC', opt='-msse2')
    #include <x86intrin.h>
    int main() {
        __m128i test = _mm_set1_epi8(32);
        return 0;
    }
  SRC
    $defs.push("-DENABLE_SIMD")
  end

  # Records HAVE_CPUID_H, which simd.h checks before including <cpuid.h>.
  have_header('cpuid.h')
end
create_header
create_makefile 'json/ext/generator'
end

View File

@ -5,6 +5,8 @@
#include <math.h>
#include <ctype.h>
#include "simd.h"
/* ruby api and some helpers */
typedef struct JSON_Generator_StateStruct {
@ -109,12 +111,40 @@ typedef struct _search_state {
const char *end;
const char *cursor;
FBuffer *buffer;
#ifdef ENABLE_SIMD
const char *chunk_base;
const char *chunk_end;
bool has_matches;
#ifdef HAVE_SIMD_NEON
uint64_t matches_mask;
#elif HAVE_SIMD_SSE2
int matches_mask;
#else
#error "Unknown SIMD Implementation."
#endif /* HAVE_SIMD_NEON */
#endif /* ENABLE_SIMD */
} search_state;
static inline void search_flush(search_state *search)
#if (defined(__GNUC__ ) || defined(__clang__))
#define FORCE_INLINE __attribute__((always_inline))
#else
#define FORCE_INLINE
#endif
static inline FORCE_INLINE void search_flush(search_state *search)
{
fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
search->cursor = search->ptr;
// Do not remove this conditional without profiling, specifically escape-heavy text.
// escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
// For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
// will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
// consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
// nothing needs to be flushed, we can save a few memory references with this conditional.
if (search->ptr > search->cursor) {
fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
search->cursor = search->ptr;
}
}
static const unsigned char escape_table_basic[256] = {
@ -130,6 +160,8 @@ static const unsigned char escape_table_basic[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static unsigned char (*search_escape_basic_impl)(search_state *);
static inline unsigned char search_escape_basic(search_state *search)
{
while (search->ptr < search->end) {
@ -144,7 +176,8 @@ static inline unsigned char search_escape_basic(search_state *search)
return 0;
}
static inline void escape_UTF8_char_basic(search_state *search) {
static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
{
const unsigned char ch = (unsigned char)*search->ptr;
switch (ch) {
case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
@ -156,11 +189,15 @@ static inline void escape_UTF8_char_basic(search_state *search) {
case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
default: {
const char *hexdig = "0123456789abcdef";
char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
scratch[4] = hexdig[(ch >> 4) & 0xf];
scratch[5] = hexdig[ch & 0xf];
fbuffer_append(search->buffer, scratch, 6);
if (ch < ' ') {
const char *hexdig = "0123456789abcdef";
char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
scratch[4] = hexdig[(ch >> 4) & 0xf];
scratch[5] = hexdig[ch & 0xf];
fbuffer_append(search->buffer, scratch, 6);
} else {
fbuffer_append_char(search->buffer, ch);
}
break;
}
}
@ -186,12 +223,13 @@ static inline void escape_UTF8_char_basic(search_state *search) {
*/
static inline void convert_UTF8_to_JSON(search_state *search)
{
while (search_escape_basic(search)) {
while (search_escape_basic_impl(search)) {
escape_UTF8_char_basic(search);
}
}
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) {
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
{
const unsigned char ch = (unsigned char)*search->ptr;
switch (ch_len) {
case 1: {
@ -227,6 +265,285 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
search->cursor = (search->ptr += ch_len);
}
#ifdef ENABLE_SIMD
/*
 * Stage the trailing `len` input bytes at the write position of search->buffer, padded out to
 * `vec_len` bytes, so the caller can scan them with one full-width vector load.
 *
 * search   - current search state; everything before search->ptr is flushed first.
 * vec_len  - size in bytes of the vector the caller is going to load.
 * len      - number of input bytes remaining at search->ptr.
 *
 * Returns a pointer to the staged bytes inside the buffer. The buffer length is not advanced here.
 */
static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
{
    // Emit everything still pending so that only the last 'len' characters remain unflushed.
    search_flush(search);

    FBuffer *out = search->buffer;
    fbuffer_inc_capa(out, vec_len);

    char *tail = out->ptr + out->len;

    // Fill a whole vector's worth of bytes with a filler character that never needs escaping.
    // This seems wasteful at first sight, but a memset of vector length is very fast.
    memset(tail, 'X', vec_len);

    // Optimistically copy the remaining 'len' characters into the output FBuffer. If none of them
    // needs escaping they are already in the right place; otherwise this was convenient scratch space.
    MEMCPY(tail, search->ptr, char, len);

    return tail;
}
#ifdef HAVE_SIMD_NEON
/*
 * Move search->ptr to the next pending match recorded in search->matches_mask, remove that match
 * from the mask and flush the unescaped bytes that precede it. Always returns 1.
 */
static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
{
    const uint64_t pending = search->matches_mask;

    // Every input byte owns 4 bits of the mask, so bit position / 4 is the byte offset in the chunk.
    const uint32_t offset = trailing_zeros64(pending) / 4;

    // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
    // If we want to use a similar approach for full escaping we'll need to ensure:
    //     search->chunk_base + offset >= search->ptr
    // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
    // is one byte after the previous match then:
    //     search->chunk_base + offset == search->ptr
    search->ptr = search->chunk_base + offset;

    // Drop the lowest set bit: that match is being handed to the caller now.
    search->matches_mask = pending & (pending - 1);

    search_flush(search);
    return 1;
}
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
/*
 * Collapse a 16-byte comparison result into a 64-bit mask that uses 4 bits per input byte.
 * Only the top bit of each 4-bit group is kept, so each matching byte (expected to be 0xFF)
 * contributes exactly one set bit.
 */
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
{
    // Shift every 16-bit lane right by 4 and narrow it to 8 bits: 16 bytes become 16 nibbles.
    const uint16x8_t lanes = vreinterpretq_u16_u8(matches);
    const uint8x8_t nibbles = vshrn_n_u16(lanes, 4);

    return vget_lane_u64(vreinterpret_u64_u8(nibbles), 0) & 0x8888888888888888ull;
}
/*
 * Load 16 bytes from `ptr` and return the neon_match_mask() of the bytes that need escaping:
 * anything below ' ', a backslash, or a double quote.
 */
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
{
    const uint8x16_t bytes = vld1q_u8((const unsigned char *)ptr);

    const uint8x16_t is_control   = vcltq_u8(bytes, vdupq_n_u8(' '));
    const uint8x16_t is_backslash = vceqq_u8(bytes, vdupq_n_u8('\\'));
    const uint8x16_t is_dblquote  = vceqq_u8(bytes, vdupq_n_u8('\"'));

    const uint8x16_t is_special = vorrq_u8(is_backslash, is_dblquote);

    return neon_match_mask(vorrq_u8(is_control, is_special));
}
/*
 * NEON version of the basic escape search: looks between search->ptr and search->end for the next
 * byte that is below 0x20, a backslash or a double quote.
 *
 * Matches found in one 16-byte chunk are remembered in search->matches_mask and handed out one per
 * call through neon_next_match(), which returns 1. Returns 0 when no input is left. A tail shorter
 * than SIMD_MINIMUM_THRESHOLD is delegated to search_escape_basic().
 */
static inline unsigned char search_escape_basic_neon(search_state *search)
{
if (RB_UNLIKELY(search->has_matches)) {
// There are more matches if search->matches_mask > 0.
if (search->matches_mask > 0) {
return neon_next_match(search);
} else {
// neon_next_match will only advance search->ptr up to the last matching character.
// Skip over any characters in the last chunk that occur after the last match.
search->has_matches = false;
search->ptr = search->chunk_end;
}
}
/*
* The code below implements an SIMD-based algorithm to determine if N bytes at a time
* need to be escaped.
*
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
*
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
* the vector instructions may work on larger vectors.
*
* First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
*
* lower_bound: [20 20 20 20 20 20 20 20]
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
* dblquote: [22 22 22 22 22 22 22 22]
*
* Next we load the first chunk of the ptr:
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
*
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
* as no bytes are less than 32 (0x20):
* [0 0 0 0 0 0 0 0]
*
* Next, we check if any byte in chunk is equal to a backslash:
* [0 0 0 FF 0 0 0 0]
*
* Finally we check if any byte in chunk is equal to a double quote:
* [FF 0 0 0 0 0 0 0]
*
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
* This is the needs_escape vector and it is equal to:
* [FF 0 0 FF 0 0 0 0]
*
* Finally we narrow needs_escape into a 64-bit mask (see neon_match_mask) in which every input
* byte is represented by 4 bits and every byte that needs to be escaped contributes one set bit.
* If the mask is 0 then no bytes need to be escaped and we can continue to the next chunk. If the
* mask is not 0 then we have at least one byte that needs to be escaped.
*/
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
uint64_t mask = neon_rules_update(search->ptr);
if (!mask) {
search->ptr += sizeof(uint8x16_t);
continue;
}
// Remember the chunk and its pending matches so later calls can resume from the mask.
search->matches_mask = mask;
search->has_matches = true;
search->chunk_base = search->ptr;
search->chunk_end = search->ptr + sizeof(uint8x16_t);
return neon_next_match(search);
}
// There are fewer than 16 bytes left.
unsigned long remaining = (search->end - search->ptr);
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
// Scan a padded copy of the tail that copy_remaining_bytes placed in the output buffer.
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
uint64_t mask = neon_rules_update(s);
if (!mask) {
// Nothing to escape, ensure search_flush doesn't do anything by setting
// search->cursor to search->ptr.
search->buffer->len += remaining;
search->ptr = search->end;
search->cursor = search->end;
return 0;
}
// Match offsets computed on the copy are applied relative to the original input position.
search->matches_mask = mask;
search->has_matches = true;
search->chunk_end = search->end;
search->chunk_base = search->ptr;
return neon_next_match(search);
}
if (search->ptr < search->end) {
return search_escape_basic(search);
}
search_flush(search);
return 0;
}
#endif /* HAVE_SIMD_NEON */
#ifdef HAVE_SIMD_SSE2
// Unsigned per-byte comparisons built from _mm_max_epu8 and _mm_cmpeq_epi8.
// a >= b exactly when max(a, b) == a.
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
// a <= b is b >= a.
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
// a > b is the bitwise complement of a <= b (XOR with all ones).
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
// a < b is b > a.
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
/*
 * Move search->ptr to the next pending match recorded in search->matches_mask (one bit per byte
 * of the chunk), remove that match from the mask and flush the unescaped bytes that precede it.
 * Always returns 1.
 */
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
{
    const int pending = search->matches_mask;

    // The position of the lowest set bit is the byte offset of the match inside the chunk.
    const int offset = trailing_zeros(pending);

    // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
    // If we want to use a similar approach for full escaping we'll need to ensure:
    //     search->chunk_base + offset >= search->ptr
    // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
    // is one byte after the previous match then:
    //     search->chunk_base + offset == search->ptr
    search->ptr = search->chunk_base + offset;

    // Drop the lowest set bit: that match is being handed to the caller now.
    search->matches_mask = pending & (pending - 1);

    search_flush(search);
    return 1;
}
#if defined(__clang__) || defined(__GNUC__)
#define TARGET_SSE2 __attribute__((target("sse2")))
#else
#define TARGET_SSE2
#endif
static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
{
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
const __m128i lower_bound = _mm_set1_epi8(' ');
const __m128i backslash = _mm_set1_epi8('\\');
const __m128i dblquote = _mm_set1_epi8('\"');
__m128i too_low = _mm_cmplt_epu8(chunk, lower_bound);
__m128i has_backslash = _mm_cmpeq_epi8(chunk, backslash);
__m128i has_dblquote = _mm_cmpeq_epi8(chunk, dblquote);
__m128i needs_escape = _mm_or_si128(too_low, _mm_or_si128(has_backslash, has_dblquote));
return _mm_movemask_epi8(needs_escape);
}
/*
 * SSE2 version of the basic escape search: looks between search->ptr and search->end for the next
 * byte that is below 0x20, a backslash or a double quote.
 *
 * Matches found in one 16-byte chunk are remembered in search->matches_mask and handed out one per
 * call through sse2_next_match(), which returns 1. Returns 0 when no input is left. A tail shorter
 * than SIMD_MINIMUM_THRESHOLD is delegated to search_escape_basic().
 */
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
{
if (RB_UNLIKELY(search->has_matches)) {
// There are more matches if search->matches_mask > 0.
if (search->matches_mask > 0) {
return sse2_next_match(search);
} else {
// sse2_next_match will only advance search->ptr up to the last matching character.
// Skip over any characters in the last chunk that occur after the last match.
search->has_matches = false;
// The last chunk may have been a partial tail, so never step past search->end.
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
search->ptr = search->end;
} else {
search->ptr = search->chunk_base + sizeof(__m128i);
}
}
}
// Scan full 16-byte chunks directly from the input.
while (search->ptr + sizeof(__m128i) <= search->end) {
int needs_escape_mask = sse2_update(search->ptr);
if (needs_escape_mask == 0) {
search->ptr += sizeof(__m128i);
continue;
}
// Remember the chunk and its pending matches so later calls can resume from the mask.
search->has_matches = true;
search->matches_mask = needs_escape_mask;
search->chunk_base = search->ptr;
return sse2_next_match(search);
}
// There are fewer than 16 bytes left.
unsigned long remaining = (search->end - search->ptr);
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
// Scan a padded copy of the tail that copy_remaining_bytes placed in the output buffer.
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
int needs_escape_mask = sse2_update(s);
if (needs_escape_mask == 0) {
// Nothing to escape, ensure search_flush doesn't do anything by setting
// search->cursor to search->ptr.
search->buffer->len += remaining;
search->ptr = search->end;
search->cursor = search->end;
return 0;
}
// Match offsets computed on the copy are applied relative to the original input position.
search->has_matches = true;
search->matches_mask = needs_escape_mask;
search->chunk_base = search->ptr;
return sse2_next_match(search);
}
if (search->ptr < search->end) {
return search_escape_basic(search);
}
search_flush(search);
return 0;
}
#endif /* HAVE_SIMD_SSE2 */
#endif /* ENABLE_SIMD */
static const unsigned char script_safe_escape_table[256] = {
// ASCII Control Characters
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@ -990,6 +1307,12 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
search.cursor = search.ptr;
search.end = search.ptr + len;
#ifdef ENABLE_SIMD
search.matches_mask = 0;
search.has_matches = false;
search.chunk_base = NULL;
#endif /* ENABLE_SIMD */
switch(rb_enc_str_coderange(obj)) {
case ENC_CODERANGE_7BIT:
case ENC_CODERANGE_VALID:
@ -1853,4 +2176,23 @@ void Init_generator(void)
binary_encindex = rb_ascii8bit_encindex();
rb_require("json/ext/generator/state");
switch(find_simd_implementation()) {
#ifdef ENABLE_SIMD
#ifdef HAVE_SIMD_NEON
case SIMD_NEON:
search_escape_basic_impl = search_escape_basic_neon;
break;
#endif /* HAVE_SIMD_NEON */
#ifdef HAVE_SIMD_SSE2
case SIMD_SSE2:
search_escape_basic_impl = search_escape_basic_sse2;
break;
#endif /* HAVE_SIMD_SSE2 */
#endif /* ENABLE_SIMD */
default:
search_escape_basic_impl = search_escape_basic;
break;
}
}

112
ext/json/generator/simd.h Normal file
View File

@ -0,0 +1,112 @@
#include "extconf.h"
/* SIMD back ends that find_simd_implementation() can report. */
typedef enum {
SIMD_NONE, /* no SIMD back end selected */
SIMD_NEON, /* ARM NEON */
SIMD_SSE2 /* x86-64 SSE2 */
} SIMD_Implementation;
#ifdef ENABLE_SIMD
#ifdef __clang__
#if __has_builtin(__builtin_ctzll)
#define HAVE_BUILTIN_CTZLL 1
#else
#define HAVE_BUILTIN_CTZLL 0
#endif
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define HAVE_BUILTIN_CTZLL 1
#else
#define HAVE_BUILTIN_CTZLL 0
#endif
/*
 * Number of consecutive zero bits at the least-significant end of `input`.
 * Uses the compiler builtin when HAVE_BUILTIN_CTZLL is set, otherwise a portable bit loop.
 * Callers are expected to pass a non-zero value.
 */
static inline uint32_t trailing_zeros64(uint64_t input) {
#if HAVE_BUILTIN_CTZLL
    return __builtin_ctzll(input);
#else
    uint32_t count = 0;
    for (uint64_t bits = input; bits != 0 && (bits & 1) == 0; bits >>= 1) {
        count++;
    }
    return count;
#endif
}
/*
 * Number of consecutive zero bits at the least-significant end of `input`, treating it as a
 * 32-bit pattern. Callers are expected to pass a non-zero value.
 *
 * The bits are examined as unsigned so that a pattern with the top bit set is counted correctly
 * and no signed value is ever right-shifted; the portable loop and the builtin then agree.
 */
static inline int trailing_zeros(int input) {
    const unsigned int bits = (unsigned int)input;
#if HAVE_BUILTIN_CTZLL
    /* Compilers that provide __builtin_ctzll also provide __builtin_ctz. */
    return __builtin_ctz(bits);
#else
    int count = 0;
    unsigned int rest = bits;
    while (rest != 0 && (rest & 1u) == 0) {
        count++;
        rest >>= 1;
    }
    return count;
#endif
}
#define SIMD_MINIMUM_THRESHOLD 6
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
#include <arm_neon.h>
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
/* ARM build: this definition is only compiled when a NEON/AArch64 macro is set, so it always reports NEON. */
static SIMD_Implementation find_simd_implementation(void) {
return SIMD_NEON;
}
#define HAVE_SIMD_NEON 1
/*
 * Load 64 consecutive bytes starting at `table` into four 16-byte NEON registers.
 *
 * Declared static inline because it is defined in a header: an external definition here would be
 * emitted by every translation unit that includes this file and collide at link time.
 */
static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
    uint8x16x4_t tab;
    tab.val[0] = vld1q_u8(table);
    tab.val[1] = vld1q_u8(table + 16);
    tab.val[2] = vld1q_u8(table + 32);
    tab.val[3] = vld1q_u8(table + 48);
    return tab;
}
#endif /* ARM Neon Support.*/
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#ifdef HAVE_X86INTRIN_H
#include <x86intrin.h>
#define HAVE_SIMD_SSE2 1
/*
 * find_simd_implementation() is defined below whenever <x86intrin.h> is available, so the marker
 * has to be set unconditionally here. If it depended on HAVE_CPUID_H, a build without <cpuid.h>
 * would also compile the generic fallback definition and fail with a redefinition error.
 */
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
#ifdef HAVE_CPUID_H
#include <cpuid.h>
#endif /* HAVE_CPUID_H */
/*
 * x86-64 build: report SSE2 when the running CPU supports it (checked through the GCC/Clang
 * CPU-feature builtins), otherwise report that no SIMD implementation is available.
 */
static SIMD_Implementation find_simd_implementation(void) {
#if defined(__GNUC__ ) || defined(__clang__)
#ifdef __GNUC__
    __builtin_cpu_init();
#endif /* __GNUC__ */
    // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
    if (__builtin_cpu_supports("sse2")) {
        return SIMD_SSE2;
    }
#endif /* __GNUC__ || __clang__*/
    return SIMD_NONE;
}
#endif /* HAVE_X86INTRIN_H */
#endif /* X86_64 Support */
#endif /* ENABLE_SIMD */
#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
/* Fallback used when no platform-specific definition was compiled above: no SIMD available. */
static SIMD_Implementation find_simd_implementation(void) {
return SIMD_NONE;
}
#endif

View File

@ -410,18 +410,34 @@ class JSONGeneratorTest < Test::Unit::TestCase
json = '["\\\\.(?i:gif|jpe?g|png)$"]'
assert_equal json, generate(data)
#
data = [ '\\"' ]
json = '["\\\\\""]'
data = [ '\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$' ]
json = '["\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$"]'
assert_equal json, generate(data)
#
data = [ '\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"' ]
json = '["\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\""]'
assert_equal json, generate(data)
#
data = [ '/' ]
json = '["/"]'
assert_equal json, generate(data)
#
data = [ '////////////////////////////////////////////////////////////////////////////////////' ]
json = '["////////////////////////////////////////////////////////////////////////////////////"]'
assert_equal json, generate(data)
#
data = [ '/' ]
json = '["\/"]'
assert_equal json, generate(data, :script_safe => true)
#
data = [ '///////////' ]
json = '["\/\/\/\/\/\/\/\/\/\/\/"]'
assert_equal json, generate(data, :script_safe => true)
#
data = [ '///////////////////////////////////////////////////////' ]
json = '["\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/"]'
assert_equal json, generate(data, :script_safe => true)
#
data = [ "\u2028\u2029" ]
json = '["\u2028\u2029"]'
assert_equal json, generate(data, :script_safe => true)
@ -438,6 +454,10 @@ class JSONGeneratorTest < Test::Unit::TestCase
json = '["\""]'
assert_equal json, generate(data)
#
data = ['"""""""""""""""""""""""""']
json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]'
assert_equal json, generate(data)
#
data = ["'"]
json = '["\\\'"]'
assert_equal '["\'"]', generate(data)
@ -445,6 +465,72 @@ class JSONGeneratorTest < Test::Unit::TestCase
data = ["", ""]
json = '["倩","瀨"]'
assert_equal json, generate(data, script_safe: true)
#
data = '["This is a "test" of the emergency broadcast system."]'
json = "\"[\\\"This is a \\\"test\\\" of the emergency broadcast system.\\\"]\""
assert_equal json, generate(data)
#
data = '\tThis is a test of the emergency broadcast system.'
json = "\"\\\\tThis is a test of the emergency broadcast system.\""
assert_equal json, generate(data)
#
data = 'This\tis a test of the emergency broadcast system.'
json = "\"This\\\\tis a test of the emergency broadcast system.\""
assert_equal json, generate(data)
#
data = 'This is\ta test of the emergency broadcast system.'
json = "\"This is\\\\ta test of the emergency broadcast system.\""
assert_equal json, generate(data)
#
data = 'This is a test of the emergency broadcast\tsystem.'
json = "\"This is a test of the emergency broadcast\\\\tsystem.\""
assert_equal json, generate(data)
#
data = 'This is a test of the emergency broadcast\tsystem.\n'
json = "\"This is a test of the emergency broadcast\\\\tsystem.\\\\n\""
assert_equal json, generate(data)
data = '"' * 15
json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\""
assert_equal json, generate(data)
data = "\"\"\"\"\"\"\"\"\"\"\"\"\"\"a"
json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"a\""
assert_equal json, generate(data)
data = "\u0001\u0001\u0001\u0001"
json = "\"\\u0001\\u0001\\u0001\\u0001\""
assert_equal json, generate(data)
data = "\u0001a\u0001a\u0001a\u0001a"
json = "\"\\u0001a\\u0001a\\u0001a\\u0001a\""
assert_equal json, generate(data)
data = "\u0001aa\u0001aa"
json = "\"\\u0001aa\\u0001aa\""
assert_equal json, generate(data)
data = "\u0001aa\u0001aa\u0001aa"
json = "\"\\u0001aa\\u0001aa\\u0001aa\""
assert_equal json, generate(data)
data = "\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa"
json = "\"\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\""
assert_equal json, generate(data)
data = "\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002"
json = "\"\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\""
assert_equal json, generate(data)
data = "ab\u0002c"
json = "\"ab\\u0002c\""
assert_equal json, generate(data)
data = "ab\u0002cab\u0002cab\u0002cab\u0002c"
json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002c\""
assert_equal json, generate(data)
data = "ab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002c"
json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002c\""
assert_equal json, generate(data)
data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f"
json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\""
assert_equal json, generate(data)
data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b"
json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\""
assert_equal json, generate(data)
data = "a\n\t\f\b\n\t\f\b\n\t\f\b\n\t"
json = "\"a\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\""
assert_equal json, generate(data)
end
def test_string_subclass