[ruby/json] Introduce ARM Neon and SSE2 SIMD.

(https://github.com/ruby/json/pull/743)

See the pull request for the long development history: https://github.com/ruby/json/pull/743

```
== Encoding activitypub.json (52595 bytes)
ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
               after     2.913k i/100ms
Calculating -------------------------------------
               after     29.377k (± 2.0%) i/s   (34.04 μs/i) -    148.563k in   5.059169s

Comparison:
              before:    23314.1 i/s
               after:    29377.3 i/s - 1.26x  faster

== Encoding citm_catalog.json (500298 bytes)
ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
               after   152.000 i/100ms
Calculating -------------------------------------
               after      1.569k (± 0.8%) i/s  (637.49 μs/i) -      7.904k in   5.039001s

Comparison:
              before:     1485.6 i/s
               after:     1568.7 i/s - 1.06x  faster

== Encoding twitter.json (466906 bytes)
ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
               after   309.000 i/100ms
Calculating -------------------------------------
               after      3.115k (± 3.1%) i/s  (321.01 μs/i) -     15.759k in   5.063776s

Comparison:
              before:     2508.3 i/s
               after:     3115.2 i/s - 1.24x  faster
```

https://github.com/ruby/json/commit/49003523da
This commit is contained in:
Scott Myron 2025-04-28 07:57:10 -05:00 committed by Jean Boussier
parent 7f0c6d30d3
commit a3ec53bbb0
4 changed files with 584 additions and 13 deletions

View File

@ -6,5 +6,36 @@ if RUBY_ENGINE == 'truffleruby'
else else
append_cflags("-std=c99") append_cflags("-std=c99")
$defs << "-DJSON_GENERATOR" $defs << "-DJSON_GENERATOR"
# SIMD string escaping is enabled by default; it is skipped when the
# 'generator-use-simd' config option is disabled or JSON_DISABLE_SIMD is set
# in the environment at build time.
if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
  if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/
    # Try to compile a small program using NEON instructions
    if have_header('arm_neon.h')
      # Only define ENABLE_SIMD when the probe actually succeeds. Previously the
      # result of this expression was discarded and the define was pushed
      # whenever arm_neon.h was present, even if NEON code could not be compiled.
      if have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC')
        #include <arm_neon.h>
        int main() {
            uint8x16_t test = vdupq_n_u8(32);
            return 0;
        }
      SRC
        $defs.push("-DENABLE_SIMD")
      end
    end
  end

  # Same style of probe for SSE2: header, vector type, and a tiny program
  # compiled with -msse2.
  if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC', opt='-msse2')
    #include <x86intrin.h>
    int main() {
        __m128i test = _mm_set1_epi8(32);
        return 0;
    }
  SRC
    $defs.push("-DENABLE_SIMD")
  end

  # Records HAVE_CPUID_H in the generated extconf.h, which simd.h checks.
  have_header('cpuid.h')
end
create_header
create_makefile 'json/ext/generator' create_makefile 'json/ext/generator'
end end

View File

@ -5,6 +5,8 @@
#include <math.h> #include <math.h>
#include <ctype.h> #include <ctype.h>
#include "simd.h"
/* ruby api and some helpers */ /* ruby api and some helpers */
typedef struct JSON_Generator_StateStruct { typedef struct JSON_Generator_StateStruct {
@ -109,12 +111,40 @@ typedef struct _search_state {
const char *end; const char *end;
const char *cursor; const char *cursor;
FBuffer *buffer; FBuffer *buffer;
#ifdef ENABLE_SIMD
const char *chunk_base;
const char *chunk_end;
bool has_matches;
#ifdef HAVE_SIMD_NEON
uint64_t matches_mask;
#elif HAVE_SIMD_SSE2
int matches_mask;
#else
#error "Unknown SIMD Implementation."
#endif /* HAVE_SIMD_NEON */
#endif /* ENABLE_SIMD */
} search_state; } search_state;
static inline void search_flush(search_state *search) #if (defined(__GNUC__ ) || defined(__clang__))
#define FORCE_INLINE __attribute__((always_inline))
#else
#define FORCE_INLINE
#endif
static inline FORCE_INLINE void search_flush(search_state *search)
{ {
fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor); // Do not remove this conditional without profiling, specifically escape-heavy text.
search->cursor = search->ptr; // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
// For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
// will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
// consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
// nothing needs to be flushed, we can save a few memory references with this conditional.
if (search->ptr > search->cursor) {
fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
search->cursor = search->ptr;
}
} }
static const unsigned char escape_table_basic[256] = { static const unsigned char escape_table_basic[256] = {
@ -130,6 +160,8 @@ static const unsigned char escape_table_basic[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}; };
static unsigned char (*search_escape_basic_impl)(search_state *);
static inline unsigned char search_escape_basic(search_state *search) static inline unsigned char search_escape_basic(search_state *search)
{ {
while (search->ptr < search->end) { while (search->ptr < search->end) {
@ -144,7 +176,8 @@ static inline unsigned char search_escape_basic(search_state *search)
return 0; return 0;
} }
static inline void escape_UTF8_char_basic(search_state *search) { static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
{
const unsigned char ch = (unsigned char)*search->ptr; const unsigned char ch = (unsigned char)*search->ptr;
switch (ch) { switch (ch) {
case '"': fbuffer_append(search->buffer, "\\\"", 2); break; case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
@ -156,11 +189,15 @@ static inline void escape_UTF8_char_basic(search_state *search) {
case '\r': fbuffer_append(search->buffer, "\\r", 2); break; case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
case '\t': fbuffer_append(search->buffer, "\\t", 2); break; case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
default: { default: {
const char *hexdig = "0123456789abcdef"; if (ch < ' ') {
char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; const char *hexdig = "0123456789abcdef";
scratch[4] = hexdig[(ch >> 4) & 0xf]; char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
scratch[5] = hexdig[ch & 0xf]; scratch[4] = hexdig[(ch >> 4) & 0xf];
fbuffer_append(search->buffer, scratch, 6); scratch[5] = hexdig[ch & 0xf];
fbuffer_append(search->buffer, scratch, 6);
} else {
fbuffer_append_char(search->buffer, ch);
}
break; break;
} }
} }
@ -186,12 +223,13 @@ static inline void escape_UTF8_char_basic(search_state *search) {
*/ */
static inline void convert_UTF8_to_JSON(search_state *search) static inline void convert_UTF8_to_JSON(search_state *search)
{ {
while (search_escape_basic(search)) { while (search_escape_basic_impl(search)) {
escape_UTF8_char_basic(search); escape_UTF8_char_basic(search);
} }
} }
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) { static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
{
const unsigned char ch = (unsigned char)*search->ptr; const unsigned char ch = (unsigned char)*search->ptr;
switch (ch_len) { switch (ch_len) {
case 1: { case 1: {
@ -227,6 +265,285 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
search->cursor = (search->ptr += ch_len); search->cursor = (search->ptr += ch_len);
} }
#ifdef ENABLE_SIMD
/*
 * Stages the last `len` bytes of the input (starting at search->ptr) in the unused
 * tail of the output buffer so that a full vector of `vec_len` bytes can be loaded
 * from them.
 *
 * Returns a pointer to the staged bytes, located at buffer->ptr + buffer->len.
 * buffer->len itself is not advanced here; the caller decides whether to keep the bytes.
 */
static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
{
    // Write out everything before search->ptr so that only the final `len` bytes are pending.
    search_flush(search);

    FBuffer *out = search->buffer;
    fbuffer_inc_capa(out, vec_len);

    char *staging = out->ptr + out->len;

    // Fill the whole vector-sized window with a filler character that never needs escaping.
    // This looks wasteful, but a memset of one vector length is very cheap.
    memset(staging, 'X', vec_len);

    // Optimistically place the remaining bytes directly in the output buffer. When nothing
    // needs escaping they are already where they belong; otherwise this was just scratch space.
    MEMCPY(staging, search->ptr, char, len);

    return staging;
}
#ifdef HAVE_SIMD_NEON
/*
 * Moves search->ptr to the next byte of the current chunk that needs escaping,
 * removes that byte from search->matches_mask and flushes the bytes before it.
 * Always returns 1.
 */
static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
{
    const uint64_t pending = search->matches_mask;

    // neon_match_mask keeps one bit per 4-bit group, so the byte offset within the
    // chunk is the bit position divided by four.
    const uint32_t offset = trailing_zeros64(pending) / 4;

    // escape_UTF8_char_basic is assumed to advance search->ptr by at most one character.
    // A similar scheme for full escaping would have to guarantee
    //   search->chunk_base + offset >= search->ptr
    // Because only one byte is consumed per escape, a match directly after the previous one gives
    //   search->chunk_base + offset == search->ptr
    search->ptr = search->chunk_base + offset;

    // Drop the lowest set bit: that match is being handled now.
    search->matches_mask = pending & (pending - 1);

    search_flush(search);
    return 1;
}
// Collapses a 16-byte comparison result into a 64-bit mask that carries one bit
// (the top bit of each 4-bit group) per input byte.
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
{
    const uint16x8_t as_halfwords = vreinterpretq_u16_u8(matches);
    const uint8x8_t narrowed = vshrn_n_u16(as_halfwords, 4);
    const uint64_t nibbles = vget_lane_u64(vreinterpret_u64_u8(narrowed), 0);

    return nibbles & 0x8888888888888888ull;
}
/*
 * Loads 16 bytes from `ptr` and returns the neon_match_mask of the bytes that need
 * escaping: anything below ' ' (0x20), a backslash, or a double quote.
 * A return value of 0 means no byte in the chunk needs escaping.
 */
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
{
    const uint8x16_t bytes = vld1q_u8((const unsigned char *)ptr);

    const uint8x16_t space = vdupq_n_u8(' ');
    const uint8x16_t slash = vdupq_n_u8('\\');
    const uint8x16_t quote = vdupq_n_u8('\"');

    const uint8x16_t is_control = vcltq_u8(bytes, space);
    const uint8x16_t is_backslash = vceqq_u8(bytes, slash);
    const uint8x16_t is_dblquote = vceqq_u8(bytes, quote);

    const uint8x16_t is_special = vorrq_u8(is_backslash, is_dblquote);
    return neon_match_mask(vorrq_u8(is_control, is_special));
}
/*
 * NEON implementation of the "basic" escape search.
 *
 * Scans from search->ptr towards search->end for the next byte that needs escaping
 * (a byte below 0x20, a backslash or a double quote).
 *
 * Returns non-zero when such a byte was found; search->ptr then points at it and the
 * bytes before it have been flushed to search->buffer. Returns 0 once the end of the
 * input has been reached.
 *
 * State kept across calls in *search: has_matches / matches_mask remember the matches
 * still pending in the chunk that was scanned last, chunk_base / chunk_end delimit
 * that chunk.
 */
static inline unsigned char search_escape_basic_neon(search_state *search)
{
if (RB_UNLIKELY(search->has_matches)) {
// There are more matches if search->matches_mask > 0.
if (search->matches_mask > 0) {
return neon_next_match(search);
} else {
// neon_next_match will only advance search->ptr up to the last matching character.
// Skip over any characters in the last chunk that occur after the last match.
search->has_matches = false;
search->ptr = search->chunk_end;
}
}
/*
* The code below implements a SIMD-based algorithm to determine if N bytes at a time
* need to be escaped.
*
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
*
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
* the vector instructions may work on larger vectors.
*
* First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
*
* lower_bound: [20 20 20 20 20 20 20 20]
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
* dblquote: [22 22 22 22 22 22 22 22]
*
* Next we load the first chunk of the ptr:
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
*
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
* as no bytes are less than 32 (0x20):
* [0 0 0 0 0 0 0 0]
*
* Next, we check if any byte in chunk is equal to a backslash:
* [0 0 0 FF 0 0 0 0]
*
* Finally we check if any byte in chunk is equal to a double quote:
* [FF 0 0 0 0 0 0 0]
*
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
* This is the needs_escape vector and it is equal to:
* [FF 0 0 FF 0 0 0 0]
*
* Finally neon_match_mask narrows the needs_escape vector into a 64-bit mask in which every
* input byte is represented by one bit (the top bit of its 4-bit group). If the mask is 0 then
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
* have at least one byte that needs to be escaped, and neon_next_match walks the set bits
* from lowest to highest.
*/
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
uint64_t mask = neon_rules_update(search->ptr);
if (!mask) {
search->ptr += sizeof(uint8x16_t);
continue;
}
// Remember the chunk so that follow-up calls can resume from the pending matches.
search->matches_mask = mask;
search->has_matches = true;
search->chunk_base = search->ptr;
search->chunk_end = search->ptr + sizeof(uint8x16_t);
return neon_next_match(search);
}
// There are fewer than 16 bytes left.
unsigned long remaining = (search->end - search->ptr);
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
// Stage the tail, padded to a full vector, in the output buffer and scan that copy.
// Match offsets found in the copy are applied relative to search->ptr (chunk_base below).
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
uint64_t mask = neon_rules_update(s);
if (!mask) {
// Nothing to escape, ensure search_flush doesn't do anything by setting
// search->cursor to search->ptr.
search->buffer->len += remaining;
search->ptr = search->end;
search->cursor = search->end;
return 0;
}
search->matches_mask = mask;
search->has_matches = true;
search->chunk_end = search->end;
search->chunk_base = search->ptr;
return neon_next_match(search);
}
// Too few bytes left for the vector path: fall back to the scalar search.
if (search->ptr < search->end) {
return search_escape_basic(search);
}
search_flush(search);
return 0;
}
#endif /* HAVE_SIMD_NEON */
#ifdef HAVE_SIMD_SSE2
// Unsigned 8-bit lane comparisons (>=, <=, >, <) built from _mm_max_epu8,
// _mm_cmpeq_epi8 and _mm_xor_si128.
// NOTE: these are macros; an argument can be expanded more than once, so only
// pass side-effect-free expressions.
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
/*
 * Moves search->ptr to the next byte of the current chunk that needs escaping,
 * removes that byte from search->matches_mask and flushes the bytes before it.
 * Always returns 1.
 */
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
{
    const int pending = search->matches_mask;

    // The mask carries one bit per byte, so the bit position is the byte offset.
    const int offset = trailing_zeros(pending);

    // escape_UTF8_char_basic is assumed to advance search->ptr by at most one character.
    // A similar scheme for full escaping would have to guarantee
    //   search->chunk_base + offset >= search->ptr
    // Because only one byte is consumed per escape, a match directly after the previous one gives
    //   search->chunk_base + offset == search->ptr
    search->ptr = search->chunk_base + offset;

    // Drop the lowest set bit: that match is being handled now.
    search->matches_mask = pending & (pending - 1);

    search_flush(search);
    return 1;
}
// Function attribute used on the SSE2 helpers below. On GCC and clang it requests the
// "sse2" target for the annotated function; on other compilers it expands to nothing.
#if defined(__clang__) || defined(__GNUC__)
#define TARGET_SSE2 __attribute__((target("sse2")))
#else
#define TARGET_SSE2
#endif
static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
{
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
const __m128i lower_bound = _mm_set1_epi8(' ');
const __m128i backslash = _mm_set1_epi8('\\');
const __m128i dblquote = _mm_set1_epi8('\"');
__m128i too_low = _mm_cmplt_epu8(chunk, lower_bound);
__m128i has_backslash = _mm_cmpeq_epi8(chunk, backslash);
__m128i has_dblquote = _mm_cmpeq_epi8(chunk, dblquote);
__m128i needs_escape = _mm_or_si128(too_low, _mm_or_si128(has_backslash, has_dblquote));
return _mm_movemask_epi8(needs_escape);
}
/*
 * SSE2 implementation of the "basic" escape search.
 *
 * Scans from search->ptr towards search->end for the next byte that needs escaping
 * (a byte below 0x20, a backslash or a double quote), 16 bytes at a time.
 *
 * Returns non-zero when such a byte was found; search->ptr then points at it and the
 * bytes before it have been flushed to search->buffer. Returns 0 once the end of the
 * input has been reached.
 *
 * State kept across calls in *search: has_matches / matches_mask remember the matches
 * still pending in the chunk that was scanned last, chunk_base is the start of that chunk.
 */
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
{
if (RB_UNLIKELY(search->has_matches)) {
// There are more matches if search->matches_mask > 0.
if (search->matches_mask > 0) {
return sse2_next_match(search);
} else {
// sse2_next_match will only advance search->ptr up to the last matching character.
// Skip over any characters in the last chunk that occur after the last match.
search->has_matches = false;
// The last chunk may have been a padded tail shorter than a full vector,
// so never move past search->end.
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
search->ptr = search->end;
} else {
search->ptr = search->chunk_base + sizeof(__m128i);
}
}
}
// Main loop: test one full 16-byte chunk per iteration.
while (search->ptr + sizeof(__m128i) <= search->end) {
int needs_escape_mask = sse2_update(search->ptr);
if (needs_escape_mask == 0) {
search->ptr += sizeof(__m128i);
continue;
}
// Remember the chunk so that follow-up calls can resume from the pending matches.
search->has_matches = true;
search->matches_mask = needs_escape_mask;
search->chunk_base = search->ptr;
return sse2_next_match(search);
}
// There are fewer than 16 bytes left.
unsigned long remaining = (search->end - search->ptr);
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
// Stage the tail, padded to a full vector, in the output buffer and scan that copy.
// Match offsets found in the copy are applied relative to search->ptr (chunk_base below).
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
int needs_escape_mask = sse2_update(s);
if (needs_escape_mask == 0) {
// Nothing to escape, ensure search_flush doesn't do anything by setting
// search->cursor to search->ptr.
search->buffer->len += remaining;
search->ptr = search->end;
search->cursor = search->end;
return 0;
}
search->has_matches = true;
search->matches_mask = needs_escape_mask;
search->chunk_base = search->ptr;
return sse2_next_match(search);
}
// Too few bytes left for the vector path: fall back to the scalar search.
if (search->ptr < search->end) {
return search_escape_basic(search);
}
search_flush(search);
return 0;
}
#endif /* HAVE_SIMD_SSE2 */
#endif /* ENABLE_SIMD */
static const unsigned char script_safe_escape_table[256] = { static const unsigned char script_safe_escape_table[256] = {
// ASCII Control Characters // ASCII Control Characters
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@ -990,6 +1307,12 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
search.cursor = search.ptr; search.cursor = search.ptr;
search.end = search.ptr + len; search.end = search.ptr + len;
#ifdef ENABLE_SIMD
search.matches_mask = 0;
search.has_matches = false;
search.chunk_base = NULL;
#endif /* ENABLE_SIMD */
switch(rb_enc_str_coderange(obj)) { switch(rb_enc_str_coderange(obj)) {
case ENC_CODERANGE_7BIT: case ENC_CODERANGE_7BIT:
case ENC_CODERANGE_VALID: case ENC_CODERANGE_VALID:
@ -1853,4 +2176,23 @@ void Init_generator(void)
binary_encindex = rb_ascii8bit_encindex(); binary_encindex = rb_ascii8bit_encindex();
rb_require("json/ext/generator/state"); rb_require("json/ext/generator/state");
switch(find_simd_implementation()) {
#ifdef ENABLE_SIMD
#ifdef HAVE_SIMD_NEON
case SIMD_NEON:
search_escape_basic_impl = search_escape_basic_neon;
break;
#endif /* HAVE_SIMD_NEON */
#ifdef HAVE_SIMD_SSE2
case SIMD_SSE2:
search_escape_basic_impl = search_escape_basic_sse2;
break;
#endif /* HAVE_SIMD_SSE2 */
#endif /* ENABLE_SIMD */
default:
search_escape_basic_impl = search_escape_basic;
break;
}
} }

112
ext/json/generator/simd.h Normal file
View File

@ -0,0 +1,112 @@
#include "extconf.h"
// Identifies which SIMD code path find_simd_implementation() selected.
typedef enum {
SIMD_NONE, // no SIMD support; callers use the scalar implementation
SIMD_NEON, // ARM NEON
SIMD_SSE2 // x86-64 SSE2
} SIMD_Implementation;
#ifdef ENABLE_SIMD
// HAVE_BUILTIN_CTZLL is 1 when the compiler's count-trailing-zeros builtins can be used
// by the helpers below, and 0 when they must use their portable loop instead.
// clang is queried through __has_builtin; GCC is accepted from version 4.3 on.
#ifdef __clang__
#if __has_builtin(__builtin_ctzll)
#define HAVE_BUILTIN_CTZLL 1
#else
#define HAVE_BUILTIN_CTZLL 0
#endif
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define HAVE_BUILTIN_CTZLL 1
#else
#define HAVE_BUILTIN_CTZLL 0
#endif
// Returns the number of consecutive zero bits at the least-significant end of `input`.
// Callers are expected to pass a non-zero value.
static inline uint32_t trailing_zeros64(uint64_t input) {
#if HAVE_BUILTIN_CTZLL
    return __builtin_ctzll(input);
#else
    // Portable fallback: shift right until the lowest bit is set.
    uint32_t count = 0;
    for (uint64_t bits = input; bits != 0 && (bits & 1) == 0; bits >>= 1) {
        count++;
    }
    return count;
#endif
}
// Returns the number of consecutive zero bits at the least-significant end of `input`,
// treating `input` as a plain 32-bit pattern. Callers are expected to pass a non-zero value.
static inline int trailing_zeros(int input) {
#if HAVE_BUILTIN_CTZLL
    return __builtin_ctz((unsigned int)input);
#else
    // Work on the unsigned bit pattern: with a signed value the loop would stop
    // immediately for any input that has the top bit set (it compares as negative),
    // and right-shifting a negative value is implementation-defined.
    unsigned int bits = (unsigned int)input;
    int count = 0;
    while (bits != 0 && (bits & 1u) == 0) {
        count++;
        bits >>= 1;
    }
    return count;
#endif
}
#define SIMD_MINIMUM_THRESHOLD 6
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
#include <arm_neon.h>
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
// NEON variant: this definition is only compiled when one of the ARM/NEON macros in the
// enclosing #if is set, so NEON is reported without any runtime check.
static SIMD_Implementation find_simd_implementation(void) {
return SIMD_NEON;
}
#define HAVE_SIMD_NEON 1
// Loads 64 consecutive bytes starting at `table` into four 16-byte NEON registers.
// `table` must point to at least 64 readable bytes.
//
// Declared static inline because this header defines the function body: with external
// linkage every translation unit including the header would emit its own definition
// and the link would fail with duplicate symbols.
static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
    uint8x16x4_t tab;
    tab.val[0] = vld1q_u8(table);
    tab.val[1] = vld1q_u8(table + 16);
    tab.val[2] = vld1q_u8(table + 32);
    tab.val[3] = vld1q_u8(table + 48);
    return tab;
}
#endif /* ARM Neon Support.*/
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#ifdef HAVE_X86INTRIN_H
#include <x86intrin.h>

#define HAVE_SIMD_SSE2 1

// find_simd_implementation is defined below whenever x86intrin.h is available, so the
// generic SIMD_NONE fallback at the end of this header has to be suppressed in that
// same case. Tying this define to cpuid.h instead led to a duplicate definition when
// x86intrin.h was present but cpuid.h was not.
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1

#ifdef HAVE_CPUID_H
#include <cpuid.h>
#endif /* HAVE_CPUID_H */

// Runtime detection for x86-64: reports SIMD_SSE2 when the compiler's CPU feature
// builtins say SSE2 is supported, SIMD_NONE otherwise (including on compilers that
// are neither GCC nor clang).
static SIMD_Implementation find_simd_implementation(void) {
#if defined(__GNUC__ ) || defined(__clang__)
#ifdef __GNUC__
    __builtin_cpu_init();
#endif /* __GNUC__ */

    // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
    if (__builtin_cpu_supports("sse2")) {
        return SIMD_SSE2;
    }
#endif /* __GNUC__ || __clang__*/

    return SIMD_NONE;
}
#endif /* HAVE_X86INTRIN_H */
#endif /* X86_64 Support */
#endif /* ENABLE_SIMD */
#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
// Fallback used when no platform-specific section above defined
// FIND_SIMD_IMPLEMENTATION_DEFINED: no SIMD implementation is reported.
static SIMD_Implementation find_simd_implementation(void) {
return SIMD_NONE;
}
#endif

View File

@ -410,18 +410,34 @@ class JSONGeneratorTest < Test::Unit::TestCase
json = '["\\\\.(?i:gif|jpe?g|png)$"]' json = '["\\\\.(?i:gif|jpe?g|png)$"]'
assert_equal json, generate(data) assert_equal json, generate(data)
# #
data = [ '\\"' ] data = [ '\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$' ]
json = '["\\\\\""]' json = '["\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$"]'
assert_equal json, generate(data)
#
data = [ '\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"' ]
json = '["\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\""]'
assert_equal json, generate(data) assert_equal json, generate(data)
# #
data = [ '/' ] data = [ '/' ]
json = '["/"]' json = '["/"]'
assert_equal json, generate(data) assert_equal json, generate(data)
# #
data = [ '////////////////////////////////////////////////////////////////////////////////////' ]
json = '["////////////////////////////////////////////////////////////////////////////////////"]'
assert_equal json, generate(data)
#
data = [ '/' ] data = [ '/' ]
json = '["\/"]' json = '["\/"]'
assert_equal json, generate(data, :script_safe => true) assert_equal json, generate(data, :script_safe => true)
# #
data = [ '///////////' ]
json = '["\/\/\/\/\/\/\/\/\/\/\/"]'
assert_equal json, generate(data, :script_safe => true)
#
data = [ '///////////////////////////////////////////////////////' ]
json = '["\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/"]'
assert_equal json, generate(data, :script_safe => true)
#
data = [ "\u2028\u2029" ] data = [ "\u2028\u2029" ]
json = '["\u2028\u2029"]' json = '["\u2028\u2029"]'
assert_equal json, generate(data, :script_safe => true) assert_equal json, generate(data, :script_safe => true)
@ -438,6 +454,10 @@ class JSONGeneratorTest < Test::Unit::TestCase
json = '["\""]' json = '["\""]'
assert_equal json, generate(data) assert_equal json, generate(data)
# #
data = ['"""""""""""""""""""""""""']
json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]'
assert_equal json, generate(data)
#
data = ["'"] data = ["'"]
json = '["\\\'"]' json = '["\\\'"]'
assert_equal '["\'"]', generate(data) assert_equal '["\'"]', generate(data)
@ -445,6 +465,72 @@ class JSONGeneratorTest < Test::Unit::TestCase
data = ["", ""] data = ["", ""]
json = '["倩","瀨"]' json = '["倩","瀨"]'
assert_equal json, generate(data, script_safe: true) assert_equal json, generate(data, script_safe: true)
#
data = '["This is a "test" of the emergency broadcast system."]'
json = "\"[\\\"This is a \\\"test\\\" of the emergency broadcast system.\\\"]\""
assert_equal json, generate(data)
#
data = '\tThis is a test of the emergency broadcast system.'
json = "\"\\\\tThis is a test of the emergency broadcast system.\""
assert_equal json, generate(data)
#
data = 'This\tis a test of the emergency broadcast system.'
json = "\"This\\\\tis a test of the emergency broadcast system.\""
assert_equal json, generate(data)
#
data = 'This is\ta test of the emergency broadcast system.'
json = "\"This is\\\\ta test of the emergency broadcast system.\""
assert_equal json, generate(data)
#
data = 'This is a test of the emergency broadcast\tsystem.'
json = "\"This is a test of the emergency broadcast\\\\tsystem.\""
assert_equal json, generate(data)
#
data = 'This is a test of the emergency broadcast\tsystem.\n'
json = "\"This is a test of the emergency broadcast\\\\tsystem.\\\\n\""
assert_equal json, generate(data)
data = '"' * 15
json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\""
assert_equal json, generate(data)
data = "\"\"\"\"\"\"\"\"\"\"\"\"\"\"a"
json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"a\""
assert_equal json, generate(data)
data = "\u0001\u0001\u0001\u0001"
json = "\"\\u0001\\u0001\\u0001\\u0001\""
assert_equal json, generate(data)
data = "\u0001a\u0001a\u0001a\u0001a"
json = "\"\\u0001a\\u0001a\\u0001a\\u0001a\""
assert_equal json, generate(data)
data = "\u0001aa\u0001aa"
json = "\"\\u0001aa\\u0001aa\""
assert_equal json, generate(data)
data = "\u0001aa\u0001aa\u0001aa"
json = "\"\\u0001aa\\u0001aa\\u0001aa\""
assert_equal json, generate(data)
data = "\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa"
json = "\"\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\""
assert_equal json, generate(data)
data = "\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002"
json = "\"\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\""
assert_equal json, generate(data)
data = "ab\u0002c"
json = "\"ab\\u0002c\""
assert_equal json, generate(data)
data = "ab\u0002cab\u0002cab\u0002cab\u0002c"
json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002c\""
assert_equal json, generate(data)
data = "ab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002c"
json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002c\""
assert_equal json, generate(data)
data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f"
json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\""
assert_equal json, generate(data)
data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b"
json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\""
assert_equal json, generate(data)
data = "a\n\t\f\b\n\t\f\b\n\t\f\b\n\t"
json = "\"a\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\""
assert_equal json, generate(data)
end end
def test_string_subclass def test_string_subclass