[ruby/json] Delete code that is based on CVTUTF
I did this based on manual inspection, comparing the code to my re-created history of CVTUTF at https://git.lukeshu.com/2git/cvtutf/ (created by the scripts at https://git.lukeshu.com/2git/cvtutf-make/). Upstream commit: https://github.com/ruby/json/commit/0819553144
This commit is contained in:
parent
bad4ad63bf
commit
6e47968929
@ -18,54 +18,6 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
|
|||||||
i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
|
i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
|
||||||
i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash, i_strict;
|
i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash, i_strict;
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright 2001-2004 Unicode, Inc.
|
|
||||||
*
|
|
||||||
* Disclaimer
|
|
||||||
*
|
|
||||||
* This source code is provided as is by Unicode, Inc. No claims are
|
|
||||||
* made as to fitness for any particular purpose. No warranties of any
|
|
||||||
* kind are expressed or implied. The recipient agrees to determine
|
|
||||||
* applicability of information provided. If this file has been
|
|
||||||
* purchased on magnetic or optical media from Unicode, Inc., the
|
|
||||||
* sole remedy for any claim will be exchange of defective media
|
|
||||||
* within 90 days of receipt.
|
|
||||||
*
|
|
||||||
* Limitations on Rights to Redistribute This Code
|
|
||||||
*
|
|
||||||
* Unicode, Inc. hereby grants the right to freely use the information
|
|
||||||
* supplied in this file in the creation of products supporting the
|
|
||||||
* Unicode Standard, and to make copies of this file in any form
|
|
||||||
* for internal or external distribution as long as this notice
|
|
||||||
* remains attached.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Index into the table below with the first byte of a UTF-8 sequence to
|
|
||||||
* get the number of trailing bytes that are supposed to follow it.
|
|
||||||
* Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
|
|
||||||
* left as-is for anyone who may want to do such conversion, which was
|
|
||||||
* allowed in earlier algorithms.
|
|
||||||
*/
|
|
||||||
static const char trailingBytesForUTF8[256] = {
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Magic values subtracted from a buffer value during UTF8 conversion.
|
|
||||||
* This table contains as many values as there might be trailing bytes
|
|
||||||
* in a UTF-8 sequence.
|
|
||||||
*/
|
|
||||||
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
|
||||||
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
|
|
||||||
|
|
||||||
/* Escapes the UTF16 character and stores the result in the buffer buf. */
|
/* Escapes the UTF16 character and stores the result in the buffer buf. */
|
||||||
static void unicode_escape(char *buf, UTF16 character)
|
static void unicode_escape(char *buf, UTF16 character)
|
||||||
{
|
{
|
||||||
@ -94,98 +46,6 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char scrip
|
|||||||
const UTF8 *sourceEnd = source + RSTRING_LEN(string);
|
const UTF8 *sourceEnd = source + RSTRING_LEN(string);
|
||||||
char buf[6] = { '\\', 'u' };
|
char buf[6] = { '\\', 'u' };
|
||||||
|
|
||||||
int ascii_only = rb_enc_str_asciionly_p(string);
|
|
||||||
|
|
||||||
if (!ascii_only) {
|
|
||||||
if (RB_ENCODING_GET_INLINED(string) != rb_utf8_encindex() || RB_ENC_CODERANGE(string) != RUBY_ENC_CODERANGE_VALID) {
|
|
||||||
rb_raise(rb_path2class("JSON::GeneratorError"),
|
|
||||||
"source sequence is illegal/malformed utf-8");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
while (source < sourceEnd) {
|
|
||||||
UTF32 ch = 0;
|
|
||||||
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
|
|
||||||
/*
|
|
||||||
* The cases all fall through. See "Note A" below.
|
|
||||||
*/
|
|
||||||
switch (extraBytesToRead) {
|
|
||||||
case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
|
|
||||||
case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
|
|
||||||
case 3: ch += *source++; ch <<= 6;
|
|
||||||
case 2: ch += *source++; ch <<= 6;
|
|
||||||
case 1: ch += *source++; ch <<= 6;
|
|
||||||
case 0: ch += *source++;
|
|
||||||
}
|
|
||||||
ch -= offsetsFromUTF8[extraBytesToRead];
|
|
||||||
|
|
||||||
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
|
|
||||||
/* UTF-16 surrogate values are illegal in UTF-32 */
|
|
||||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
|
|
||||||
#if UNI_STRICT_CONVERSION
|
|
||||||
source -= (extraBytesToRead+1); /* return to the illegal value itself */
|
|
||||||
rb_raise(rb_path2class("JSON::GeneratorError"),
|
|
||||||
"source sequence is illegal/malformed utf-8");
|
|
||||||
#else
|
|
||||||
unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
|
|
||||||
#endif
|
|
||||||
} else {
|
|
||||||
/* normal case */
|
|
||||||
if (ch >= 0x20 && ch <= 0x7f) {
|
|
||||||
switch (ch) {
|
|
||||||
case '\\':
|
|
||||||
fbuffer_append(buffer, "\\\\", 2);
|
|
||||||
break;
|
|
||||||
case '"':
|
|
||||||
fbuffer_append(buffer, "\\\"", 2);
|
|
||||||
break;
|
|
||||||
case '/':
|
|
||||||
if(script_safe) {
|
|
||||||
fbuffer_append(buffer, "\\/", 2);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
fbuffer_append_char(buffer, (char)ch);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
switch (ch) {
|
|
||||||
case '\n':
|
|
||||||
fbuffer_append(buffer, "\\n", 2);
|
|
||||||
break;
|
|
||||||
case '\r':
|
|
||||||
fbuffer_append(buffer, "\\r", 2);
|
|
||||||
break;
|
|
||||||
case '\t':
|
|
||||||
fbuffer_append(buffer, "\\t", 2);
|
|
||||||
break;
|
|
||||||
case '\f':
|
|
||||||
fbuffer_append(buffer, "\\f", 2);
|
|
||||||
break;
|
|
||||||
case '\b':
|
|
||||||
fbuffer_append(buffer, "\\b", 2);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (ch > UNI_MAX_UTF16) {
|
|
||||||
#if UNI_STRICT_CONVERSION
|
|
||||||
source -= (extraBytesToRead+1); /* return to the start */
|
|
||||||
rb_raise(rb_path2class("JSON::GeneratorError"),
|
|
||||||
"source sequence is illegal/malformed utf8");
|
|
||||||
#else
|
|
||||||
unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
|
|
||||||
#endif
|
|
||||||
} else {
|
|
||||||
/* target is a character in range 0xFFFF - 0x10FFFF. */
|
|
||||||
ch -= halfBase;
|
|
||||||
unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
|
|
||||||
unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
RB_GC_GUARD(string);
|
RB_GC_GUARD(string);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -202,98 +62,6 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe
|
|||||||
unsigned char c;
|
unsigned char c;
|
||||||
char buf[6] = { '\\', 'u' };
|
char buf[6] = { '\\', 'u' };
|
||||||
int ascii_only = rb_enc_str_asciionly_p(string);
|
int ascii_only = rb_enc_str_asciionly_p(string);
|
||||||
|
|
||||||
if (!ascii_only) {
|
|
||||||
if (RB_ENCODING_GET_INLINED(string) != rb_utf8_encindex() || RB_ENC_CODERANGE(string) != RUBY_ENC_CODERANGE_VALID) {
|
|
||||||
rb_raise(rb_path2class("JSON::GeneratorError"),
|
|
||||||
"source sequence is illegal/malformed utf-8");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (start = 0, end = 0; end < len;) {
|
|
||||||
p = ptr + end;
|
|
||||||
c = (unsigned char) *p;
|
|
||||||
if (c < 0x20) {
|
|
||||||
switch (c) {
|
|
||||||
case '\n':
|
|
||||||
escape = "\\n";
|
|
||||||
escape_len = 2;
|
|
||||||
break;
|
|
||||||
case '\r':
|
|
||||||
escape = "\\r";
|
|
||||||
escape_len = 2;
|
|
||||||
break;
|
|
||||||
case '\t':
|
|
||||||
escape = "\\t";
|
|
||||||
escape_len = 2;
|
|
||||||
break;
|
|
||||||
case '\f':
|
|
||||||
escape = "\\f";
|
|
||||||
escape_len = 2;
|
|
||||||
break;
|
|
||||||
case '\b':
|
|
||||||
escape = "\\b";
|
|
||||||
escape_len = 2;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unicode_escape(buf, (UTF16) *p);
|
|
||||||
escape = buf;
|
|
||||||
escape_len = 6;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
switch (c) {
|
|
||||||
case '\\':
|
|
||||||
escape = "\\\\";
|
|
||||||
escape_len = 2;
|
|
||||||
break;
|
|
||||||
case '"':
|
|
||||||
escape = "\\\"";
|
|
||||||
escape_len = 2;
|
|
||||||
break;
|
|
||||||
case '/':
|
|
||||||
if(script_safe) {
|
|
||||||
escape = "\\/";
|
|
||||||
escape_len = 2;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
unsigned short clen = 1;
|
|
||||||
if (!ascii_only) {
|
|
||||||
clen += trailingBytesForUTF8[c];
|
|
||||||
if (end + clen > len) {
|
|
||||||
rb_raise(rb_path2class("JSON::GeneratorError"),
|
|
||||||
"partial character in source, but hit end");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (script_safe && c == 0xE2) {
|
|
||||||
unsigned char c2 = (unsigned char) *(p+1);
|
|
||||||
unsigned char c3 = (unsigned char) *(p+2);
|
|
||||||
if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
|
|
||||||
fbuffer_append(buffer, ptr + start, end - start);
|
|
||||||
start = end = (end + clen);
|
|
||||||
if (c3 == 0xA8) {
|
|
||||||
fbuffer_append(buffer, "\\u2028", 6);
|
|
||||||
} else {
|
|
||||||
fbuffer_append(buffer, "\\u2029", 6);
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
end += clen;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fbuffer_append(buffer, ptr + start, end - start);
|
|
||||||
fbuffer_append(buffer, escape, escape_len);
|
|
||||||
start = ++end;
|
|
||||||
escape = NULL;
|
|
||||||
}
|
|
||||||
fbuffer_append(buffer, ptr + start, end - start);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *fstrndup(const char *ptr, unsigned long len) {
|
static char *fstrndup(const char *ptr, unsigned long len) {
|
||||||
|
@ -22,30 +22,6 @@
|
|||||||
|
|
||||||
#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key))
|
#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key))
|
||||||
|
|
||||||
/* unicode definitions */
|
|
||||||
|
|
||||||
#define UNI_STRICT_CONVERSION 1
|
|
||||||
|
|
||||||
typedef unsigned long UTF32; /* at least 32 bits */
|
|
||||||
typedef unsigned short UTF16; /* at least 16 bits */
|
|
||||||
typedef unsigned char UTF8; /* typically 8 bits */
|
|
||||||
|
|
||||||
#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
|
|
||||||
#define UNI_MAX_BMP (UTF32)0x0000FFFF
|
|
||||||
#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
|
|
||||||
#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
|
|
||||||
#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
|
|
||||||
|
|
||||||
#define UNI_SUR_HIGH_START (UTF32)0xD800
|
|
||||||
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
|
|
||||||
#define UNI_SUR_LOW_START (UTF32)0xDC00
|
|
||||||
#define UNI_SUR_LOW_END (UTF32)0xDFFF
|
|
||||||
|
|
||||||
static const int halfShift = 10; /* used for shifting by 10 bits */
|
|
||||||
|
|
||||||
static const UTF32 halfBase = 0x0010000UL;
|
|
||||||
static const UTF32 halfMask = 0x3FFUL;
|
|
||||||
|
|
||||||
static void unicode_escape(char *buf, UTF16 character);
|
static void unicode_escape(char *buf, UTF16 character);
|
||||||
static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
|
static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
|
||||||
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe);
|
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe);
|
||||||
|
@ -19,18 +19,6 @@
|
|||||||
|
|
||||||
#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key))
|
#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key))
|
||||||
|
|
||||||
/* unicode */
|
|
||||||
|
|
||||||
typedef unsigned long UTF32; /* at least 32 bits */
|
|
||||||
typedef unsigned short UTF16; /* at least 16 bits */
|
|
||||||
typedef unsigned char UTF8; /* typically 8 bits */
|
|
||||||
|
|
||||||
#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
|
|
||||||
#define UNI_SUR_HIGH_START (UTF32)0xD800
|
|
||||||
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
|
|
||||||
#define UNI_SUR_LOW_START (UTF32)0xDC00
|
|
||||||
#define UNI_SUR_LOW_END (UTF32)0xDFFF
|
|
||||||
|
|
||||||
typedef struct JSON_ParserStruct {
|
typedef struct JSON_ParserStruct {
|
||||||
VALUE Vsource;
|
VALUE Vsource;
|
||||||
char *source;
|
char *source;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user