[ruby/json] Delete code that is based on CVTUTF

I did this based on manual inspection, comparing the code to my re-created history of CVTUTF at https://git.lukeshu.com/2git/cvtutf/ (created by the scripts at https://git.lukeshu.com/2git/cvtutf-make/) https://github.com/ruby/json/commit/0819553144
2024-02-22 20:51:28 -07:00 · 2024-02-22 20:51:28 -07:00 · 6e47968929
commit 6e47968929
parent bad4ad63bf
3 changed files with 0 additions and 268 deletions
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@ -18,54 +18,6 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
          i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
          i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash, i_strict;
 /*
 * Copyright 2001-2004 Unicode, Inc.
 *
 * Disclaimer
 *
 * This source code is provided as is by Unicode, Inc. No claims are
 * made as to fitness for any particular purpose. No warranties of any
 * kind are expressed or implied. The recipient agrees to determine
 * applicability of information provided. If this file has been
 * purchased on magnetic or optical media from Unicode, Inc., the
 * sole remedy for any claim will be exchange of defective media
 * within 90 days of receipt.
 *
 * Limitations on Rights to Redistribute This Code
 *
 * Unicode, Inc. hereby grants the right to freely use the information
 * supplied in this file in the creation of products supporting the
 * Unicode Standard, and to make copies of this file in any form
 * for internal or external distribution as long as this notice
 * remains attached.
 */
 /*
 * Index into the table below with the first byte of a UTF-8 sequence to
 * get the number of trailing bytes that are supposed to follow it.
 * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
 * left as-is for anyone who may want to do such conversion, which was
 * allowed in earlier algorithms.
 */
 static const char trailingBytesForUTF8[256] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
 };
 /*
 * Magic values subtracted from a buffer value during UTF8 conversion.
 * This table contains as many values as there might be trailing bytes
 * in a UTF-8 sequence.
 */
 static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
    0x03C82080UL, 0xFA082080UL, 0x82082080UL };
 /* Escapes the UTF16 character and stores the result in the buffer buf. */
 static void unicode_escape(char *buf, UTF16 character)
 {
@ -94,98 +46,6 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char scrip
    const UTF8 *sourceEnd = source + RSTRING_LEN(string);
    char buf[6] = { '\\', 'u' };
    int ascii_only = rb_enc_str_asciionly_p(string);
    if (!ascii_only) {
        if (RB_ENCODING_GET_INLINED(string) != rb_utf8_encindex() || RB_ENC_CODERANGE(string) != RUBY_ENC_CODERANGE_VALID) {
            rb_raise(rb_path2class("JSON::GeneratorError"),
                    "source sequence is illegal/malformed utf-8");
        }
    }
    while (source < sourceEnd) {
        UTF32 ch = 0;
        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
        /*
         * The cases all fall through. See "Note A" below.
         */
        switch (extraBytesToRead) {
            case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
            case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
            case 3: ch += *source++; ch <<= 6;
            case 2: ch += *source++; ch <<= 6;
            case 1: ch += *source++; ch <<= 6;
            case 0: ch += *source++;
        }
        ch -= offsetsFromUTF8[extraBytesToRead];
        if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
 #if UNI_STRICT_CONVERSION
                source -= (extraBytesToRead+1); /* return to the illegal value itself */
                rb_raise(rb_path2class("JSON::GeneratorError"),
                        "source sequence is illegal/malformed utf-8");
 #else
                unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
 #endif
            } else {
                /* normal case */
                if (ch >= 0x20 && ch <= 0x7f) {
                    switch (ch) {
                        case '\\':
                            fbuffer_append(buffer, "\\\\", 2);
                            break;
                        case '"':
                            fbuffer_append(buffer, "\\\"", 2);
                            break;
                        case '/':
                            if(script_safe) {
                                fbuffer_append(buffer, "\\/", 2);
                                break;
                            }
                        default:
                            fbuffer_append_char(buffer, (char)ch);
                            break;
                    }
                } else {
                    switch (ch) {
                        case '\n':
                            fbuffer_append(buffer, "\\n", 2);
                            break;
                        case '\r':
                            fbuffer_append(buffer, "\\r", 2);
                            break;
                        case '\t':
                            fbuffer_append(buffer, "\\t", 2);
                            break;
                        case '\f':
                            fbuffer_append(buffer, "\\f", 2);
                            break;
                        case '\b':
                            fbuffer_append(buffer, "\\b", 2);
                            break;
                        default:
                            unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
                            break;
                    }
                }
            }
        } else if (ch > UNI_MAX_UTF16) {
 #if UNI_STRICT_CONVERSION
            source -= (extraBytesToRead+1); /* return to the start */
            rb_raise(rb_path2class("JSON::GeneratorError"),
                    "source sequence is illegal/malformed utf8");
 #else
            unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
 #endif
        } else {
            /* target is a character in range 0xFFFF - 0x10FFFF. */
            ch -= halfBase;
            unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
            unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
        }
    }
    RB_GC_GUARD(string);
 }
@ -202,98 +62,6 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe
    unsigned char c;
    char buf[6] = { '\\', 'u' };
    int ascii_only = rb_enc_str_asciionly_p(string);
    if (!ascii_only) {
        if (RB_ENCODING_GET_INLINED(string) != rb_utf8_encindex() || RB_ENC_CODERANGE(string) != RUBY_ENC_CODERANGE_VALID) {
            rb_raise(rb_path2class("JSON::GeneratorError"),
                    "source sequence is illegal/malformed utf-8");
        }
    }
    for (start = 0, end = 0; end < len;) {
        p = ptr + end;
        c = (unsigned char) *p;
        if (c < 0x20) {
            switch (c) {
                case '\n':
                    escape = "\\n";
                    escape_len = 2;
                    break;
                case '\r':
                    escape = "\\r";
                    escape_len = 2;
                    break;
                case '\t':
                    escape = "\\t";
                    escape_len = 2;
                    break;
                case '\f':
                    escape = "\\f";
                    escape_len = 2;
                    break;
                case '\b':
                    escape = "\\b";
                    escape_len = 2;
                    break;
                default:
                    unicode_escape(buf, (UTF16) *p);
                    escape = buf;
                    escape_len = 6;
                    break;
            }
        } else {
            switch (c) {
                case '\\':
                    escape = "\\\\";
                    escape_len = 2;
                    break;
                case '"':
                    escape =  "\\\"";
                    escape_len = 2;
                    break;
                case '/':
                    if(script_safe) {
                        escape = "\\/";
                        escape_len = 2;
                        break;
                    }
                default:
                    {
                        unsigned short clen = 1;
                        if (!ascii_only) {
                            clen += trailingBytesForUTF8[c];
                            if (end + clen > len) {
                                rb_raise(rb_path2class("JSON::GeneratorError"),
                                        "partial character in source, but hit end");
                            }
                            if (script_safe && c == 0xE2) {
                                unsigned char c2 = (unsigned char) *(p+1);
                                unsigned char c3 = (unsigned char) *(p+2);
                                if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
                                    fbuffer_append(buffer, ptr + start, end - start);
                                    start = end = (end + clen);
                                    if (c3 == 0xA8) {
                                        fbuffer_append(buffer, "\\u2028", 6);
                                    } else {
                                        fbuffer_append(buffer, "\\u2029", 6);
                                    }
                                    continue;
                                }
                            }
                        }
                        end += clen;
                    }
                    continue;
                    break;
            }
        }
        fbuffer_append(buffer, ptr + start, end - start);
        fbuffer_append(buffer, escape, escape_len);
        start = ++end;
        escape = NULL;
    }
    fbuffer_append(buffer, ptr + start, end - start);
 }
 static char *fstrndup(const char *ptr, unsigned long len) {
--- a/ext/json/generator/generator.h
+++ b/ext/json/generator/generator.h
@ -22,30 +22,6 @@
 #define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key))
 /* unicode definitions */
 #define UNI_STRICT_CONVERSION 1
 typedef unsigned long  UTF32; /* at least 32 bits */
 typedef unsigned short UTF16; /* at least 16 bits */
 typedef unsigned char  UTF8;  /* typically 8 bits */
 #define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
 #define UNI_MAX_BMP (UTF32)0x0000FFFF
 #define UNI_MAX_UTF16 (UTF32)0x0010FFFF
 #define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
 #define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
 #define UNI_SUR_HIGH_START  (UTF32)0xD800
 #define UNI_SUR_HIGH_END    (UTF32)0xDBFF
 #define UNI_SUR_LOW_START   (UTF32)0xDC00
 #define UNI_SUR_LOW_END     (UTF32)0xDFFF
 static const int halfShift  = 10; /* used for shifting by 10 bits */
 static const UTF32 halfBase = 0x0010000UL;
 static const UTF32 halfMask = 0x3FFUL;
 static void unicode_escape(char *buf, UTF16 character);
 static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
 static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe);
--- a/ext/json/parser/parser.h
+++ b/ext/json/parser/parser.h
@ -19,18 +19,6 @@
 #define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key))
 /* unicode */
 typedef unsigned long UTF32;  /* at least 32 bits */
 typedef unsigned short UTF16; /* at least 16 bits */
 typedef unsigned char UTF8;   /* typically 8 bits */
 #define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
 #define UNI_SUR_HIGH_START  (UTF32)0xD800
 #define UNI_SUR_HIGH_END    (UTF32)0xDBFF
 #define UNI_SUR_LOW_START   (UTF32)0xDC00
 #define UNI_SUR_LOW_END     (UTF32)0xDFFF
 typedef struct JSON_ParserStruct {
    VALUE Vsource;
    char *source;