[ruby/json] Emit warnings when dumping binary strings
Because of its Ruby 1.8 heritage, the C extension doesn't care much about string encodings. We should get stricter over time. https://github.com/ruby/json/commit/42402fc13f
This commit is contained in:
parent
f2b8829df0
commit
3782600f0f
@ -46,6 +46,8 @@ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *dat
|
||||
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
|
||||
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
|
||||
|
||||
static int usascii_encindex, utf8_encindex, binary_encindex;
|
||||
|
||||
/* Converts in_string to a JSON string (without the wrapping '"'
|
||||
* characters) in FBuffer out_buffer.
|
||||
*
|
||||
@ -535,7 +537,7 @@ static VALUE mString_to_json_raw_object(VALUE self)
|
||||
VALUE result = rb_hash_new();
|
||||
rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
|
||||
ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
|
||||
rb_hash_aset(result, rb_str_new2("raw"), ary);
|
||||
rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -822,8 +824,6 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
|
||||
fbuffer_append_char(buffer, ']');
|
||||
}
|
||||
|
||||
static int usascii_encindex, utf8_encindex, binary_encindex;
|
||||
|
||||
static inline int enc_utf8_compatible_p(int enc_idx)
|
||||
{
|
||||
if (enc_idx == usascii_encindex) return 1;
|
||||
@ -837,13 +837,14 @@ static inline VALUE ensure_valid_encoding(VALUE str)
|
||||
VALUE utf8_string;
|
||||
if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
|
||||
if (encindex == binary_encindex) {
|
||||
// For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
|
||||
// TODO: Deprecate in 2.8.0
|
||||
// TODO: Remove in 3.0.0
|
||||
utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
|
||||
switch (rb_enc_str_coderange(utf8_string)) {
|
||||
case ENC_CODERANGE_7BIT:
|
||||
return utf8_string;
|
||||
case ENC_CODERANGE_VALID:
|
||||
// For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
|
||||
// TODO: Raise in 3.0.0
|
||||
rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
|
||||
return utf8_string;
|
||||
break;
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ class BigDecimal
|
||||
def as_json(*)
|
||||
{
|
||||
JSON.create_id => self.class.name,
|
||||
'b' => _dump,
|
||||
'b' => _dump.force_encoding(Encoding::UTF_8),
|
||||
}
|
||||
end
|
||||
|
||||
|
@ -1798,9 +1798,12 @@ static VALUE convert_encoding(VALUE source)
|
||||
|
||||
if (encindex == binary_encindex) {
|
||||
// For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
|
||||
// TODO: Deprecate in 2.8.0
|
||||
// TODO: Remove in 3.0.0
|
||||
return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
|
||||
VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
|
||||
switch (rb_enc_str_coderange(utf8_string)) {
|
||||
case ENC_CODERANGE_7BIT:
|
||||
case ENC_CODERANGE_VALID:
|
||||
return utf8_string;
|
||||
}
|
||||
}
|
||||
|
||||
return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding());
|
||||
@ -1955,7 +1958,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
|
||||
}
|
||||
|
||||
|
||||
#line 1959 "parser.c"
|
||||
#line 1962 "parser.c"
|
||||
enum {JSON_start = 1};
|
||||
enum {JSON_first_final = 10};
|
||||
enum {JSON_error = 0};
|
||||
@ -1963,7 +1966,7 @@ enum {JSON_error = 0};
|
||||
enum {JSON_en_main = 1};
|
||||
|
||||
|
||||
#line 867 "parser.rl"
|
||||
#line 870 "parser.rl"
|
||||
|
||||
|
||||
/*
|
||||
@ -1981,16 +1984,16 @@ static VALUE cParser_parse(VALUE self)
|
||||
GET_PARSER;
|
||||
|
||||
|
||||
#line 1985 "parser.c"
|
||||
#line 1988 "parser.c"
|
||||
{
|
||||
cs = JSON_start;
|
||||
}
|
||||
|
||||
#line 884 "parser.rl"
|
||||
#line 887 "parser.rl"
|
||||
p = json->source;
|
||||
pe = p + json->len;
|
||||
|
||||
#line 1994 "parser.c"
|
||||
#line 1997 "parser.c"
|
||||
{
|
||||
if ( p == pe )
|
||||
goto _test_eof;
|
||||
@ -2024,7 +2027,7 @@ st0:
|
||||
cs = 0;
|
||||
goto _out;
|
||||
tr2:
|
||||
#line 859 "parser.rl"
|
||||
#line 862 "parser.rl"
|
||||
{
|
||||
char *np = JSON_parse_value(json, p, pe, &result, 0);
|
||||
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
|
||||
@ -2034,7 +2037,7 @@ st10:
|
||||
if ( ++p == pe )
|
||||
goto _test_eof10;
|
||||
case 10:
|
||||
#line 2038 "parser.c"
|
||||
#line 2041 "parser.c"
|
||||
switch( (*p) ) {
|
||||
case 13: goto st10;
|
||||
case 32: goto st10;
|
||||
@ -2123,7 +2126,7 @@ case 9:
|
||||
_out: {}
|
||||
}
|
||||
|
||||
#line 887 "parser.rl"
|
||||
#line 890 "parser.rl"
|
||||
|
||||
if (cs >= JSON_first_final && p == pe) {
|
||||
return result;
|
||||
|
@ -693,9 +693,12 @@ static VALUE convert_encoding(VALUE source)
|
||||
|
||||
if (encindex == binary_encindex) {
|
||||
// For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
|
||||
// TODO: Deprecate in 2.8.0
|
||||
// TODO: Remove in 3.0.0
|
||||
return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
|
||||
VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
|
||||
switch (rb_enc_str_coderange(utf8_string)) {
|
||||
case ENC_CODERANGE_7BIT:
|
||||
case ENC_CODERANGE_VALID:
|
||||
return utf8_string;
|
||||
}
|
||||
}
|
||||
|
||||
return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding());
|
||||
|
@ -507,8 +507,13 @@ class JSONGeneratorTest < Test::Unit::TestCase
|
||||
wrong_encoding_string = utf8_string.b
|
||||
# This behavior is historical, and not necessarily desirable. We should deprecate it.
|
||||
# The pure-Ruby and Java versions of the gem already don't behave this way.
|
||||
assert_equal utf8_string.to_json, wrong_encoding_string.to_json
|
||||
assert_equal JSON.dump(utf8_string), JSON.dump(wrong_encoding_string)
|
||||
assert_warning(/UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0/) do
|
||||
assert_equal utf8_string.to_json, wrong_encoding_string.to_json
|
||||
end
|
||||
|
||||
assert_warning(/UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0/) do
|
||||
assert_equal JSON.dump(utf8_string), JSON.dump(wrong_encoding_string)
|
||||
end
|
||||
end
|
||||
|
||||
def test_string_ext_included_calls_super
|
||||
|
Loading…
x
Reference in New Issue
Block a user