Thu Feb 21 17:15:15 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
* transcode.c: Added basic support for passing options to String#encode via a hash. Currently only one option, with one value, is supported: invalid: :ignore (dropping invalid byte sequences instead of producing an error). Option naming is not yet stable! * test/ruby/test_transcode.rb: Added a single test for the invalid: :ignore option. No further tests were added because most data does not yet distinguish between INVALID and UNKNOWN. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15565 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
ca22f3e168
commit
6d5ef97a32
11
ChangeLog
11
ChangeLog
@ -1,3 +1,14 @@
|
|||||||
|
Thu Feb 21 17:15:15 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||||
|
|
||||||
|
* transcode.c: Added basic support for passing options to String#encode
|
||||||
|
via a hash. Currently only one option, with one value, is supported:
|
||||||
|
invalid: :ignore (dropping invalid byte sequences instead of
|
||||||
|
producing an error). Option naming is not yet stable!
|
||||||
|
|
||||||
|
* test/ruby/test_transcode.rb: Added a single test for invalid: :ignore
|
||||||
|
option. No further tests yet, because most data does not yet distinguish
|
||||||
|
between INVALID and UNKNOWN.
|
||||||
|
|
||||||
Thu Feb 21 16:35:26 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Thu Feb 21 16:35:26 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* array.c (rb_ary_unshift_m): expands enough for argc. [ruby-dev:33880]
|
* array.c (rb_ary_unshift_m): expands enough for argc. [ruby-dev:33880]
|
||||||
|
@ -19,8 +19,6 @@ class TestTranscode < Test::Unit::TestCase
|
|||||||
end
|
end
|
||||||
|
|
||||||
def test_errors
|
def test_errors
|
||||||
# we don't have semantics for conversion without attribute yet
|
|
||||||
# maybe 'convert to UTF-8' would be nice :-)
|
|
||||||
assert_raise(ArgumentError) { 'abc'.encode }
|
assert_raise(ArgumentError) { 'abc'.encode }
|
||||||
assert_raise(ArgumentError) { 'abc'.encode! }
|
assert_raise(ArgumentError) { 'abc'.encode! }
|
||||||
assert_raise(ArgumentError) { 'abc'.encode('foo', 'bar') }
|
assert_raise(ArgumentError) { 'abc'.encode('foo', 'bar') }
|
||||||
@ -241,4 +239,9 @@ class TestTranscode < Test::Unit::TestCase
|
|||||||
check_utf_32_both_ways("\u{8FF00}", "\x00\x08\xFF\x00")
|
check_utf_32_both_ways("\u{8FF00}", "\x00\x08\xFF\x00")
|
||||||
check_utf_32_both_ways("\u{F00FF}", "\x00\x0F\x00\xFF")
|
check_utf_32_both_ways("\u{F00FF}", "\x00\x0F\x00\xFF")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_invalid_ignore
|
||||||
|
# arguments only
|
||||||
|
'abc'.encode('utf-8', invalid: :ignore)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
33
transcode.c
33
transcode.c
@ -15,6 +15,9 @@
|
|||||||
#include "transcode_data.h"
|
#include "transcode_data.h"
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
|
static VALUE sym_invalid, sym_ignore;
|
||||||
|
#define INVALID_IGNORE 0x1
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Dispatch data and logic
|
* Dispatch data and logic
|
||||||
*/
|
*/
|
||||||
@ -132,7 +135,8 @@ static void
|
|||||||
transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
|
transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
|
||||||
unsigned char *in_stop, unsigned char *out_stop,
|
unsigned char *in_stop, unsigned char *out_stop,
|
||||||
const rb_transcoder *my_transcoder,
|
const rb_transcoder *my_transcoder,
|
||||||
rb_transcoding *my_transcoding)
|
rb_transcoding *my_transcoding,
|
||||||
|
const int opt)
|
||||||
{
|
{
|
||||||
unsigned char *in_p = *in_pos, *out_p = *out_pos;
|
unsigned char *in_p = *in_pos, *out_p = *out_pos;
|
||||||
const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start;
|
const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start;
|
||||||
@ -211,14 +215,17 @@ transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
|
|||||||
case INVALID:
|
case INVALID:
|
||||||
goto invalid;
|
goto invalid;
|
||||||
case UNDEF:
|
case UNDEF:
|
||||||
/* todo: add code for alternative behaviors */
|
/* todo: add code for alternate behaviors */
|
||||||
rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");
|
rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
invalid:
|
invalid:
|
||||||
/* deal with invalid byte sequence */
|
/* deal with invalid byte sequence */
|
||||||
/* todo: add code for alternative behaviors */
|
/* todo: add more alternative behaviors */
|
||||||
|
if (opt&INVALID_IGNORE) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence");
|
rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -254,7 +261,22 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||||||
const rb_transcoder *my_transcoder;
|
const rb_transcoder *my_transcoder;
|
||||||
rb_transcoding my_transcoding;
|
rb_transcoding my_transcoding;
|
||||||
int final_encoding = 0;
|
int final_encoding = 0;
|
||||||
|
VALUE opt;
|
||||||
|
int options = 0;
|
||||||
|
|
||||||
|
opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
|
||||||
|
if (!NIL_P(opt)) {
|
||||||
|
VALUE v;
|
||||||
|
|
||||||
|
argc--;
|
||||||
|
v = rb_hash_aref(opt, sym_invalid);
|
||||||
|
if (NIL_P(v)) {
|
||||||
|
rb_raise(rb_eArgError, "unknown value for invalid: setting");
|
||||||
|
}
|
||||||
|
else if (v==sym_ignore) {
|
||||||
|
options |= INVALID_IGNORE;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (argc < 1 || argc > 2) {
|
if (argc < 1 || argc > 2) {
|
||||||
rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
|
rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
|
||||||
}
|
}
|
||||||
@ -325,7 +347,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||||||
my_transcoding.ruby_string_dest = dest;
|
my_transcoding.ruby_string_dest = dest;
|
||||||
my_transcoding.flush_func = str_transcoding_resize;
|
my_transcoding.flush_func = str_transcoding_resize;
|
||||||
|
|
||||||
transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding);
|
transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding, options);
|
||||||
if (fromp != sp+slen) {
|
if (fromp != sp+slen) {
|
||||||
rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp);
|
rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp);
|
||||||
}
|
}
|
||||||
@ -426,6 +448,9 @@ Init_transcode(void)
|
|||||||
transcoder_lib_table = st_init_strcasetable();
|
transcoder_lib_table = st_init_strcasetable();
|
||||||
init_transcoder_table();
|
init_transcoder_table();
|
||||||
|
|
||||||
|
sym_invalid = ID2SYM(rb_intern("invalid"));
|
||||||
|
sym_ignore = ID2SYM(rb_intern("ignore"));
|
||||||
|
|
||||||
rb_define_method(rb_cString, "encode", rb_str_transcode, -1);
|
rb_define_method(rb_cString, "encode", rb_str_transcode, -1);
|
||||||
rb_define_method(rb_cString, "encode!", rb_str_transcode_bang, -1);
|
rb_define_method(rb_cString, "encode!", rb_str_transcode_bang, -1);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user