* include/ruby/encoding.h (rb_econv_set_replacemenet): declared.
* transcode.c (rb_econv_t): new fields: replacement_str, replacement_len, replacement_enc and replacement_allocated. (get_replacement_character): change len to size_t. (rb_econv_open_by_transcoder_entries): initialize the new fields. (rb_econv_close): deallocate replacement_str if it was allocated. (make_replacement): new function. (output_replacement_character): use make_replacement. (rb_econv_set_replacemenet): defined. (econv_get_replacement): new method. (econv_set_replacement): new method. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19108 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
c359b80127
commit
0ebcad6a7b
15
ChangeLog
15
ChangeLog
@ -1,3 +1,18 @@
|
|||||||
|
Thu Sep 4 01:30:26 2008 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
|
* include/ruby/encoding.h (rb_econv_set_replacemenet): declared.
|
||||||
|
|
||||||
|
* transcode.c (rb_econv_t): new fields: replacement_str,
|
||||||
|
replacement_len, replacement_enc and replacement_allocated.
|
||||||
|
(get_replacement_character): change len to size_t.
|
||||||
|
(rb_econv_open_by_transcoder_entries): initialize the new fields.
|
||||||
|
(rb_econv_close): deallocate replacement_str if it was allocated.
|
||||||
|
(make_replacement): new function.
|
||||||
|
(output_replacement_character): use make_replacement.
|
||||||
|
(rb_econv_set_replacemenet): defined.
|
||||||
|
(econv_get_replacement): new method.
|
||||||
|
(econv_set_replacement): new method.
|
||||||
|
|
||||||
Thu Sep 4 01:12:03 2008 NAKAMURA Usaku <usa@ruby-lang.org>
|
Thu Sep 4 01:12:03 2008 NAKAMURA Usaku <usa@ruby-lang.org>
|
||||||
|
|
||||||
* win32/win32.c (filetime_to_timeval): new function, split from
|
* win32/win32.c (filetime_to_timeval): new function, split from
|
||||||
|
@ -219,6 +219,9 @@ rb_econv_result_t rb_econv_convert(rb_econv_t *ec,
|
|||||||
int flags);
|
int flags);
|
||||||
void rb_econv_close(rb_econv_t *ec);
|
void rb_econv_close(rb_econv_t *ec);
|
||||||
|
|
||||||
|
/* result: 0:success -1:failure */
|
||||||
|
int rb_econv_set_replacemenet(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname);
|
||||||
|
|
||||||
VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags);
|
VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags);
|
||||||
|
|
||||||
/* result: 0:success -1:failure */
|
/* result: 0:success -1:failure */
|
||||||
|
@ -640,4 +640,18 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||||||
assert_kind_of(Encoding::ConversionUndefined, err)
|
assert_kind_of(Encoding::ConversionUndefined, err)
|
||||||
assert_equal("\u{3042}", err.error_char)
|
assert_equal("\u{3042}", err.error_char)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# The default replacement is "?" when converting to ISO-8859-1 and
# U+FFFD when converting to UTF-8.
def test_get_replacement
  to_latin1 = Encoding::Converter.new("euc-jp", "iso-8859-1")
  assert_equal("?", to_latin1.replacement)

  to_utf8 = Encoding::Converter.new("euc-jp", "utf-8")
  assert_equal("\uFFFD", to_utf8.replacement)
end
|
||||||
|
|
||||||
|
# A replacement string assigned through #replacement= is what gets emitted
# for an undefined character when UNDEF_REPLACE is requested.
def test_set_replacement
  converter = Encoding::Converter.new("utf-8", "us-ascii", Encoding::Converter::UNDEF_REPLACE)
  converter.replacement = "<undef>"
  assert_equal("a <undef> b", converter.convert("a \u3042 b"))
end
|
||||||
end
|
end
|
||||||
|
144
transcode.c
144
transcode.c
@ -87,6 +87,11 @@ struct rb_econv_t {
|
|||||||
const char *source_encoding_name;
|
const char *source_encoding_name;
|
||||||
const char *destination_encoding_name;
|
const char *destination_encoding_name;
|
||||||
|
|
||||||
|
const unsigned char *replacement_str;
|
||||||
|
size_t replacement_len;
|
||||||
|
const char *replacement_enc;
|
||||||
|
int replacement_allocated;
|
||||||
|
|
||||||
unsigned char *in_buf_start;
|
unsigned char *in_buf_start;
|
||||||
unsigned char *in_data_start;
|
unsigned char *in_data_start;
|
||||||
unsigned char *in_data_end;
|
unsigned char *in_data_end;
|
||||||
@ -357,7 +362,7 @@ load_transcoder_entry(transcoder_entry_t *entry)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static const char*
|
static const char*
|
||||||
get_replacement_character(rb_encoding *enc, int *len_ret, const char **repl_enc_ptr)
|
get_replacement_character(rb_encoding *enc, size_t *len_ret, const char **repl_enc_ptr)
|
||||||
{
|
{
|
||||||
static rb_encoding *utf16be_encoding, *utf16le_encoding;
|
static rb_encoding *utf16be_encoding, *utf16le_encoding;
|
||||||
static rb_encoding *utf32be_encoding, *utf32le_encoding;
|
static rb_encoding *utf32be_encoding, *utf32le_encoding;
|
||||||
@ -793,6 +798,9 @@ rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
|
|||||||
ec->flags = 0;
|
ec->flags = 0;
|
||||||
ec->source_encoding_name = NULL;
|
ec->source_encoding_name = NULL;
|
||||||
ec->destination_encoding_name = NULL;
|
ec->destination_encoding_name = NULL;
|
||||||
|
ec->replacement_str = NULL;
|
||||||
|
ec->replacement_len = 0;
|
||||||
|
ec->replacement_allocated = 0;
|
||||||
ec->in_buf_start = NULL;
|
ec->in_buf_start = NULL;
|
||||||
ec->in_data_start = NULL;
|
ec->in_data_start = NULL;
|
||||||
ec->in_data_end = NULL;
|
ec->in_data_end = NULL;
|
||||||
@ -1481,6 +1489,9 @@ rb_econv_close(rb_econv_t *ec)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
if (ec->replacement_allocated) {
|
||||||
|
xfree((void *)ec->replacement_str);
|
||||||
|
}
|
||||||
for (i = 0; i < ec->num_trans; i++) {
|
for (i = 0; i < ec->num_trans; i++) {
|
||||||
rb_transcoding_close(ec->elems[i].tc);
|
rb_transcoding_close(ec->elems[i].tc);
|
||||||
if (ec->elems[i].out_buf_start)
|
if (ec->elems[i].out_buf_start)
|
||||||
@ -1773,15 +1784,19 @@ more_output_buffer(
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
output_replacement_character(rb_econv_t *ec)
|
make_replacement(rb_econv_t *ec)
|
||||||
{
|
{
|
||||||
rb_transcoding *tc;
|
rb_transcoding *tc;
|
||||||
const rb_transcoder *tr;
|
const rb_transcoder *tr;
|
||||||
rb_encoding *enc;
|
rb_encoding *enc;
|
||||||
const unsigned char *replacement;
|
const unsigned char *replacement;
|
||||||
const char *repl_enc;
|
const char *repl_enc;
|
||||||
int len;
|
const char *ins_enc;
|
||||||
int ret;
|
size_t len;
|
||||||
|
int allocated = 0;
|
||||||
|
|
||||||
|
if (ec->replacement_str)
|
||||||
|
return 0;
|
||||||
|
|
||||||
tc = ec->last_tc;
|
tc = ec->last_tc;
|
||||||
if (tc) {
|
if (tc) {
|
||||||
@ -1795,7 +1810,62 @@ output_replacement_character(rb_econv_t *ec)
|
|||||||
repl_enc = "";
|
repl_enc = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = rb_econv_insert_output(ec, replacement, len, repl_enc);
|
ins_enc = rb_econv_encoding_to_insert_output(ec);
|
||||||
|
if (*repl_enc && !encoding_equal(repl_enc, ins_enc)) {
|
||||||
|
replacement = allocate_converted_string(repl_enc, ins_enc, replacement, len, &len);
|
||||||
|
if (!replacement)
|
||||||
|
return -1;
|
||||||
|
allocated = 1;
|
||||||
|
repl_enc = ins_enc;
|
||||||
|
}
|
||||||
|
ec->replacement_str = replacement;
|
||||||
|
ec->replacement_len = len;
|
||||||
|
ec->replacement_enc = repl_enc;
|
||||||
|
ec->replacement_allocated = allocated;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
rb_econv_set_replacemenet(rb_econv_t *ec,
|
||||||
|
const unsigned char *str, size_t len, const char *encname)
|
||||||
|
{
|
||||||
|
unsigned char *str2;
|
||||||
|
size_t len2;
|
||||||
|
const char *encname2;
|
||||||
|
|
||||||
|
encname2 = rb_econv_encoding_to_insert_output(ec);
|
||||||
|
|
||||||
|
if (encoding_equal(encname, encname2)) {
|
||||||
|
str2 = xmalloc(len);
|
||||||
|
MEMCPY(str2, str, unsigned char, len); /* xxx: str may be invalid */
|
||||||
|
len2 = len;
|
||||||
|
encname2 = encname;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
str2 = allocate_converted_string(encname, encname2, str, len, &len2);
|
||||||
|
if (!str2)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ec->replacement_allocated) {
|
||||||
|
xfree((void *)ec->replacement_str);
|
||||||
|
}
|
||||||
|
ec->replacement_allocated = 1;
|
||||||
|
ec->replacement_str = str2;
|
||||||
|
ec->replacement_len = len2;
|
||||||
|
ec->replacement_enc = encname2;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
output_replacement_character(rb_econv_t *ec)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (make_replacement(ec) == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
ret = rb_econv_insert_output(ec, ec->replacement_str, ec->replacement_len, ec->replacement_enc);
|
||||||
if (ret == -1)
|
if (ret == -1)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@ -2917,6 +2987,68 @@ econv_last_error(VALUE self)
|
|||||||
return exc;
|
return exc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* ec.replacement -> string
|
||||||
|
*
|
||||||
|
* returns the replacement string.
|
||||||
|
*
|
||||||
|
* ec = Encoding::Converter.new("euc-jp", "us-ascii")
|
||||||
|
* p ec.replacement #=> "?"
|
||||||
|
*
|
||||||
|
* ec = Encoding::Converter.new("euc-jp", "utf-8")
|
||||||
|
* p ec.replacement #=> "\uFFFD"
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
econv_get_replacement(VALUE self)
|
||||||
|
{
|
||||||
|
rb_econv_t *ec = check_econv(self);
|
||||||
|
int ret;
|
||||||
|
rb_encoding *enc;
|
||||||
|
|
||||||
|
ret = make_replacement(ec);
|
||||||
|
if (ret == -1) {
|
||||||
|
rb_raise(rb_eConversionUndefined, "replacement character setup failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
enc = rb_enc_find(ec->replacement_enc);
|
||||||
|
return rb_enc_str_new((const char *)ec->replacement_str, (long)ec->replacement_len, enc);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* ec.replacement = string
|
||||||
|
*
|
||||||
|
* sets the replacement string.
|
||||||
|
*
|
||||||
|
* ec = Encoding::Converter.new("utf-8", "us-ascii", Encoding::Converter::UNDEF_REPLACE)
|
||||||
|
* ec.replacement = "<undef>"
|
||||||
|
* p ec.convert("a \u3042 b") #=> "a <undef> b"
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
econv_set_replacement(VALUE self, VALUE arg)
|
||||||
|
{
|
||||||
|
rb_econv_t *ec = check_econv(self);
|
||||||
|
VALUE string = arg;
|
||||||
|
int ret;
|
||||||
|
rb_encoding *enc;
|
||||||
|
|
||||||
|
StringValue(string);
|
||||||
|
enc = rb_enc_get(string);
|
||||||
|
|
||||||
|
ret = rb_econv_set_replacemenet(ec,
|
||||||
|
(const unsigned char *)RSTRING_PTR(string),
|
||||||
|
RSTRING_LEN(string),
|
||||||
|
enc->name);
|
||||||
|
|
||||||
|
if (ret == -1) {
|
||||||
|
/* xxx: rb_eInvalidByteSequence? */
|
||||||
|
rb_raise(rb_eConversionUndefined, "replacement character setup failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
return arg;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
rb_econv_check_error(rb_econv_t *ec)
|
rb_econv_check_error(rb_econv_t *ec)
|
||||||
{
|
{
|
||||||
@ -3114,6 +3246,8 @@ Init_transcode(void)
|
|||||||
rb_define_method(rb_cEncodingConverter, "insert_output", econv_insert_output, 1);
|
rb_define_method(rb_cEncodingConverter, "insert_output", econv_insert_output, 1);
|
||||||
rb_define_method(rb_cEncodingConverter, "putback", econv_putback, -1);
|
rb_define_method(rb_cEncodingConverter, "putback", econv_putback, -1);
|
||||||
rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0);
|
rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0);
|
||||||
|
rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0);
|
||||||
|
rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1);
|
||||||
rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK));
|
rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK));
|
||||||
rb_define_const(rb_cEncodingConverter, "INVALID_IGNORE", INT2FIX(ECONV_INVALID_IGNORE));
|
rb_define_const(rb_cEncodingConverter, "INVALID_IGNORE", INT2FIX(ECONV_INVALID_IGNORE));
|
||||||
rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE));
|
rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user