Wed Mar 5 17:43:43 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
* transcode.c (transcode_loop): Adjusted detection of invalid (ill-formed) UTF-8 sequences. Fixing potential security issue, see http://www.unicode.org/versions/Unicode5.1.0/#Notable_Changes. * test/ruby/test_transcode.rb: Added two tests for above fix. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15692 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
39787ea14d
commit
08631278ad
@ -1,3 +1,11 @@
|
|||||||
|
Wed Mar 5 17:43:43 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||||
|
|
||||||
|
* transcode.c (transcode_loop): Adjusted detection of invalid
|
||||||
|
(ill-formed) UTF-8 sequences. Fixing potential security issue, see
|
||||||
|
http://www.unicode.org/versions/Unicode5.1.0/#Notable_Changes.
|
||||||
|
|
||||||
|
* test/ruby/test_transcode.rb: Added two tests for above fix.
|
||||||
|
|
||||||
Wed Mar 5 14:00:49 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
|
Wed Mar 5 14:00:49 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||||
|
|
||||||
* numeric.c (fix_to_s): avoid rb_scan_args() when no argument
|
* numeric.c (fix_to_s): avoid rb_scan_args() when no argument
|
||||||
|
@ -242,6 +242,11 @@ class TestTranscode < Test::Unit::TestCase
|
|||||||
|
|
||||||
def test_invalid_ignore
|
def test_invalid_ignore
|
||||||
# arguments only
|
# arguments only
|
||||||
'abc'.encode('utf-8', invalid: :ignore)
|
assert_nothing_raised { 'abc'.encode('utf-8', invalid: :ignore) }
|
||||||
|
# check handling of UTF-8 ill-formed subsequences
|
||||||
|
assert_equal("\x00\x41\x00\x3E\x00\x42".force_encoding('UTF-16BE'),
|
||||||
|
"\x41\xC2\x3E\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
|
||||||
|
assert_equal("\x00\x41\x00\xF1\x00\x42".force_encoding('UTF-16BE'),
|
||||||
|
"\x41\xC2\xC3\xB1\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
24
transcode.c
24
transcode.c
@ -177,8 +177,10 @@ transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
|
|||||||
if (from_utf8) {
|
if (from_utf8) {
|
||||||
if ((next_byte&0xC0) == 0x80)
|
if ((next_byte&0xC0) == 0x80)
|
||||||
next_byte -= 0x80;
|
next_byte -= 0x80;
|
||||||
else
|
else {
|
||||||
|
in_p--; /* may need to add more code later to revert other things */
|
||||||
goto invalid;
|
goto invalid;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
next_table = (const BYTE_LOOKUP *)next_info;
|
next_table = (const BYTE_LOOKUP *)next_info;
|
||||||
goto follow_byte;
|
goto follow_byte;
|
||||||
@ -390,13 +392,15 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* str.encode!(encoding) => str
|
* str.encode!(encoding [, options] ) => str
|
||||||
* str.encode!(to_encoding, from_encoding) => str
|
* str.encode!(to_encoding, from_encoding [, options] ) => str
|
||||||
*
|
*
|
||||||
* With one argument, transcodes the contents of <i>str</i> from
|
* The first form transcodes the contents of <i>str</i> from
|
||||||
* str.encoding to +encoding+.
|
* str.encoding to +encoding+.
|
||||||
* With two arguments, transcodes the contents of <i>str</i> from
|
* The second form transcodes the contents of <i>str</i> from
|
||||||
* from_encoding to to_encoding.
|
* from_encoding to to_encoding.
|
||||||
|
* The options Hash gives details for conversion. See String#encode
|
||||||
|
* for details.
|
||||||
* Returns the string even if no changes were made.
|
* Returns the string even if no changes were made.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -414,13 +418,15 @@ rb_str_transcode_bang(int argc, VALUE *argv, VALUE str)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* str.encode(encoding) => str
|
* str.encode(encoding [, options] ) => str
|
||||||
* str.encode(to_encoding, from_encoding) => str
|
* str.encode(to_encoding, from_encoding [, options] ) => str
|
||||||
*
|
*
|
||||||
* With one argument, returns a copy of <i>str</i> transcoded
|
* The first form returns a copy of <i>str</i> transcoded
|
||||||
* to encoding +encoding+.
|
* to encoding +encoding+.
|
||||||
* With two arguments, returns a copy of <i>str</i> transcoded
|
* The second form returns a copy of <i>str</i> transcoded
|
||||||
* from from_encoding to to_encoding.
|
* from from_encoding to to_encoding.
|
||||||
|
* The options Hash gives details for conversion. Details
|
||||||
|
* to be added.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
|
Loading…
x
Reference in New Issue
Block a user