string.c: wchar succ
* string.c (enc_succ_char, enc_pred_char): consider wchar case. [ruby-core:56071] [Bug #8653]
* string.c (rb_str_succ): do not replace with invalid char.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@42078 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
241ad8877d
commit
e6a6dd8e7e
@ -1,4 +1,9 @@
|
|||||||
Sat Jul 20 12:13:37 2013 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Sat Jul 20 12:14:07 2013 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
|
* string.c (enc_succ_char, enc_pred_char): consider wchar case.
|
||||||
|
[ruby-core:56071] [Bug #8653]
|
||||||
|
|
||||||
|
* string.c (rb_str_succ): do not replace with invalid char.
|
||||||
|
|
||||||
* encoding.c (rb_enc_code_to_mbclen): add new function which returns
|
* encoding.c (rb_enc_code_to_mbclen): add new function which returns
|
||||||
mbclen from codepoint like as rb_enc_codelen() but 0 for invalid
|
mbclen from codepoint like as rb_enc_codelen() but 0 for invalid
|
||||||
|
45
string.c
45
string.c
@ -2870,6 +2870,24 @@ enc_succ_char(char *p, long len, rb_encoding *enc)
|
|||||||
{
|
{
|
||||||
long i;
|
long i;
|
||||||
int l;
|
int l;
|
||||||
|
|
||||||
|
if (rb_enc_mbminlen(enc) > 1) {
|
||||||
|
/* wchar, trivial case */
|
||||||
|
int r = rb_enc_precise_mbclen(p, p + len, enc), c;
|
||||||
|
if (!MBCLEN_CHARFOUND_P(r)) {
|
||||||
|
return NEIGHBOR_NOT_CHAR;
|
||||||
|
}
|
||||||
|
c = rb_enc_mbc_to_codepoint(p, p + len, enc) + 1;
|
||||||
|
l = rb_enc_code_to_mbclen(c, enc);
|
||||||
|
if (!l) return NEIGHBOR_NOT_CHAR;
|
||||||
|
if (l != len) return NEIGHBOR_WRAPPED;
|
||||||
|
rb_enc_mbcput(c, p, enc);
|
||||||
|
r = rb_enc_precise_mbclen(p, p + len, enc);
|
||||||
|
if (!MBCLEN_CHARFOUND_P(r)) {
|
||||||
|
return NEIGHBOR_NOT_CHAR;
|
||||||
|
}
|
||||||
|
return NEIGHBOR_FOUND;
|
||||||
|
}
|
||||||
while (1) {
|
while (1) {
|
||||||
for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--)
|
for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--)
|
||||||
p[i] = '\0';
|
p[i] = '\0';
|
||||||
@ -2904,6 +2922,25 @@ enc_pred_char(char *p, long len, rb_encoding *enc)
|
|||||||
{
|
{
|
||||||
long i;
|
long i;
|
||||||
int l;
|
int l;
|
||||||
|
if (rb_enc_mbminlen(enc) > 1) {
|
||||||
|
/* wchar, trivial case */
|
||||||
|
int r = rb_enc_precise_mbclen(p, p + len, enc), c;
|
||||||
|
if (!MBCLEN_CHARFOUND_P(r)) {
|
||||||
|
return NEIGHBOR_NOT_CHAR;
|
||||||
|
}
|
||||||
|
c = rb_enc_mbc_to_codepoint(p, p + len, enc);
|
||||||
|
if (!c) return NEIGHBOR_NOT_CHAR;
|
||||||
|
--c;
|
||||||
|
l = rb_enc_code_to_mbclen(c, enc);
|
||||||
|
if (!l) return NEIGHBOR_NOT_CHAR;
|
||||||
|
if (l != len) return NEIGHBOR_WRAPPED;
|
||||||
|
rb_enc_mbcput(c, p, enc);
|
||||||
|
r = rb_enc_precise_mbclen(p, p + len, enc);
|
||||||
|
if (!MBCLEN_CHARFOUND_P(r)) {
|
||||||
|
return NEIGHBOR_NOT_CHAR;
|
||||||
|
}
|
||||||
|
return NEIGHBOR_FOUND;
|
||||||
|
}
|
||||||
while (1) {
|
while (1) {
|
||||||
for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--)
|
for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--)
|
||||||
p[i] = '\xff';
|
p[i] = '\xff';
|
||||||
@ -3074,12 +3111,16 @@ rb_str_succ(VALUE orig)
|
|||||||
s = e;
|
s = e;
|
||||||
while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
|
while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
|
||||||
enum neighbor_char neighbor;
|
enum neighbor_char neighbor;
|
||||||
|
char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
|
||||||
l = rb_enc_precise_mbclen(s, e, enc);
|
l = rb_enc_precise_mbclen(s, e, enc);
|
||||||
if (!ONIGENC_MBCLEN_CHARFOUND_P(l)) continue;
|
if (!ONIGENC_MBCLEN_CHARFOUND_P(l)) continue;
|
||||||
l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
|
l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
|
||||||
neighbor = enc_succ_char(s, l, enc);
|
MEMCPY(tmp, s, char, l);
|
||||||
if (neighbor == NEIGHBOR_FOUND)
|
neighbor = enc_succ_char(tmp, l, enc);
|
||||||
|
if (neighbor == NEIGHBOR_FOUND) {
|
||||||
|
MEMCPY(s, tmp, char, l);
|
||||||
return str;
|
return str;
|
||||||
|
}
|
||||||
if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
|
if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
|
||||||
/* wrapped to \0...\0. search next valid char. */
|
/* wrapped to \0...\0. search next valid char. */
|
||||||
enc_succ_char(s, l, enc);
|
enc_succ_char(s, l, enc);
|
||||||
|
@ -50,10 +50,12 @@ class TestM17NComb < Test::Unit::TestCase
|
|||||||
# for transitivity test
|
# for transitivity test
|
||||||
u("\xe0\xa0\xa1"), e("\xe0\xa0\xa1"), s("\xe0\xa0\xa1"), # [ruby-dev:32693]
|
u("\xe0\xa0\xa1"), e("\xe0\xa0\xa1"), s("\xe0\xa0\xa1"), # [ruby-dev:32693]
|
||||||
e("\xa1\xa1"), a("\xa1\xa1"), s("\xa1\xa1"), # [ruby-dev:36484]
|
e("\xa1\xa1"), a("\xa1\xa1"), s("\xa1\xa1"), # [ruby-dev:36484]
|
||||||
|
]
|
||||||
|
|
||||||
#"aa".force_encoding("utf-16be"),
|
WSTRINGS = [
|
||||||
#"aaaa".force_encoding("utf-32be"),
|
"aa".force_encoding("utf-16be"),
|
||||||
#"aaa".force_encoding("utf-32be"),
|
"aaaa".force_encoding("utf-32be"),
|
||||||
|
"aaa".force_encoding("utf-32be"),
|
||||||
]
|
]
|
||||||
|
|
||||||
def combination(*args, &b)
|
def combination(*args, &b)
|
||||||
@ -84,7 +86,7 @@ class TestM17NComb < Test::Unit::TestCase
|
|||||||
r
|
r
|
||||||
end
|
end
|
||||||
|
|
||||||
def enccall(recv, meth, *args, &block)
|
def assert_enccall(recv, meth, *args, &block)
|
||||||
desc = ''
|
desc = ''
|
||||||
if String === recv
|
if String === recv
|
||||||
desc << encdump(recv)
|
desc << encdump(recv)
|
||||||
@ -113,6 +115,7 @@ class TestM17NComb < Test::Unit::TestCase
|
|||||||
}
|
}
|
||||||
result
|
result
|
||||||
end
|
end
|
||||||
|
alias enccall assert_enccall
|
||||||
|
|
||||||
def assert_str_enc_propagation(t, s1, s2)
|
def assert_str_enc_propagation(t, s1, s2)
|
||||||
if !s1.ascii_only?
|
if !s1.ascii_only?
|
||||||
@ -1327,6 +1330,14 @@ class TestM17NComb < Test::Unit::TestCase
|
|||||||
s = t
|
s = t
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Encoding.list.each do |enc|
|
||||||
|
next if enc.dummy?
|
||||||
|
{"A"=>"B", "A1"=>"A2", "A9"=>"B0", "9"=>"10", "Z"=>"AA"}.each do |orig, expected|
|
||||||
|
s = orig.encode(enc)
|
||||||
|
assert_strenc(expected.encode(enc), enc, s.succ, proc {"#{orig.dump}.encode(#{enc}).succ"})
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_str_hash
|
def test_str_hash
|
||||||
|
Loading…
x
Reference in New Issue
Block a user