Use mbuf instead of the bitset when case-folding a character class to small non-ASCII UTF-8 code points (0x80 and above). Fixes #16145
This commit is contained in:
parent
025832c385
commit
a50fbc56a3
Notes:
git
2025-02-28 03:34:55 +00:00
@ -5669,7 +5669,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
|
||||
if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
|
||||
(is_in == 0 && IS_NCCLASS_NOT(cc))) {
|
||||
if (add_flag) {
|
||||
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
|
||||
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= 0x80) {
|
||||
r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
@ -5681,7 +5681,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
|
||||
#else
|
||||
if (is_in != 0) {
|
||||
if (add_flag) {
|
||||
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
|
||||
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= 0x80) {
|
||||
if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
|
||||
r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
|
||||
if (r < 0) return r;
|
||||
|
@ -2114,4 +2114,17 @@ class TestRegexp < Test::Unit::TestCase
|
||||
re =~ s
|
||||
end
|
||||
end
|
||||
|
||||
def test_bug_16145_caseinsensitive_small_utf # [Bug#16145]
  # Regression check: a case-insensitive character class that contains a
  # lowercase accented letter must also match the uppercase form of that
  # letter. Both letters are built from their code points so the test does
  # not depend on the encoding of this source file.

  # o with acute accent: 243 is the lowercase form, 211 the uppercase form.
  lower_o = 243.chr('UTF-8')
  upper_o = 211.chr('UTF-8')
  o_class = /[x#{lower_o}]/i
  o_subject = "abc#{upper_o}"
  assert(o_class.match?(o_subject), "should match o acute case insensitive")

  # e with acute accent: 233 is the lowercase form, 201 the uppercase form.
  lower_e = 233.chr('UTF-8')
  upper_e = 201.chr('UTF-8')
  e_class = /[x#{lower_e}]/i
  e_subject = "CAF#{upper_e}"
  assert(e_class.match?(e_subject), "should match e acute case insensitive")
end
|
||||
end
|
||||
|
Loading…
x
Reference in New Issue
Block a user