[Bug #20305] Fix matching against an incomplete character

When matching against an incomplete character, some `enclen` calls are
expected not to exceed the limit, while others are expected to return the
required length, and the result is then checked to see whether it exceeds
the limit.
This commit is contained in:
Nobuyoshi Nakada 2024-02-27 12:20:35 +09:00
parent 7b56353d97
commit 3a04ea2d03

View File

@ -1943,6 +1943,19 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
# define ABSENT_END_POS end # define ABSENT_END_POS end
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
/* Forward declaration; the definition is not part of this excerpt. */
int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);

/*
 * Byte length to use for the character starting at `p`, where `e` marks
 * the end of the available data.
 *
 * - Variable-width encodings (max_enc_len != min_enc_len): the answer is
 *   delegated to onigenc_mbclen_approximate().
 * - Fixed-width encodings: the fixed width is returned whenever `p` is
 *   before `e`, and 0 otherwise.  The width is NOT clamped to `e - p`, so
 *   the result may be larger than the number of bytes actually remaining;
 *   callers are expected to verify the returned length themselves.
 */
static inline int
enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e)
{
  /* Variable-width encoding: let the encoding-aware helper decide. */
  if (enc->max_enc_len != enc->min_enc_len) {
    return onigenc_mbclen_approximate(p, e, enc);
  }

  /* Fixed-width encoding: nothing left to read means length zero. */
  if (p < e) {
    return enc->min_enc_len;
  }
  return 0;
}
#ifdef USE_CAPTURE_HISTORY #ifdef USE_CAPTURE_HISTORY
static int static int
@ -2923,7 +2936,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int mb_len; int mb_len;
DATA_ENSURE(1); DATA_ENSURE(1);
mb_len = enclen(encode, s, end); mb_len = enclen_approx(encode, s, end);
DATA_ENSURE(mb_len); DATA_ENSURE(mb_len);
ss = s; ss = s;
s += mb_len; s += mb_len;
@ -3028,7 +3041,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR); CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
DATA_ENSURE(1); DATA_ENSURE(1);
n = enclen(encode, s, end); n = enclen_approx(encode, s, end);
DATA_ENSURE(n); DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
s += n; s += n;
@ -3037,7 +3050,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML); CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
DATA_ENSURE(1); DATA_ENSURE(1);
n = enclen(encode, s, end); n = enclen_approx(encode, s, end);
DATA_ENSURE(n); DATA_ENSURE(n);
s += n; s += n;
MOP_OUT; MOP_OUT;
@ -3047,7 +3060,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (DATA_ENSURE_CHECK1) { while (DATA_ENSURE_CHECK1) {
CHECK_MATCH_CACHE; CHECK_MATCH_CACHE;
STACK_PUSH_ALT(p, s, sprev, pkeep); STACK_PUSH_ALT(p, s, sprev, pkeep);
n = enclen(encode, s, end); n = enclen_approx(encode, s, end);
DATA_ENSURE(n); DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
sprev = s; sprev = s;
@ -3060,7 +3073,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (DATA_ENSURE_CHECK1) { while (DATA_ENSURE_CHECK1) {
CHECK_MATCH_CACHE; CHECK_MATCH_CACHE;
STACK_PUSH_ALT(p, s, sprev, pkeep); STACK_PUSH_ALT(p, s, sprev, pkeep);
n = enclen(encode, s, end); n = enclen_approx(encode, s, end);
if (n > 1) { if (n > 1) {
DATA_ENSURE(n); DATA_ENSURE(n);
sprev = s; sprev = s;
@ -3086,7 +3099,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
msa->num_fails++; msa->num_fails++;
#endif #endif
} }
n = enclen(encode, s, end); n = enclen_approx(encode, s, end);
DATA_ENSURE(n); DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
sprev = s; sprev = s;
@ -3108,7 +3121,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
msa->num_fails++; msa->num_fails++;
#endif #endif
} }
n = enclen(encode, s, end); n = enclen_approx(encode, s, end);
if (n > 1) { if (n > 1) {
DATA_ENSURE(n); DATA_ENSURE(n);
sprev = s; sprev = s;
@ -3131,7 +3144,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (scv) goto fail; if (scv) goto fail;
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
n = enclen(encode, s, end); n = enclen_approx(encode, s, end);
DATA_ENSURE(n); DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
sprev = s; sprev = s;
@ -3149,7 +3162,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (scv) goto fail; if (scv) goto fail;
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
n = enclen(encode, s, end); n = enclen_approx(encode, s, end);
if (n > 1) { if (n > 1) {
DATA_ENSURE(n); DATA_ENSURE(n);
sprev = s; sprev = s;
@ -3491,7 +3504,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(n); DATA_ENSURE(n);
sprev = s; sprev = s;
STRING_CMP(pstart, s, n); STRING_CMP(pstart, s, n);
while (sprev + (len = enclen(encode, sprev, end)) < s) while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
sprev += len; sprev += len;
MOP_OUT; MOP_OUT;
@ -3522,7 +3535,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(n); DATA_ENSURE(n);
sprev = s; sprev = s;
STRING_CMP_IC(case_fold_flag, pstart, &s, n, end); STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
while (sprev + (len = enclen(encode, sprev, end)) < s) while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
sprev += len; sprev += len;
MOP_OUT; MOP_OUT;
@ -3557,7 +3570,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STRING_CMP_VALUE(pstart, swork, n, is_fail); STRING_CMP_VALUE(pstart, swork, n, is_fail);
if (is_fail) continue; if (is_fail) continue;
s = swork; s = swork;
while (sprev + (len = enclen(encode, sprev, end)) < s) while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
sprev += len; sprev += len;
p += (SIZE_MEMNUM * (tlen - i - 1)); p += (SIZE_MEMNUM * (tlen - i - 1));