[Bug #20305] Fix matching against an incomplete character
When matching against an incomplete character, some `enclen` calls are expected to return a length that does not exceed the limit (the end of the string), while others are expected to return the full required length, after which the caller checks whether that length exceeds the limit.
This commit is contained in:
parent
7b56353d97
commit
3a04ea2d03
37
regexec.c
37
regexec.c
@ -1943,6 +1943,19 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
|
|||||||
# define ABSENT_END_POS end
|
# define ABSENT_END_POS end
|
||||||
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
|
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
|
||||||
|
|
||||||
|
int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e)
|
||||||
|
{
|
||||||
|
if (enc->max_enc_len == enc->min_enc_len) {
|
||||||
|
return (p < e ? enc->min_enc_len : 0);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return onigenc_mbclen_approximate(p, e, enc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef USE_CAPTURE_HISTORY
|
#ifdef USE_CAPTURE_HISTORY
|
||||||
static int
|
static int
|
||||||
@ -2923,7 +2936,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
int mb_len;
|
int mb_len;
|
||||||
|
|
||||||
DATA_ENSURE(1);
|
DATA_ENSURE(1);
|
||||||
mb_len = enclen(encode, s, end);
|
mb_len = enclen_approx(encode, s, end);
|
||||||
DATA_ENSURE(mb_len);
|
DATA_ENSURE(mb_len);
|
||||||
ss = s;
|
ss = s;
|
||||||
s += mb_len;
|
s += mb_len;
|
||||||
@ -3028,7 +3041,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
|
|
||||||
CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
|
CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
|
||||||
DATA_ENSURE(1);
|
DATA_ENSURE(1);
|
||||||
n = enclen(encode, s, end);
|
n = enclen_approx(encode, s, end);
|
||||||
DATA_ENSURE(n);
|
DATA_ENSURE(n);
|
||||||
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
|
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
|
||||||
s += n;
|
s += n;
|
||||||
@ -3037,7 +3050,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
|
|
||||||
CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
|
CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
|
||||||
DATA_ENSURE(1);
|
DATA_ENSURE(1);
|
||||||
n = enclen(encode, s, end);
|
n = enclen_approx(encode, s, end);
|
||||||
DATA_ENSURE(n);
|
DATA_ENSURE(n);
|
||||||
s += n;
|
s += n;
|
||||||
MOP_OUT;
|
MOP_OUT;
|
||||||
@ -3047,7 +3060,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
while (DATA_ENSURE_CHECK1) {
|
while (DATA_ENSURE_CHECK1) {
|
||||||
CHECK_MATCH_CACHE;
|
CHECK_MATCH_CACHE;
|
||||||
STACK_PUSH_ALT(p, s, sprev, pkeep);
|
STACK_PUSH_ALT(p, s, sprev, pkeep);
|
||||||
n = enclen(encode, s, end);
|
n = enclen_approx(encode, s, end);
|
||||||
DATA_ENSURE(n);
|
DATA_ENSURE(n);
|
||||||
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
|
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
|
||||||
sprev = s;
|
sprev = s;
|
||||||
@ -3060,7 +3073,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
while (DATA_ENSURE_CHECK1) {
|
while (DATA_ENSURE_CHECK1) {
|
||||||
CHECK_MATCH_CACHE;
|
CHECK_MATCH_CACHE;
|
||||||
STACK_PUSH_ALT(p, s, sprev, pkeep);
|
STACK_PUSH_ALT(p, s, sprev, pkeep);
|
||||||
n = enclen(encode, s, end);
|
n = enclen_approx(encode, s, end);
|
||||||
if (n > 1) {
|
if (n > 1) {
|
||||||
DATA_ENSURE(n);
|
DATA_ENSURE(n);
|
||||||
sprev = s;
|
sprev = s;
|
||||||
@ -3086,7 +3099,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
msa->num_fails++;
|
msa->num_fails++;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
n = enclen(encode, s, end);
|
n = enclen_approx(encode, s, end);
|
||||||
DATA_ENSURE(n);
|
DATA_ENSURE(n);
|
||||||
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
|
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
|
||||||
sprev = s;
|
sprev = s;
|
||||||
@ -3108,7 +3121,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
msa->num_fails++;
|
msa->num_fails++;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
n = enclen(encode, s, end);
|
n = enclen_approx(encode, s, end);
|
||||||
if (n > 1) {
|
if (n > 1) {
|
||||||
DATA_ENSURE(n);
|
DATA_ENSURE(n);
|
||||||
sprev = s;
|
sprev = s;
|
||||||
@ -3131,7 +3144,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
if (scv) goto fail;
|
if (scv) goto fail;
|
||||||
|
|
||||||
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
|
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
|
||||||
n = enclen(encode, s, end);
|
n = enclen_approx(encode, s, end);
|
||||||
DATA_ENSURE(n);
|
DATA_ENSURE(n);
|
||||||
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
|
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
|
||||||
sprev = s;
|
sprev = s;
|
||||||
@ -3149,7 +3162,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
if (scv) goto fail;
|
if (scv) goto fail;
|
||||||
|
|
||||||
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
|
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
|
||||||
n = enclen(encode, s, end);
|
n = enclen_approx(encode, s, end);
|
||||||
if (n > 1) {
|
if (n > 1) {
|
||||||
DATA_ENSURE(n);
|
DATA_ENSURE(n);
|
||||||
sprev = s;
|
sprev = s;
|
||||||
@ -3491,7 +3504,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
DATA_ENSURE(n);
|
DATA_ENSURE(n);
|
||||||
sprev = s;
|
sprev = s;
|
||||||
STRING_CMP(pstart, s, n);
|
STRING_CMP(pstart, s, n);
|
||||||
while (sprev + (len = enclen(encode, sprev, end)) < s)
|
while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
|
||||||
sprev += len;
|
sprev += len;
|
||||||
|
|
||||||
MOP_OUT;
|
MOP_OUT;
|
||||||
@ -3522,7 +3535,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
DATA_ENSURE(n);
|
DATA_ENSURE(n);
|
||||||
sprev = s;
|
sprev = s;
|
||||||
STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
|
STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
|
||||||
while (sprev + (len = enclen(encode, sprev, end)) < s)
|
while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
|
||||||
sprev += len;
|
sprev += len;
|
||||||
|
|
||||||
MOP_OUT;
|
MOP_OUT;
|
||||||
@ -3557,7 +3570,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||||||
STRING_CMP_VALUE(pstart, swork, n, is_fail);
|
STRING_CMP_VALUE(pstart, swork, n, is_fail);
|
||||||
if (is_fail) continue;
|
if (is_fail) continue;
|
||||||
s = swork;
|
s = swork;
|
||||||
while (sprev + (len = enclen(encode, sprev, end)) < s)
|
while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
|
||||||
sprev += len;
|
sprev += len;
|
||||||
|
|
||||||
p += (SIZE_MEMNUM * (tlen - i - 1));
|
p += (SIZE_MEMNUM * (tlen - i - 1));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user