Update PCRE to 8.37

Change-Id: I0668a8ccdebc1a6b5f1cb8bbb74d91b44ac937f8
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@theqtcompany.com>
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
This commit is contained in:
Giuseppe D'Angelo 2015-04-28 16:01:36 +02:00
parent 49107dd53b
commit 8567cfd9f0
8 changed files with 97 additions and 45 deletions

View File

@ -8,7 +8,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service, University of Cambridge Computing Service,
Cambridge, England. Cambridge, England.
Copyright (c) 1997-2014 University of Cambridge Copyright (c) 1997-2015 University of Cambridge
All rights reserved All rights reserved
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester Email local part: hzmester
Email domain: freemail.hu Email domain: freemail.hu
Copyright(c) 2010-2014 Zoltan Herczeg Copyright(c) 2010-2015 Zoltan Herczeg
All rights reserved. All rights reserved.
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester Email local part: hzmester
Email domain: freemail.hu Email domain: freemail.hu
Copyright(c) 2009-2014 Zoltan Herczeg Copyright(c) 2009-2015 Zoltan Herczeg
All rights reserved. All rights reserved.

View File

@ -6,7 +6,8 @@ and semantics are as close as possible to those of the Perl 5 language.
Release 8 of PCRE is distributed under the terms of the "BSD" licence, as Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
specified below. The documentation for PCRE, supplied in the "doc" specified below. The documentation for PCRE, supplied in the "doc"
directory, is distributed under the same terms as the software itself. directory, is distributed under the same terms as the software itself. The data
in the testdata directory is not copyrighted and is in the public domain.
The basic library functions are written in C and are freestanding. Also The basic library functions are written in C and are freestanding. Also
included in the distribution is a set of C++ wrapper functions, and a included in the distribution is a set of C++ wrapper functions, and a
@ -24,7 +25,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service, University of Cambridge Computing Service,
Cambridge, England. Cambridge, England.
Copyright (c) 1997-2014 University of Cambridge Copyright (c) 1997-2015 University of Cambridge
All rights reserved. All rights reserved.
@ -35,7 +36,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester Email local part: hzmester
Email domain: freemail.hu Email domain: freemail.hu
Copyright(c) 2010-2014 Zoltan Herczeg Copyright(c) 2010-2015 Zoltan Herczeg
All rights reserved. All rights reserved.
@ -46,7 +47,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester Email local part: hzmester
Email domain: freemail.hu Email domain: freemail.hu
Copyright(c) 2009-2014 Zoltan Herczeg Copyright(c) 2009-2015 Zoltan Herczeg
All rights reserved. All rights reserved.

View File

@ -43,8 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
#define PCRE_MAJOR 8 #define PCRE_MAJOR 8
#define PCRE_MINOR 37 #define PCRE_MINOR 37
#define PCRE_PRERELEASE -RC1 #define PCRE_PRERELEASE
#define PCRE_DATE 2015-02-03 #define PCRE_DATE 2015-04-28
/* When an application links to a PCRE DLL in Windows, the symbols that are /* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate imported have to be identified as such. When building PCRE, the appropriate

View File

@ -866,14 +866,6 @@ static const pcre_uint8 opcode_possessify[] = {
}; };
/* Structure for mutual recursion detection. */
typedef struct recurse_check {
struct recurse_check *prev;
const pcre_uchar *group;
} recurse_check;
/************************************************* /*************************************************
* Find an error text * * Find an error text *
@ -5532,13 +5524,13 @@ for (;; ptr++)
PUT(previous, 1, (int)(code - previous)); PUT(previous, 1, (int)(code - previous));
break; /* End of class handling */ break; /* End of class handling */
} }
#endif
/* Even though any XCLASS list is now discarded, we must allow for /* Even though any XCLASS list is now discarded, we must allow for
its memory. */ its memory. */
if (lengthptr != NULL) if (lengthptr != NULL)
*lengthptr += (int)(class_uchardata - class_uchardata_base); *lengthptr += (int)(class_uchardata - class_uchardata_base);
#endif
/* If there are no characters > 255, or they are all to be included or /* If there are no characters > 255, or they are all to be included or
excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the

View File

@ -2446,7 +2446,7 @@ typedef struct compile_data {
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL check_lookbehind; /* Lookbehinds need later checking */ BOOL check_lookbehind; /* Lookbehinds need later checking */
BOOL dupnames; /* Duplicate names exist */ BOOL dupnames; /* Duplicate names exist */
BOOL iscondassert; /* Next assert is a condition */ BOOL iscondassert; /* Next assert is a condition */
int nltype; /* Newline type */ int nltype; /* Newline type */
int nllen; /* Newline string length */ int nllen; /* Newline string length */
pcre_uchar nl[4]; /* Newline string when fixed length */ pcre_uchar nl[4]; /* Newline string when fixed length */
@ -2460,6 +2460,13 @@ typedef struct branch_chain {
pcre_uchar *current_branch; pcre_uchar *current_branch;
} branch_chain; } branch_chain;
/* Structure for mutual recursion detection. */
typedef struct recurse_check {
struct recurse_check *prev;
const pcre_uchar *group;
} recurse_check;
/* Structure for items in a linked list that represents an explicit recursive /* Structure for items in a linked list that represents an explicit recursive
call within the pattern; used by pcre_exec(). */ call within the pattern; used by pcre_exec(). */

View File

@ -1533,7 +1533,11 @@ while (cc < ccend)
{ {
case OP_KET: case OP_KET:
if (PRIVATE_DATA(cc) != 0) if (PRIVATE_DATA(cc) != 0)
{
private_data_length++; private_data_length++;
SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
cc += PRIVATE_DATA(cc + 1);
}
cc += 1 + LINK_SIZE; cc += 1 + LINK_SIZE;
break; break;
@ -1548,6 +1552,7 @@ while (cc < ccend)
case OP_SBRAPOS: case OP_SBRAPOS:
case OP_SCOND: case OP_SCOND:
private_data_length++; private_data_length++;
SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
cc += 1 + LINK_SIZE; cc += 1 + LINK_SIZE;
break; break;
@ -1710,6 +1715,8 @@ do
{ {
count = 1; count = 1;
srcw[0] = PRIVATE_DATA(cc); srcw[0] = PRIVATE_DATA(cc);
SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
cc += PRIVATE_DATA(cc + 1);
} }
cc += 1 + LINK_SIZE; cc += 1 + LINK_SIZE;
break; break;

View File

@ -70,7 +70,7 @@ Arguments:
code pointer to start of group (the bracket) code pointer to start of group (the bracket)
startcode pointer to start of the whole pattern's code startcode pointer to start of the whole pattern's code
options the compiling options options the compiling options
int RECURSE depth recurses chain of recurse_check to catch mutual recursion
Returns: the minimum length Returns: the minimum length
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
@ -80,12 +80,13 @@ Returns: the minimum length
static int static int
find_minlength(const REAL_PCRE *re, const pcre_uchar *code, find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
const pcre_uchar *startcode, int options, int recurse_depth) const pcre_uchar *startcode, int options, recurse_check *recurses)
{ {
int length = -1; int length = -1;
/* PCRE_UTF16 has the same value as PCRE_UTF8. */ /* PCRE_UTF16 has the same value as PCRE_UTF8. */
BOOL utf = (options & PCRE_UTF8) != 0; BOOL utf = (options & PCRE_UTF8) != 0;
BOOL had_recurse = FALSE; BOOL had_recurse = FALSE;
recurse_check this_recurse;
register int branchlength = 0; register int branchlength = 0;
register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE; register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
@ -130,7 +131,7 @@ for (;;)
case OP_SBRAPOS: case OP_SBRAPOS:
case OP_ONCE: case OP_ONCE:
case OP_ONCE_NC: case OP_ONCE_NC:
d = find_minlength(re, cc, startcode, options, recurse_depth); d = find_minlength(re, cc, startcode, options, recurses);
if (d < 0) return d; if (d < 0) return d;
branchlength += d; branchlength += d;
do cc += GET(cc, 1); while (*cc == OP_ALT); do cc += GET(cc, 1); while (*cc == OP_ALT);
@ -393,7 +394,7 @@ for (;;)
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0)); ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
if (cs == NULL) return -2; if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT); do ce += GET(ce, 1); while (*ce == OP_ALT);
if ((cc > cs && cc < ce) || recurse_depth > 10) if (cc > cs && cc < ce) /* Simple recursion */
{ {
d = 0; d = 0;
had_recurse = TRUE; had_recurse = TRUE;
@ -401,8 +402,22 @@ for (;;)
} }
else else
{ {
int dd = find_minlength(re, cs, startcode, options, recurse_depth+1); recurse_check *r = recurses;
if (dd < d) d = dd; for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
if (r != NULL) /* Mutual recursion */
{
d = 0;
had_recurse = TRUE;
break;
}
else
{
int dd;
this_recurse.prev = recurses;
this_recurse.group = cs;
dd = find_minlength(re, cs, startcode, options, &this_recurse);
if (dd < d) d = dd;
}
} }
slot += re->name_entry_size; slot += re->name_entry_size;
} }
@ -418,14 +433,26 @@ for (;;)
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1)); ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
if (cs == NULL) return -2; if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT); do ce += GET(ce, 1); while (*ce == OP_ALT);
if ((cc > cs && cc < ce) || recurse_depth > 10) if (cc > cs && cc < ce) /* Simple recursion */
{ {
d = 0; d = 0;
had_recurse = TRUE; had_recurse = TRUE;
} }
else else
{ {
d = find_minlength(re, cs, startcode, options, recurse_depth + 1); recurse_check *r = recurses;
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
if (r != NULL) /* Mutual recursion */
{
d = 0;
had_recurse = TRUE;
}
else
{
this_recurse.prev = recurses;
this_recurse.group = cs;
d = find_minlength(re, cs, startcode, options, &this_recurse);
}
} }
} }
else d = 0; else d = 0;
@ -474,12 +501,21 @@ for (;;)
case OP_RECURSE: case OP_RECURSE:
cs = ce = (pcre_uchar *)startcode + GET(cc, 1); cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
do ce += GET(ce, 1); while (*ce == OP_ALT); do ce += GET(ce, 1); while (*ce == OP_ALT);
if ((cc > cs && cc < ce) || recurse_depth > 10) if (cc > cs && cc < ce) /* Simple recursion */
had_recurse = TRUE; had_recurse = TRUE;
else else
{ {
branchlength += find_minlength(re, cs, startcode, options, recurse_check *r = recurses;
recurse_depth + 1); for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
if (r != NULL) /* Mutual recursion */
had_recurse = TRUE;
else
{
this_recurse.prev = recurses;
this_recurse.group = cs;
branchlength += find_minlength(re, cs, startcode, options,
&this_recurse);
}
} }
cc += 1 + LINK_SIZE; cc += 1 + LINK_SIZE;
break; break;
@ -1503,7 +1539,7 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
/* Find the minimum length of subject string. */ /* Find the minimum length of subject string. */
switch(min = find_minlength(re, code, code, re->options, 0)) switch(min = find_minlength(re, code, code, re->options, NULL))
{ {
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
case -3: *errorptr = "internal error: opcode not recognized"; return NULL; case -3: *errorptr = "internal error: opcode not recognized"; return NULL;

View File

@ -1081,12 +1081,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
offs = (local_size - saved_regs_size) << (15 - 3); offs = (local_size - saved_regs_size) << (15 - 3);
} else { } else {
compiler->local_size += 2 * sizeof(sljit_sw); offs = 0 << 15;
local_size -= saved_regs_size; if (saved_regs_size & 0x8) {
saved_regs_size += 2 * sizeof(sljit_sw); offs = 1 << 15;
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) saved_regs_size += sizeof(sljit_sw);
| RN(TMP_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15))); }
offs = 2 << 15; local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
} }
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
@ -1122,6 +1123,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
} }
if (local_size) if (local_size)
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
| RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
} }
@ -1145,8 +1148,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compi
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET; local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET;
local_size = (local_size + 15) & ~0xf; local_size = (local_size + 15) & ~0xf;
if (local_size > (63 * sizeof(sljit_sw)))
local_size += 2 * sizeof(sljit_sw);
compiler->local_size = local_size; compiler->local_size = local_size;
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
} }
@ -1167,16 +1168,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi
if (local_size <= (63 * sizeof(sljit_sw))) if (local_size <= (63 * sizeof(sljit_sw)))
offs = (local_size - saved_regs_size) << (15 - 3); offs = (local_size - saved_regs_size) << (15 - 3);
else { else {
saved_regs_size += 2 * sizeof(sljit_sw); FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
local_size -= saved_regs_size; | RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
offs = 0 << 15;
if (saved_regs_size & 0x8) {
offs = 1 << 15;
saved_regs_size += sizeof(sljit_sw);
}
local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
if (local_size > 0xfff) { if (local_size > 0xfff) {
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff; local_size &= 0xfff;
} }
if (local_size) if (local_size)
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
local_size = saved_regs_size;
offs = 2 << 15;
} }
tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
@ -1204,8 +1209,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi
if (prev != -1) if (prev != -1)
FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(TMP_SP) | (offs >> 5))); FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(TMP_SP) | (offs >> 5)));
FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) if (compiler->local_size <= (63 * sizeof(sljit_sw))) {
| RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15))); FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
| RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
} else {
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
}
FAIL_IF(push_inst(compiler, RET | RN(TMP_LR))); FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
return SLJIT_SUCCESS; return SLJIT_SUCCESS;