Move pcre2_find_bracket() into its own module to avoid circular dependency.
This commit is contained in:
parent
328c7920cc
commit
3326a72f6d
|
@ -67,7 +67,8 @@
|
|||
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
|
||||
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||
# 2014-08-29 PH converted the file for PCRE2 (which has no C++).
|
||||
# 2015-04024 PH added support for PCRE2_DEBUG
|
||||
# 2015-04-24 PH added support for PCRE2_DEBUG
|
||||
# 2015-07-16 PH updated for new pcre2_find_bracket source module
|
||||
|
||||
PROJECT(PCRE2 C)
|
||||
|
||||
|
@ -390,6 +391,7 @@ SET(PCRE2_SOURCES
|
|||
src/pcre2_context.c
|
||||
src/pcre2_dfa_match.c
|
||||
src/pcre2_error.c
|
||||
src/pcre2_find_bracket.c
|
||||
src/pcre2_jit_compile.c
|
||||
src/pcre2_maketables.c
|
||||
src/pcre2_match.c
|
||||
|
|
|
@ -25,6 +25,10 @@ during the scan for named groups, and hence the external # was incorrectly
|
|||
treated as a comment and the invalid (?' at the end of the pattern was not
|
||||
diagnosed. This caused a buffer overflow during the real compile.
|
||||
|
||||
7. Moved the pcre2_find_bracket() function from src/pcre2_compile.c into its
|
||||
own source module to avoid a circular dependency between src/pcre2_compile.c
|
||||
and src/pcre2_study.c
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
|
|
@ -319,6 +319,7 @@ COMMON_SOURCES = \
|
|||
src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_find_bracket.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
src/pcre2_jit_compile.c \
|
||||
|
|
|
@ -97,6 +97,7 @@ can skip ahead to the CMake section.
|
|||
pcre2_context.c
|
||||
pcre2_dfa_match.c
|
||||
pcre2_error.c
|
||||
pcre2_find_bracket.c
|
||||
pcre2_jit_compile.c
|
||||
pcre2_maketables.c
|
||||
pcre2_match.c
|
||||
|
@ -388,4 +389,4 @@ and executable, is in EBCDIC and native z/OS file formats and this is the
|
|||
recommended download site.
|
||||
|
||||
=============================
|
||||
Last Updated: 15 June 2015
|
||||
Last Updated: 16 July 2015
|
||||
|
|
|
@ -204,6 +204,7 @@ files="\
|
|||
src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_find_bracket.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
src/pcre2_jit_compile.c \
|
||||
|
|
3
README
3
README
|
@ -724,6 +724,7 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_context.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_find_bracket.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
|
@ -832,4 +833,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 24 April 2015
|
||||
Last updated: 16 July 2015
|
||||
|
|
|
@ -2333,175 +2333,6 @@ return p;
|
|||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for specific bracket *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans through a compiled pattern until it finds a
|
||||
capturing bracket with the given number, or, if the number is negative, an
|
||||
instance of OP_REVERSE for a lookbehind. The function is global in the C sense
|
||||
so that it can be called from pcre2_study() when finding the minimum matching
|
||||
length.
|
||||
|
||||
Arguments:
|
||||
code points to start of expression
|
||||
utf TRUE in UTF mode
|
||||
number the required bracket number or negative to find a lookbehind
|
||||
|
||||
Returns: pointer to the opcode for the bracket, or NULL if not found
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
register PCRE2_UCHAR c = *code;
|
||||
|
||||
if (c == OP_END) return NULL;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit map.
|
||||
This includes negated single high-valued characters. CALLOUT_STR is used for
|
||||
callouts with string arguments. In both cases the length in the table is
|
||||
zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS) code += GET(code, 1);
|
||||
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
|
||||
|
||||
/* Handle recursion */
|
||||
|
||||
else if (c == OP_REVERSE)
|
||||
{
|
||||
if (number < 0) return (PCRE2_UCHAR *)code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Handle capturing bracket */
|
||||
|
||||
else if (c == OP_CBRA || c == OP_SCBRA ||
|
||||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
|
||||
{
|
||||
int n = (int)GET2(code, 1+LINK_SIZE);
|
||||
if (n == number) return (PCRE2_UCHAR *)code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Otherwise, we can get the item's length from the table, except that for
|
||||
repeated character types, we have to test for \p and \P, which have an extra
|
||||
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
|
||||
must add in its length. */
|
||||
|
||||
else
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
code += code[1];
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||
followed by a multi-byte character. The length in the table is a minimum, so
|
||||
we have to arrange to skip the extra bytes. */
|
||||
|
||||
#ifdef MAYBE_UTF_MULTI
|
||||
if (utf) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_UPTO:
|
||||
case OP_UPTOI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_MINUPTO:
|
||||
case OP_MINUPTOI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_POSUPTO:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
case OP_STAR:
|
||||
case OP_STARI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_MINSTAR:
|
||||
case OP_MINSTARI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_QUERY:
|
||||
case OP_QUERYI:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_MINQUERY:
|
||||
case OP_MINQUERYI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf); /* Keep compiler happy by referencing function argument */
|
||||
#endif /* MAYBE_UTF_MULTI */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for recursion reference *
|
||||
*************************************************/
|
||||
|
|
|
@ -0,0 +1,218 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2015 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a single function that scans through a compiled pattern
|
||||
until it finds a capturing bracket with the given number, or, if the number is
|
||||
negative, an instance of OP_REVERSE for a lookbehind. The function is called
|
||||
from pcre2_compile.c and also from pcre2_study.c when finding the minimum
|
||||
matching length. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for specific bracket *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to start of expression
|
||||
utf TRUE in UTF mode
|
||||
number the required bracket number or negative to find a lookbehind
|
||||
|
||||
Returns: pointer to the opcode for the bracket, or NULL if not found
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
register PCRE2_UCHAR c = *code;
|
||||
|
||||
if (c == OP_END) return NULL;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit map.
|
||||
This includes negated single high-valued characters. CALLOUT_STR is used for
|
||||
callouts with string arguments. In both cases the length in the table is
|
||||
zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS) code += GET(code, 1);
|
||||
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
|
||||
|
||||
/* Handle recursion */
|
||||
|
||||
else if (c == OP_REVERSE)
|
||||
{
|
||||
if (number < 0) return (PCRE2_UCHAR *)code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Handle capturing bracket */
|
||||
|
||||
else if (c == OP_CBRA || c == OP_SCBRA ||
|
||||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
|
||||
{
|
||||
int n = (int)GET2(code, 1+LINK_SIZE);
|
||||
if (n == number) return (PCRE2_UCHAR *)code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Otherwise, we can get the item's length from the table, except that for
|
||||
repeated character types, we have to test for \p and \P, which have an extra
|
||||
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
|
||||
must add in its length. */
|
||||
|
||||
else
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
code += code[1];
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||
followed by a multi-byte character. The length in the table is a minimum, so
|
||||
we have to arrange to skip the extra bytes. */
|
||||
|
||||
#ifdef MAYBE_UTF_MULTI
|
||||
if (utf) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_UPTO:
|
||||
case OP_UPTOI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_MINUPTO:
|
||||
case OP_MINUPTOI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_POSUPTO:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
case OP_STAR:
|
||||
case OP_STARI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_MINSTAR:
|
||||
case OP_MINSTARI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_QUERY:
|
||||
case OP_QUERYI:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_MINQUERY:
|
||||
case OP_MINQUERYI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf); /* Keep compiler happy by referencing function argument */
|
||||
#endif /* MAYBE_UTF_MULTI */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_find_bracket.c */
|
Loading…
Reference in New Issue