Implement (*NOTEMPTY) and (?(VERSION= features.

This commit is contained in:
Philip.Hazel 2014-08-26 11:46:21 +00:00
parent ac70cacd29
commit 7858fa702d
12 changed files with 391 additions and 97 deletions

View File

@ -130,8 +130,8 @@ functions, so take care not to define synonyms by mistake. */
#define PCRE2_NOTBOL 0x00000001u
#define PCRE2_NOTEOL 0x00000002u
#define PCRE2_NOTEMPTY 0x00000004u
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
#define PCRE2_PARTIAL_SOFT 0x00000010u
#define PCRE2_PARTIAL_HARD 0x00000020u

View File

@ -566,7 +566,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78 };
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@ -574,6 +574,7 @@ compatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is
generic and always supported. */
enum { PSO_OPT, /* Value is an option bit */
PSO_FLG, /* Value is a flag bit */
PSO_NL, /* Value is a newline type */
PSO_BSR, /* Value is a \R type */
PSO_LIMM, /* Read integer value for match limit */
@ -592,6 +593,8 @@ static pso pso_list[] = {
{ (uint8_t *)STRING_UTFn_RIGHTPAR, PSO_OPT, PCRE2_UTF },
{ (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF },
{ (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP },
{ (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET },
{ (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET },
{ (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
@ -854,7 +857,8 @@ for (;;)
case OP_CLOSE:
case OP_COMMIT:
case OP_CREF:
case OP_DEF:
case OP_FALSE:
case OP_TRUE:
case OP_DNCREF:
case OP_DNRREF:
case OP_DOLL:
@ -1118,7 +1122,8 @@ for (;;)
case OP_DNCREF:
case OP_RREF:
case OP_DNRREF:
case OP_DEF:
case OP_FALSE:
case OP_TRUE:
code += PRIV(OP_lengths)[*code];
break;
@ -4449,10 +4454,12 @@ for (;; ptr++)
PCRE2_UCHAR *bralink = NULL;
PCRE2_UCHAR *brazeroptr = NULL;
/* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
we just ignore the repeat. */
/* Repeating a DEFINE group (or any group where the condition is always
FALSE and there is only one branch) is pointless, but Perl allows the
syntax, so we just ignore the repeat. */
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
previous[GET(previous, 1)] != OP_ALT)
goto END_REPEAT;
/* There is no sense in actually repeating assertions. The only potential
@ -5159,10 +5166,66 @@ for (;; ptr++)
namelen = -1; /* => not a name; must set to avoid warning */
name = NULL; /* Always set to avoid warning */
recno = 0; /* Always set to avoid warning */
/* Point at character after (?( */
ptr++;
/* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect
users of PCRE2 via an application can discover which release of PCRE2
is being used. */
if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
ptr[7] != CHAR_RIGHT_PARENTHESIS)
{
BOOL ge = FALSE;
int major = 0;
int minor = 0;
ptr += 7;
if (*ptr == CHAR_GREATER_THAN_SIGN)
{
ge = TRUE;
ptr++;
}
/* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
references its argument twice. */
if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
{
*errorcodeptr = ERR79;
goto FAILED;
}
while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0';
if (*ptr == CHAR_DOT)
{
ptr++;
while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
}
if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
*errorcodeptr = ERR79;
goto FAILED;
}
if (ge)
code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) ||
(PCRE2_MAJOR == major && PCRE2_MINOR >= minor))?
OP_TRUE : OP_FALSE;
else
code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)?
OP_TRUE : OP_FALSE;
ptr++;
skipbytes = 1;
break; /* End of condition processing */
}
/* Check for a test for recursion in a named group. */
ptr++;
if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
{
terminator = -1;
@ -5338,11 +5401,13 @@ for (;; ptr++)
}
/* Similarly, check for the (?(DEFINE) "condition", which is always
false. */
false. During compilation we set OP_DEFINE to distinguish this from
other OP_FALSE conditions so that it can be checked for having only one
branch, but after that the opcode is changed to OP_FALSE. */
else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
{
code[1+LINK_SIZE] = OP_DEF;
code[1+LINK_SIZE] = OP_DEFINE;
skipbytes = 1;
}
@ -6065,16 +6130,18 @@ for (;; ptr++)
while (*tc != OP_KET);
/* A DEFINE group is never obeyed inline (the "condition" is always
false). It must have only one branch. */
false). It must have only one branch. Having checked this, change the
opcode to OP_FALSE. */
if (code[LINK_SIZE+1] == OP_DEF)
if (code[LINK_SIZE+1] == OP_DEFINE)
{
if (condcount > 1)
{
*errorcodeptr = ERR54;
goto FAILED;
}
bravalue = OP_DEF; /* Just a flag to suppress char handling below */
code[LINK_SIZE+1] = OP_FALSE;
bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */
}
/* A "normal" conditional group. If there is just one branch, we must not
@ -6127,7 +6194,7 @@ for (;; ptr++)
/* For a DEFINE group, required and first character settings are not
relevant. */
if (bravalue == OP_DEF) break;
if (bravalue == OP_DEFINE) break;
/* Handle updating of the required and first characters for other types of
group. Update for normal brackets of all kinds, and conditions with two
@ -7011,7 +7078,8 @@ do {
case OP_DNCREF:
case OP_RREF:
case OP_DNRREF:
case OP_DEF:
case OP_FALSE:
case OP_TRUE:
return FALSE;
default: /* Assertion */
@ -7413,6 +7481,10 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
case PSO_OPT:
cb.external_options |= p->value;
break;
case PSO_FLG:
setflags |= p->value;
break;
case PSO_NL:
newline = p->value;

View File

@ -177,12 +177,12 @@ static const uint8_t coptable[] = {
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */
0, 0, /* RREF, DNRREF */
0, /* DEF */
0, 0, /* FALSE, TRUE */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0 /* CLOSE, SKIPZERO */
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
};
/* This table identifies those opcodes that inspect a character. It is used to
@ -249,12 +249,12 @@ static const uint8_t poptable[] = {
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */
0, 0, /* RREF, DNRREF */
0, /* DEF */
0, 0, /* FALSE, TRUE */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0 /* CLOSE, SKIPZERO */
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
};
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
@ -2642,8 +2642,13 @@ for (;;)
/* The DEFINE condition is always false, as is a VERSION condition that does
not hold; both are represented by OP_FALSE. */
if (condcode == OP_DEF)
if (condcode == OP_FALSE)
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
/* There is also an always-true condition */
if (condcode == OP_TRUE)
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
/* The only supported version of OP_RREF is for the value RREF_ANY,
which means "test if in any recursion". We can't test for specifically
@ -3115,6 +3120,24 @@ if (re->magic_number != MAGIC_NUMBER)
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
return PCRE2_ERROR_BADMODE;
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
constructions like (*NO_AUTO_POSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which can now be
transferred to the options for this function. The bits are guaranteed to be
adjacent, but do not have the same values. This bit of Boolean trickery assumes
that the match-time bits are not more significant than the flag bits. If by
accident this is not the case, a compile-time division by zero error will
occur. */
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
#undef FF
#undef OO
/* A NULL match context means "use a default context" */
if (mcontext == NULL)

View File

@ -157,7 +157,8 @@ static const char compile_error_texts[] =
"using UCP is disabled by the application\0"
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0"
"digits missing in \\x{} or \\o{}\0"
"digits missing in \\x{} or \\o{}\0"
"syntax error in (?(VERSION condition\0"
;
/* Match-time and UTF error texts are in the same format. */

View File

@ -223,10 +223,10 @@ else
#endif /* not HAVE_MEMMOVE */
#endif /* not VPCOMPAT */
/* External (in the C sense) functions and tables that are private to the
/* External (in the C sense) functions and tables that are private to the
libraries are always referenced using the PRIV macro. This makes it possible
for pcre2test.c to include some of the source files from the libraries using a
different PRIV definition to avoid name clashes. It also makes it clear in the
different PRIV definition to avoid name clashes. It also makes it clear in the
code that a non-static object is being referenced. */
#ifndef PRIV
@ -387,10 +387,10 @@ other. NOTE: The values also appear in pcre2_jit_compile.c. */
#ifndef EBCDIC
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
space. However, in many other sources it is listed as a space and has been in
PCRE for a long time. */
PCRE for a long time. */
#define HSPACE_LIST \
CHAR_HT, CHAR_SPACE, 0xa0, \
@ -532,6 +532,8 @@ bytes in a code unit in that mode. */
#define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */
#define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */
#define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */
#define PCRE2_NOTEMPTY_SET 0x00010000 /* (*NOTEMPTY) used ) keep */
#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
@ -895,25 +897,28 @@ a positive value. */
#define STRING_xdigit "xdigit"
#define STRING_DEFINE "DEFINE"
#define STRING_VERSION "VERSION"
#define STRING_WEIRD_STARTWORD "[:<:]]"
#define STRING_WEIRD_ENDWORD "[:>:]]"
#define STRING_CR_RIGHTPAR "CR)"
#define STRING_LF_RIGHTPAR "LF)"
#define STRING_CRLF_RIGHTPAR "CRLF)"
#define STRING_ANY_RIGHTPAR "ANY)"
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
#define STRING_UTF8_RIGHTPAR "UTF8)"
#define STRING_UTF16_RIGHTPAR "UTF16)"
#define STRING_UTF32_RIGHTPAR "UTF32)"
#define STRING_UTF_RIGHTPAR "UTF)"
#define STRING_UCP_RIGHTPAR "UCP)"
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
#define STRING_CR_RIGHTPAR "CR)"
#define STRING_LF_RIGHTPAR "LF)"
#define STRING_CRLF_RIGHTPAR "CRLF)"
#define STRING_ANY_RIGHTPAR "ANY)"
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
#define STRING_UTF8_RIGHTPAR "UTF8)"
#define STRING_UTF16_RIGHTPAR "UTF16)"
#define STRING_UTF32_RIGHTPAR "UTF32)"
#define STRING_UTF_RIGHTPAR "UTF)"
#define STRING_UCP_RIGHTPAR "UCP)"
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
#else /* SUPPORT_UTF */
@ -1161,25 +1166,28 @@ only. */
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N
#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
#endif /* SUPPORT_UTF */
@ -1517,39 +1525,47 @@ enum {
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
OP_RREF, /* 143 Used to hold a recursion number as condition */
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
OP_DEF, /* 145 The DEFINE condition */
OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */
OP_TRUE, /* 146 Always true (used by VERSION) */
OP_BRAZERO, /* 146 These two must remain together and in this */
OP_BRAMINZERO, /* 147 order. */
OP_BRAPOSZERO, /* 148 */
OP_BRAZERO, /* 147 These two must remain together and in this */
OP_BRAMINZERO, /* 148 order. */
OP_BRAPOSZERO, /* 149 */
/* These are backtracking control verbs */
OP_MARK, /* 149 always has an argument */
OP_PRUNE, /* 150 */
OP_PRUNE_ARG, /* 151 same, but with argument */
OP_SKIP, /* 152 */
OP_SKIP_ARG, /* 153 same, but with argument */
OP_THEN, /* 154 */
OP_THEN_ARG, /* 155 same, but with argument */
OP_COMMIT, /* 156 */
OP_MARK, /* 150 always has an argument */
OP_PRUNE, /* 151 */
OP_PRUNE_ARG, /* 152 same, but with argument */
OP_SKIP, /* 153 */
OP_SKIP_ARG, /* 154 same, but with argument */
OP_THEN, /* 155 */
OP_THEN_ARG, /* 156 same, but with argument */
OP_COMMIT, /* 157 */
/* These are forced failure and success verbs */
OP_FAIL, /* 157 */
OP_ACCEPT, /* 158 */
OP_ASSERT_ACCEPT, /* 159 Used inside assertions */
OP_CLOSE, /* 160 Used before OP_ACCEPT to close open captures */
OP_FAIL, /* 158 */
OP_ACCEPT, /* 159 */
OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
OP_SKIPZERO, /* 161 */
OP_SKIPZERO, /* 162 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
OP_DEFINE, /* 163 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
some in the past. */
OP_TABLE_LENGTH
};
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
@ -1594,12 +1610,13 @@ some cases doesn't actually use these names at all). */
"Cond", \
"SBra", "SBraPos", "SCBra", "SCBraPos", \
"SCond", \
"Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", "Cond def", \
"Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", \
"Cond false", "Cond true", \
"Brazero", "Braminzero", "Braposzero", \
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
"*ACCEPT", "*ASSERT_ACCEPT", \
"Close", "Skip zero"
"Close", "Skip zero", "Define"
/* This macro defines the length of fixed length operations in the compiled
@ -1684,14 +1701,15 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1+LINK_SIZE, /* SCOND */ \
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
1, /* DEF */ \
1, 1, /* FALSE, TRUE */ \
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
1, 3, /* SKIP, SKIP_ARG */ \
1, 3, /* THEN, THEN_ARG */ \
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
1 /* DEFINE */
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
#define RREF_ANY 0xffff
@ -1757,7 +1775,7 @@ typedef struct {
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not
defined, so the following items are omitted. */
#ifdef PCRE2_CODE_UNIT_WIDTH
@ -1776,11 +1794,11 @@ However, UTF-8 tables are needed only when compiling the 8-bit library. */
#if PCRE2_CODE_UNIT_WIDTH == 8
extern const int PRIV(utf8_table1)[];
extern const int PRIV(utf8_table1_size);
extern const int PRIV(utf8_table1_size);
extern const int PRIV(utf8_table2)[];
extern const int PRIV(utf8_table3)[];
extern const uint8_t PRIV(utf8_table4)[];
#endif
extern const uint8_t PRIV(utf8_table4)[];
#endif
#define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_)
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
@ -1857,7 +1875,7 @@ is available. */
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, const compile_block *);
extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
BOOL);
extern size_t _pcre2_jit_get_size(void *);
extern void _pcre2_match_context_init(pcre2_match_context *, BOOL);
@ -1870,7 +1888,7 @@ extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
extern int _pcre2_study(pcre2_real_code *);
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
BOOL);
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
#endif /* PCRE2_CODE_UNIT_WIDTH */

View File

@ -1363,8 +1363,12 @@ for (;;)
}
break;
case OP_DEF: /* DEFINE - always false */
case OP_FALSE:
break;
case OP_TRUE:
condition = TRUE;
break;
/* The condition is an assertion. Call match() to evaluate it - setting
mb->match_function_type to MATCH_CONDASSERT causes it to stop at the end
@ -6362,6 +6366,24 @@ if (re->magic_number != MAGIC_NUMBER)
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
return PCRE2_ERROR_BADMODE;
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
constructions like (*NO_AUTO_POSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which can now be
transferred to the options for this function. The bits are guaranteed to be
adjacent, but do not have the same values. This bit of Boolean trickery assumes
that the match-time bits are not more significant than the flag bits. If by
accident this is not the case, a compile-time division by zero error will
occur. */
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
#undef FF
#undef OO
/* A NULL match context means "use a default context" */

View File

@ -422,8 +422,12 @@ for(;;)
}
break;
case OP_DEF:
fprintf(f, " Cond def");
case OP_FALSE:
fprintf(f, " Cond false");
break;
case OP_TRUE:
fprintf(f, " Cond true");
break;
case OP_STARI:

View File

@ -181,7 +181,8 @@ for (;;)
case OP_DNCREF:
case OP_RREF:
case OP_DNRREF:
case OP_DEF:
case OP_FALSE:
case OP_TRUE:
case OP_CALLOUT:
case OP_SOD:
case OP_SOM:
@ -792,7 +793,8 @@ do
case OP_COMMIT:
case OP_COND:
case OP_CREF:
case OP_DEF:
case OP_FALSE:
case OP_TRUE:
case OP_DNCREF:
case OP_DNREF:
case OP_DNREFI:

36
testdata/testinput2 vendored
View File

@ -4070,4 +4070,40 @@ a random value. /Ix
/abc(?=abcde)(?=ab)/allusedtext
abcabcdefg
/a*?b*?/
ab
/(*NOTEMPTY)a*?b*?/
ab
ba
cb
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
ab
cdab
/(?(VERSION>=10.0)yes|no)/I
yesno
/(?(VERSION=8)yes){3}/BI,aftertext
yesno
/(?(VERSION=8)yes|no){3}/I
yesnononoyes
** Failers
yesno
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
abcyes
xyzno
** Failers
abcno
xyzyes
/(?(VERSION<10)yes|no)/
/(?(VERSION>10)yes|no)/
/(?(VERSION>=10.0.0)yes|no)/
# End of testinput2

12
testdata/testinput6 vendored
View File

@ -4798,4 +4798,16 @@
/abc(?=abcde)(?=ab)/allusedtext
abcabcdefg
/a*?b*?/
ab
/(*NOTEMPTY)a*?b*?/
ab
ba
cb
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
ab
cdab
# End of testinput6

90
testdata/testoutput2 vendored
View File

@ -9357,7 +9357,7 @@ Partial match at offset 3: +ab
Recurse
Recurse
Cond
Cond def
Cond false
CBra 1
<
[^m]
@ -9379,7 +9379,7 @@ Partial match at offset 3: +ab
Recurse
Recurse
Cond
Cond def
Cond false
CBra 1
<
[\x00-/:-\xff] (neg)
@ -10095,7 +10095,7 @@ No match
Recurse
KetRpos
Cond
Cond def
Cond false
CBra 1
Any
Ket
@ -10114,7 +10114,7 @@ No match
Recurse
KetRmax
Cond
Cond def
Cond false
CBra 1
Any
Ket
@ -11058,7 +11058,7 @@ Matched, but too many substrings
------------------------------------------------------------------
Bra
Cond
Cond def
Cond false
CBra 1
a
Ket
@ -13720,4 +13720,84 @@ No match
0: abcabcde
>>>>>
/a*?b*?/
ab
0:
/(*NOTEMPTY)a*?b*?/
ab
0: a
ba
0: b
cb
0: b
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
ab
0: a
0+ b
cdab
0:
0+ dab
/(?(VERSION>=10.0)yes|no)/I
Capturing subpattern count = 0
Subject length lower bound = 2
yesno
0: yes
/(?(VERSION=8)yes){3}/BI,aftertext
------------------------------------------------------------------
Bra
Cond
Cond false
yes
Ket
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
yesno
0:
0+ yesno
/(?(VERSION=8)yes|no){3}/I
Capturing subpattern count = 0
Subject length lower bound = 6
yesnononoyes
0: nonono
** Failers
No match
yesno
No match
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
Capturing subpattern count = 1
Named capturing subpatterns:
VERSION 1
Starting code units: a x
Subject length lower bound = 5
abcyes
0: abcyes
1: abc
xyzno
0: xyzno
** Failers
No match
abcno
No match
xyzyes
No match
/(?(VERSION<10)yes|no)/
Failed: error 179 at offset 10: syntax error in (?(VERSION condition
/(?(VERSION>10)yes|no)/
Failed: error 179 at offset 11: syntax error in (?(VERSION condition
/(?(VERSION>=10.0.0)yes|no)/
Failed: error 179 at offset 16: syntax error in (?(VERSION condition
# End of testinput2

24
testdata/testoutput6 vendored
View File

@ -7689,4 +7689,28 @@ Matched, but offsets vector is too small to show all matches
0: abcabcde
>>>>>
/a*?b*?/
ab
0: ab
1: a
2:
/(*NOTEMPTY)a*?b*?/
ab
0: ab
1: a
ba
0: b
cb
0: b
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
ab
0: ab
0+
1: a
cdab
0:
0+ dab
# End of testinput6