Implement (*NOTEMPTY) and (?(VERSION= features.

This commit is contained in:
Philip.Hazel 2014-08-26 11:46:21 +00:00
parent ac70cacd29
commit 7858fa702d
12 changed files with 391 additions and 97 deletions

View File

@ -130,8 +130,8 @@ functions, so take care not to define synonyms by mistake. */
#define PCRE2_NOTBOL 0x00000001u #define PCRE2_NOTBOL 0x00000001u
#define PCRE2_NOTEOL 0x00000002u #define PCRE2_NOTEOL 0x00000002u
#define PCRE2_NOTEMPTY 0x00000004u #define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u #define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
#define PCRE2_PARTIAL_SOFT 0x00000010u #define PCRE2_PARTIAL_SOFT 0x00000010u
#define PCRE2_PARTIAL_HARD 0x00000020u #define PCRE2_PARTIAL_HARD 0x00000020u

View File

@ -566,7 +566,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78 }; ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such /* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@ -574,6 +574,7 @@ compatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is
generic and always supported. */ generic and always supported. */
enum { PSO_OPT, /* Value is an option bit */ enum { PSO_OPT, /* Value is an option bit */
PSO_FLG, /* Value is a flag bit */
PSO_NL, /* Value is a newline type */ PSO_NL, /* Value is a newline type */
PSO_BSR, /* Value is a \R type */ PSO_BSR, /* Value is a \R type */
PSO_LIMM, /* Read integer value for match limit */ PSO_LIMM, /* Read integer value for match limit */
@ -592,6 +593,8 @@ static pso pso_list[] = {
{ (uint8_t *)STRING_UTFn_RIGHTPAR, PSO_OPT, PCRE2_UTF }, { (uint8_t *)STRING_UTFn_RIGHTPAR, PSO_OPT, PCRE2_UTF },
{ (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF }, { (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF },
{ (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP }, { (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP },
{ (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET },
{ (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET },
{ (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS }, { (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE }, { (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 }, { (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
@ -854,7 +857,8 @@ for (;;)
case OP_CLOSE: case OP_CLOSE:
case OP_COMMIT: case OP_COMMIT:
case OP_CREF: case OP_CREF:
case OP_DEF: case OP_FALSE:
case OP_TRUE:
case OP_DNCREF: case OP_DNCREF:
case OP_DNRREF: case OP_DNRREF:
case OP_DOLL: case OP_DOLL:
@ -1118,7 +1122,8 @@ for (;;)
case OP_DNCREF: case OP_DNCREF:
case OP_RREF: case OP_RREF:
case OP_DNRREF: case OP_DNRREF:
case OP_DEF: case OP_FALSE:
case OP_TRUE:
code += PRIV(OP_lengths)[*code]; code += PRIV(OP_lengths)[*code];
break; break;
@ -4449,10 +4454,12 @@ for (;; ptr++)
PCRE2_UCHAR *bralink = NULL; PCRE2_UCHAR *bralink = NULL;
PCRE2_UCHAR *brazeroptr = NULL; PCRE2_UCHAR *brazeroptr = NULL;
/* Repeating a DEFINE group is pointless, but Perl allows the syntax, so /* Repeating a DEFINE group (or any group where the condition is always
we just ignore the repeat. */ FALSE and there is only one branch) is pointless, but Perl allows the
syntax, so we just ignore the repeat. */
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF) if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
previous[GET(previous, 1)] != OP_ALT)
goto END_REPEAT; goto END_REPEAT;
/* There is no sense in actually repeating assertions. The only potential /* There is no sense in actually repeating assertions. The only potential
@ -5160,9 +5167,65 @@ for (;; ptr++)
name = NULL; /* Always set to avoid warning */ name = NULL; /* Always set to avoid warning */
recno = 0; /* Always set to avoid warning */ recno = 0; /* Always set to avoid warning */
/* Check for a test for recursion in a named group. */ /* Point at character after (?( */
ptr++; ptr++;
/* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect
users of PCRE2 via an application can discover which release of PCRE2
is being used. */
if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
    ptr[7] != CHAR_RIGHT_PARENTHESIS)
  {
  /* A plain "(?(VERSION)" is excluded by the test above and is not handled
  by this branch. What is accepted here is "VERSION=n[.m])" for an exact
  match or "VERSION>=n[.m])" for a minimum release. Anything else gives
  ERR79. */

  BOOL ge = FALSE;
  int major = 0;
  int minor = 0;

  /* The numbers come from the pattern and can be arbitrarily long. To avoid
  signed integer overflow, accumulation stops once a value reaches this cap.
  The cap is far above any real release number, so a saturated value still
  compares as "greater than the current release", which is the same result
  the full value would give. */

  const int version_cap = 1000;

  ptr += 7;
  if (*ptr == CHAR_GREATER_THAN_SIGN)
    {
    ge = TRUE;
    ptr++;
    }

  /* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
  references its argument twice. */

  if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
    {
    *errorcodeptr = ERR79;
    goto FAILED;
    }

  /* Every digit is consumed, but only added in while below the cap. */

  while (IS_DIGIT(*ptr))
    {
    int digit = *ptr++ - '0';
    if (major < version_cap) major = major * 10 + digit;
    }

  if (*ptr == CHAR_DOT)
    {
    ptr++;
    while (IS_DIGIT(*ptr))
      {
      int digit = *ptr++ - '0';
      if (minor < version_cap) minor = minor * 10 + digit;
      }
    }

  if (*ptr != CHAR_RIGHT_PARENTHESIS)
    {
    *errorcodeptr = ERR79;
    goto FAILED;
    }

  /* The outcome is known at compile time, so the condition is compiled as
  one of the two constant opcodes. */

  if (ge)
    code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) ||
      (PCRE2_MAJOR == major && PCRE2_MINOR >= minor))?
        OP_TRUE : OP_FALSE;
  else
    code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)?
      OP_TRUE : OP_FALSE;

  ptr++;           /* Step past the closing parenthesis */
  skipbytes = 1;   /* The condition opcode occupies one code unit */
  break;  /* End of condition processing */
  }
/* Check for a test for recursion in a named group. */
if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND) if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
{ {
terminator = -1; terminator = -1;
@ -5338,11 +5401,13 @@ for (;; ptr++)
} }
/* Similarly, check for the (?(DEFINE) "condition", which is always /* Similarly, check for the (?(DEFINE) "condition", which is always
false. */ false. During compilation we set OP_DEFINE to distinguish this from
other OP_FALSE conditions so that it can be checked for having only one
branch, but after that the opcode is changed to OP_FALSE. */
else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0) else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
{ {
code[1+LINK_SIZE] = OP_DEF; code[1+LINK_SIZE] = OP_DEFINE;
skipbytes = 1; skipbytes = 1;
} }
@ -6065,16 +6130,18 @@ for (;; ptr++)
while (*tc != OP_KET); while (*tc != OP_KET);
/* A DEFINE group is never obeyed inline (the "condition" is always /* A DEFINE group is never obeyed inline (the "condition" is always
false). It must have only one branch. */ false). It must have only one branch. Having checked this, change the
opcode to OP_FALSE. */
if (code[LINK_SIZE+1] == OP_DEF) if (code[LINK_SIZE+1] == OP_DEFINE)
{ {
if (condcount > 1) if (condcount > 1)
{ {
*errorcodeptr = ERR54; *errorcodeptr = ERR54;
goto FAILED; goto FAILED;
} }
bravalue = OP_DEF; /* Just a flag to suppress char handling below */ code[LINK_SIZE+1] = OP_FALSE;
bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */
} }
/* A "normal" conditional group. If there is just one branch, we must not /* A "normal" conditional group. If there is just one branch, we must not
@ -6127,7 +6194,7 @@ for (;; ptr++)
/* For a DEFINE group, required and first character settings are not /* For a DEFINE group, required and first character settings are not
relevant. */ relevant. */
if (bravalue == OP_DEF) break; if (bravalue == OP_DEFINE) break;
/* Handle updating of the required and first characters for other types of /* Handle updating of the required and first characters for other types of
group. Update for normal brackets of all kinds, and conditions with two group. Update for normal brackets of all kinds, and conditions with two
@ -7011,7 +7078,8 @@ do {
case OP_DNCREF: case OP_DNCREF:
case OP_RREF: case OP_RREF:
case OP_DNRREF: case OP_DNRREF:
case OP_DEF: case OP_FALSE:
case OP_TRUE:
return FALSE; return FALSE;
default: /* Assertion */ default: /* Assertion */
@ -7414,6 +7482,10 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
cb.external_options |= p->value; cb.external_options |= p->value;
break; break;
case PSO_FLG:
setflags |= p->value;
break;
case PSO_NL: case PSO_NL:
newline = p->value; newline = p->value;
setflags |= PCRE2_NL_SET; setflags |= PCRE2_NL_SET;

View File

@ -177,12 +177,12 @@ static const uint8_t coptable[] = {
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */ 0, 0, /* CREF, DNCREF */
0, 0, /* RREF, DNRREF */ 0, 0, /* RREF, DNRREF */
0, /* DEF */ 0, 0, /* FALSE, TRUE */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */ 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0 /* CLOSE, SKIPZERO */ 0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
}; };
/* This table identifies those opcodes that inspect a character. It is used to /* This table identifies those opcodes that inspect a character. It is used to
@ -249,12 +249,12 @@ static const uint8_t poptable[] = {
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */ 0, 0, /* CREF, DNCREF */
0, 0, /* RREF, DNRREF */ 0, 0, /* RREF, DNRREF */
0, /* DEF */ 0, 0, /* FALSE, TRUE */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */ 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0 /* CLOSE, SKIPZERO */ 0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
}; };
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
@ -2642,9 +2642,14 @@ for (;;)
/* The DEFINE condition is always false */ /* The DEFINE condition is always false */
if (condcode == OP_DEF) if (condcode == OP_FALSE)
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
/* There is also an always-true condition */
if (condcode == OP_TRUE)
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
/* The only supported version of OP_RREF is for the value RREF_ANY, /* The only supported version of OP_RREF is for the value RREF_ANY,
which means "test if in any recursion". We can't test for specifically which means "test if in any recursion". We can't test for specifically
recursed groups. */ recursed groups. */
@ -3115,6 +3120,24 @@ if (re->magic_number != MAGIC_NUMBER)
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
return PCRE2_ERROR_BADMODE; return PCRE2_ERROR_BADMODE;
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
constructions like (*NO_AUTO_POSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which can now be
transferred to the options for this function. The bits are guaranteed to be
adjacent, but do not have the same values. This bit of Boolean trickery assumes
that the match-time bits are not more significant than the flag bits. If by
accident this is not the case, a compile-time division by zero error will
occur. */
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
#undef FF
#undef OO
/* A NULL match context means "use a default context" */ /* A NULL match context means "use a default context" */
if (mcontext == NULL) if (mcontext == NULL)

View File

@ -158,6 +158,7 @@ static const char compile_error_texts[] =
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0" "character code point value in \\u.... sequence is too large\0"
"digits missing in \\x{} or \\o{}\0" "digits missing in \\x{} or \\o{}\0"
"syntax error in (?(VERSION condition\0"
; ;
/* Match-time and UTF error texts are in the same format. */ /* Match-time and UTF error texts are in the same format. */

View File

@ -532,6 +532,8 @@ bytes in a code unit in that mode. */
#define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */ #define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */
#define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */ #define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */
#define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */ #define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */
#define PCRE2_NOTEMPTY_SET 0x00010000 /* (*NOTEMPTY) used ) keep */
#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
@ -895,6 +897,7 @@ a positive value. */
#define STRING_xdigit "xdigit" #define STRING_xdigit "xdigit"
#define STRING_DEFINE "DEFINE" #define STRING_DEFINE "DEFINE"
#define STRING_VERSION "VERSION"
#define STRING_WEIRD_STARTWORD "[:<:]]" #define STRING_WEIRD_STARTWORD "[:<:]]"
#define STRING_WEIRD_ENDWORD "[:>:]]" #define STRING_WEIRD_ENDWORD "[:>:]]"
@ -912,6 +915,8 @@ a positive value. */
#define STRING_UCP_RIGHTPAR "UCP)" #define STRING_UCP_RIGHTPAR "UCP)"
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)" #define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" #define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" #define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" #define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
@ -1161,6 +1166,7 @@ only. */
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t #define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E #define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N
#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET #define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET #define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
@ -1178,6 +1184,8 @@ only. */
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS #define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS #define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS #define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN #define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN #define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
@ -1517,39 +1525,47 @@ enum {
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */ OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
OP_RREF, /* 143 Used to hold a recursion number as condition */ OP_RREF, /* 143 Used to hold a recursion number as condition */
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */ OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
OP_DEF, /* 145 The DEFINE condition */ OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */
OP_TRUE, /* 146 Always true (used by VERSION) */
OP_BRAZERO, /* 146 These two must remain together and in this */ OP_BRAZERO, /* 147 These two must remain together and in this */
OP_BRAMINZERO, /* 147 order. */ OP_BRAMINZERO, /* 148 order. */
OP_BRAPOSZERO, /* 148 */ OP_BRAPOSZERO, /* 149 */
/* These are backtracking control verbs */ /* These are backtracking control verbs */
OP_MARK, /* 149 always has an argument */ OP_MARK, /* 150 always has an argument */
OP_PRUNE, /* 150 */ OP_PRUNE, /* 151 */
OP_PRUNE_ARG, /* 151 same, but with argument */ OP_PRUNE_ARG, /* 152 same, but with argument */
OP_SKIP, /* 152 */ OP_SKIP, /* 153 */
OP_SKIP_ARG, /* 153 same, but with argument */ OP_SKIP_ARG, /* 154 same, but with argument */
OP_THEN, /* 154 */ OP_THEN, /* 155 */
OP_THEN_ARG, /* 155 same, but with argument */ OP_THEN_ARG, /* 156 same, but with argument */
OP_COMMIT, /* 156 */ OP_COMMIT, /* 157 */
/* These are forced failure and success verbs */ /* These are forced failure and success verbs */
OP_FAIL, /* 157 */ OP_FAIL, /* 158 */
OP_ACCEPT, /* 158 */ OP_ACCEPT, /* 159 */
OP_ASSERT_ACCEPT, /* 159 Used inside assertions */ OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
OP_CLOSE, /* 160 Used before OP_ACCEPT to close open captures */ OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */ /* This is used to skip a subpattern with a {0} quantifier */
OP_SKIPZERO, /* 161 */ OP_SKIPZERO, /* 162 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
OP_DEFINE, /* 163 */
/* This is not an opcode, but is used to check that tables indexed by opcode /* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been are the correct length, in order to catch updating errors - there have been
some in the past. */ some in the past. */
OP_TABLE_LENGTH OP_TABLE_LENGTH
}; };
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
@ -1594,12 +1610,13 @@ some cases doesn't actually use these names at all). */
"Cond", \ "Cond", \
"SBra", "SBraPos", "SCBra", "SCBraPos", \ "SBra", "SBraPos", "SCBra", "SCBraPos", \
"SCond", \ "SCond", \
"Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", "Cond def", \ "Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", \
"Cond false", "Cond true", \
"Brazero", "Braminzero", "Braposzero", \ "Brazero", "Braminzero", "Braposzero", \
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \ "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
"*THEN", "*THEN", "*COMMIT", "*FAIL", \ "*THEN", "*THEN", "*COMMIT", "*FAIL", \
"*ACCEPT", "*ASSERT_ACCEPT", \ "*ACCEPT", "*ASSERT_ACCEPT", \
"Close", "Skip zero" "Close", "Skip zero", "Define"
/* This macro defines the length of fixed length operations in the compiled /* This macro defines the length of fixed length operations in the compiled
@ -1684,13 +1701,14 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1+LINK_SIZE, /* SCOND */ \ 1+LINK_SIZE, /* SCOND */ \
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \ 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \ 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
1, /* DEF */ \ 1, 1, /* FALSE, TRUE */ \
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ 1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ 3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
1, 3, /* SKIP, SKIP_ARG */ \ 1, 3, /* SKIP, SKIP_ARG */ \
1, 3, /* THEN, THEN_ARG */ \ 1, 3, /* THEN, THEN_ARG */ \
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ 1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */ 1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
1 /* DEFINE */
/* A magic value for OP_RREF to indicate the "any recursion" condition. */ /* A magic value for OP_RREF to indicate the "any recursion" condition. */

View File

@ -1363,7 +1363,11 @@ for (;;)
} }
break; break;
case OP_DEF: /* DEFINE - always false */ case OP_FALSE:
break;
case OP_TRUE:
condition = TRUE;
break; break;
/* The condition is an assertion. Call match() to evaluate it - setting /* The condition is an assertion. Call match() to evaluate it - setting
@ -6363,6 +6367,24 @@ if (re->magic_number != MAGIC_NUMBER)
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
return PCRE2_ERROR_BADMODE; return PCRE2_ERROR_BADMODE;
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
constructions like (*NO_AUTO_POSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which can now be
transferred to the options for this function. The bits are guaranteed to be
adjacent, but do not have the same values. This bit of Boolean trickery assumes
that the match-time bits are not more significant than the flag bits. If by
accident this is not the case, a compile-time division by zero error will
occur. */
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
#undef FF
#undef OO
/* A NULL match context means "use a default context" */ /* A NULL match context means "use a default context" */
if (mcontext == NULL) if (mcontext == NULL)

View File

@ -422,8 +422,12 @@ for(;;)
} }
break; break;
case OP_DEF: case OP_FALSE:
fprintf(f, " Cond def"); fprintf(f, " Cond false");
break;
case OP_TRUE:
fprintf(f, " Cond true");
break; break;
case OP_STARI: case OP_STARI:

View File

@ -181,7 +181,8 @@ for (;;)
case OP_DNCREF: case OP_DNCREF:
case OP_RREF: case OP_RREF:
case OP_DNRREF: case OP_DNRREF:
case OP_DEF: case OP_FALSE:
case OP_TRUE:
case OP_CALLOUT: case OP_CALLOUT:
case OP_SOD: case OP_SOD:
case OP_SOM: case OP_SOM:
@ -792,7 +793,8 @@ do
case OP_COMMIT: case OP_COMMIT:
case OP_COND: case OP_COND:
case OP_CREF: case OP_CREF:
case OP_DEF: case OP_FALSE:
case OP_TRUE:
case OP_DNCREF: case OP_DNCREF:
case OP_DNREF: case OP_DNREF:
case OP_DNREFI: case OP_DNREFI:

36
testdata/testinput2 vendored
View File

@ -4070,4 +4070,40 @@ a random value. /Ix
/abc(?=abcde)(?=ab)/allusedtext /abc(?=abcde)(?=ab)/allusedtext
abcabcdefg abcabcdefg
/a*?b*?/
ab
/(*NOTEMPTY)a*?b*?/
ab
ba
cb
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
ab
cdab
/(?(VERSION>=10.0)yes|no)/I
yesno
/(?(VERSION=8)yes){3}/BI,aftertext
yesno
/(?(VERSION=8)yes|no){3}/I
yesnononoyes
** Failers
yesno
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
abcyes
xyzno
** Failers
abcno
xyzyes
/(?(VERSION<10)yes|no)/
/(?(VERSION>10)yes|no)/
/(?(VERSION>=10.0.0)yes|no)/
# End of testinput2 # End of testinput2

12
testdata/testinput6 vendored
View File

@ -4798,4 +4798,16 @@
/abc(?=abcde)(?=ab)/allusedtext /abc(?=abcde)(?=ab)/allusedtext
abcabcdefg abcabcdefg
/a*?b*?/
ab
/(*NOTEMPTY)a*?b*?/
ab
ba
cb
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
ab
cdab
# End of testinput6 # End of testinput6

90
testdata/testoutput2 vendored
View File

@ -9357,7 +9357,7 @@ Partial match at offset 3: +ab
Recurse Recurse
Recurse Recurse
Cond Cond
Cond def Cond false
CBra 1 CBra 1
< <
[^m] [^m]
@ -9379,7 +9379,7 @@ Partial match at offset 3: +ab
Recurse Recurse
Recurse Recurse
Cond Cond
Cond def Cond false
CBra 1 CBra 1
< <
[\x00-/:-\xff] (neg) [\x00-/:-\xff] (neg)
@ -10095,7 +10095,7 @@ No match
Recurse Recurse
KetRpos KetRpos
Cond Cond
Cond def Cond false
CBra 1 CBra 1
Any Any
Ket Ket
@ -10114,7 +10114,7 @@ No match
Recurse Recurse
KetRmax KetRmax
Cond Cond
Cond def Cond false
CBra 1 CBra 1
Any Any
Ket Ket
@ -11058,7 +11058,7 @@ Matched, but too many substrings
------------------------------------------------------------------ ------------------------------------------------------------------
Bra Bra
Cond Cond
Cond def Cond false
CBra 1 CBra 1
a a
Ket Ket
@ -13720,4 +13720,84 @@ No match
0: abcabcde 0: abcabcde
>>>>> >>>>>
/a*?b*?/
ab
0:
/(*NOTEMPTY)a*?b*?/
ab
0: a
ba
0: b
cb
0: b
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
ab
0: a
0+ b
cdab
0:
0+ dab
/(?(VERSION>=10.0)yes|no)/I
Capturing subpattern count = 0
Subject length lower bound = 2
yesno
0: yes
/(?(VERSION=8)yes){3}/BI,aftertext
------------------------------------------------------------------
Bra
Cond
Cond false
yes
Ket
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
yesno
0:
0+ yesno
/(?(VERSION=8)yes|no){3}/I
Capturing subpattern count = 0
Subject length lower bound = 6
yesnononoyes
0: nonono
** Failers
No match
yesno
No match
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
Capturing subpattern count = 1
Named capturing subpatterns:
VERSION 1
Starting code units: a x
Subject length lower bound = 5
abcyes
0: abcyes
1: abc
xyzno
0: xyzno
** Failers
No match
abcno
No match
xyzyes
No match
/(?(VERSION<10)yes|no)/
Failed: error 179 at offset 10: syntax error in (?(VERSION condition
/(?(VERSION>10)yes|no)/
Failed: error 179 at offset 11: syntax error in (?(VERSION condition
/(?(VERSION>=10.0.0)yes|no)/
Failed: error 179 at offset 16: syntax error in (?(VERSION condition
# End of testinput2 # End of testinput2

24
testdata/testoutput6 vendored
View File

@ -7689,4 +7689,28 @@ Matched, but offsets vector is too small to show all matches
0: abcabcde 0: abcabcde
>>>>> >>>>>
/a*?b*?/
ab
0: ab
1: a
2:
/(*NOTEMPTY)a*?b*?/
ab
0: ab
1: a
ba
0: b
cb
0: b
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
ab
0: ab
0+
1: a
cdab
0:
0+ dab
# End of testinput6 # End of testinput6