Implement (*NOTEMPTY) and (?(VERSION= features.
This commit is contained in:
parent
ac70cacd29
commit
7858fa702d
|
@ -130,8 +130,8 @@ functions, so take care not to define synonyms by mistake. */
|
|||
|
||||
#define PCRE2_NOTBOL 0x00000001u
|
||||
#define PCRE2_NOTEOL 0x00000002u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||
|
||||
|
|
|
@ -566,7 +566,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
|||
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
||||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78 };
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79 };
|
||||
|
||||
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
||||
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
||||
|
@ -574,6 +574,7 @@ compatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is
|
|||
generic and always supported. */
|
||||
|
||||
enum { PSO_OPT, /* Value is an option bit */
|
||||
PSO_FLG, /* Value is a flag bit */
|
||||
PSO_NL, /* Value is a newline type */
|
||||
PSO_BSR, /* Value is a \R type */
|
||||
PSO_LIMM, /* Read integer value for match limit */
|
||||
|
@ -592,6 +593,8 @@ static pso pso_list[] = {
|
|||
{ (uint8_t *)STRING_UTFn_RIGHTPAR, PSO_OPT, PCRE2_UTF },
|
||||
{ (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF },
|
||||
{ (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP },
|
||||
{ (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET },
|
||||
{ (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET },
|
||||
{ (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
|
||||
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
|
||||
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
|
||||
|
@ -854,7 +857,8 @@ for (;;)
|
|||
case OP_CLOSE:
|
||||
case OP_COMMIT:
|
||||
case OP_CREF:
|
||||
case OP_DEF:
|
||||
case OP_FALSE:
|
||||
case OP_TRUE:
|
||||
case OP_DNCREF:
|
||||
case OP_DNRREF:
|
||||
case OP_DOLL:
|
||||
|
@ -1118,7 +1122,8 @@ for (;;)
|
|||
case OP_DNCREF:
|
||||
case OP_RREF:
|
||||
case OP_DNRREF:
|
||||
case OP_DEF:
|
||||
case OP_FALSE:
|
||||
case OP_TRUE:
|
||||
code += PRIV(OP_lengths)[*code];
|
||||
break;
|
||||
|
||||
|
@ -4449,10 +4454,12 @@ for (;; ptr++)
|
|||
PCRE2_UCHAR *bralink = NULL;
|
||||
PCRE2_UCHAR *brazeroptr = NULL;
|
||||
|
||||
/* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
|
||||
we just ignore the repeat. */
|
||||
/* Repeating a DEFINE group (or any group where the condition is always
|
||||
FALSE and there is only one branch) is pointless, but Perl allows the
|
||||
syntax, so we just ignore the repeat. */
|
||||
|
||||
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
|
||||
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
|
||||
previous[GET(previous, 1)] != OP_ALT)
|
||||
goto END_REPEAT;
|
||||
|
||||
/* There is no sense in actually repeating assertions. The only potential
|
||||
|
@ -5159,10 +5166,66 @@ for (;; ptr++)
|
|||
namelen = -1; /* => not a name; must set to avoid warning */
|
||||
name = NULL; /* Always set to avoid warning */
|
||||
recno = 0; /* Always set to avoid warning */
|
||||
|
||||
/* Point at character after (?( */
|
||||
|
||||
ptr++;
|
||||
|
||||
/* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect
|
||||
users of PCRE2 via an application can discover which release of PCRE2
|
||||
is being used. */
|
||||
|
||||
if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
|
||||
ptr[7] != CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
BOOL ge = FALSE;
|
||||
int major = 0;
|
||||
int minor = 0;
|
||||
|
||||
ptr += 7;
|
||||
if (*ptr == CHAR_GREATER_THAN_SIGN)
|
||||
{
|
||||
ge = TRUE;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
/* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
|
||||
references its argument twice. */
|
||||
|
||||
if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
|
||||
{
|
||||
*errorcodeptr = ERR79;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0';
|
||||
if (*ptr == CHAR_DOT)
|
||||
{
|
||||
ptr++;
|
||||
while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
|
||||
}
|
||||
|
||||
if (*ptr != CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
*errorcodeptr = ERR79;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
if (ge)
|
||||
code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) ||
|
||||
(PCRE2_MAJOR == major && PCRE2_MINOR >= minor))?
|
||||
OP_TRUE : OP_FALSE;
|
||||
else
|
||||
code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)?
|
||||
OP_TRUE : OP_FALSE;
|
||||
|
||||
ptr++;
|
||||
skipbytes = 1;
|
||||
break; /* End of condition processing */
|
||||
}
|
||||
|
||||
/* Check for a test for recursion in a named group. */
|
||||
|
||||
ptr++;
|
||||
if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
|
||||
{
|
||||
terminator = -1;
|
||||
|
@ -5338,11 +5401,13 @@ for (;; ptr++)
|
|||
}
|
||||
|
||||
/* Similarly, check for the (?(DEFINE) "condition", which is always
|
||||
false. */
|
||||
false. During compilation we set OP_DEFINE to distinguish this from
|
||||
other OP_FALSE conditions so that it can be checked for having only one
|
||||
branch, but after that the opcode is changed to OP_FALSE. */
|
||||
|
||||
else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
|
||||
{
|
||||
code[1+LINK_SIZE] = OP_DEF;
|
||||
code[1+LINK_SIZE] = OP_DEFINE;
|
||||
skipbytes = 1;
|
||||
}
|
||||
|
||||
|
@ -6065,16 +6130,18 @@ for (;; ptr++)
|
|||
while (*tc != OP_KET);
|
||||
|
||||
/* A DEFINE group is never obeyed inline (the "condition" is always
|
||||
false). It must have only one branch. */
|
||||
false). It must have only one branch. Having checked this, change the
|
||||
opcode to OP_FALSE. */
|
||||
|
||||
if (code[LINK_SIZE+1] == OP_DEF)
|
||||
if (code[LINK_SIZE+1] == OP_DEFINE)
|
||||
{
|
||||
if (condcount > 1)
|
||||
{
|
||||
*errorcodeptr = ERR54;
|
||||
goto FAILED;
|
||||
}
|
||||
bravalue = OP_DEF; /* Just a flag to suppress char handling below */
|
||||
code[LINK_SIZE+1] = OP_FALSE;
|
||||
bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */
|
||||
}
|
||||
|
||||
/* A "normal" conditional group. If there is just one branch, we must not
|
||||
|
@ -6127,7 +6194,7 @@ for (;; ptr++)
|
|||
/* For a DEFINE group, required and first character settings are not
|
||||
relevant. */
|
||||
|
||||
if (bravalue == OP_DEF) break;
|
||||
if (bravalue == OP_DEFINE) break;
|
||||
|
||||
/* Handle updating of the required and first characters for other types of
|
||||
group. Update for normal brackets of all kinds, and conditions with two
|
||||
|
@ -7011,7 +7078,8 @@ do {
|
|||
case OP_DNCREF:
|
||||
case OP_RREF:
|
||||
case OP_DNRREF:
|
||||
case OP_DEF:
|
||||
case OP_FALSE:
|
||||
case OP_TRUE:
|
||||
return FALSE;
|
||||
|
||||
default: /* Assertion */
|
||||
|
@ -7413,6 +7481,10 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
|
|||
case PSO_OPT:
|
||||
cb.external_options |= p->value;
|
||||
break;
|
||||
|
||||
case PSO_FLG:
|
||||
setflags |= p->value;
|
||||
break;
|
||||
|
||||
case PSO_NL:
|
||||
newline = p->value;
|
||||
|
|
|
@ -177,12 +177,12 @@ static const uint8_t coptable[] = {
|
|||
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
||||
0, 0, /* CREF, DNCREF */
|
||||
0, 0, /* RREF, DNRREF */
|
||||
0, /* DEF */
|
||||
0, 0, /* FALSE, TRUE */
|
||||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
||||
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
|
||||
0, 0 /* CLOSE, SKIPZERO */
|
||||
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
|
||||
};
|
||||
|
||||
/* This table identifies those opcodes that inspect a character. It is used to
|
||||
|
@ -249,12 +249,12 @@ static const uint8_t poptable[] = {
|
|||
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
||||
0, 0, /* CREF, DNCREF */
|
||||
0, 0, /* RREF, DNRREF */
|
||||
0, /* DEF */
|
||||
0, 0, /* FALSE, TRUE */
|
||||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
||||
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
|
||||
0, 0 /* CLOSE, SKIPZERO */
|
||||
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
|
||||
};
|
||||
|
||||
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
|
||||
|
@ -2642,8 +2642,13 @@ for (;;)
|
|||
|
||||
/* The DEFINE condition is always false */
|
||||
|
||||
if (condcode == OP_DEF)
|
||||
if (condcode == OP_FALSE)
|
||||
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||
|
||||
/* There is also an always-true condition */
|
||||
|
||||
if (condcode == OP_TRUE)
|
||||
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
|
||||
|
||||
/* The only supported version of OP_RREF is for the value RREF_ANY,
|
||||
which means "test if in any recursion". We can't test for specifically
|
||||
|
@ -3115,6 +3120,24 @@ if (re->magic_number != MAGIC_NUMBER)
|
|||
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
||||
return PCRE2_ERROR_BADMODE;
|
||||
|
||||
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||
options variable for this function. Users of PCRE2 who are not calling the
|
||||
function directly would like to have a way of setting these flags, in the same
|
||||
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
|
||||
constructions like (*NO_AUTO_POSSESS). To enable this, (*NOTEMPTY) and
|
||||
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which can now be
|
||||
transferred to the options for this function. The bits are guaranteed to be
|
||||
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||
that the match-time bits are not more significant than the flag bits. If by
|
||||
accident this is not the case, a compile-time division by zero error will
|
||||
occur. */
|
||||
|
||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
||||
#undef FF
|
||||
#undef OO
|
||||
|
||||
/* A NULL match context means "use a default context" */
|
||||
|
||||
if (mcontext == NULL)
|
||||
|
|
|
@ -157,7 +157,8 @@ static const char compile_error_texts[] =
|
|||
"using UCP is disabled by the application\0"
|
||||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||
"character code point value in \\u.... sequence is too large\0"
|
||||
"digits missing in \\x{} or \\o{}\0"
|
||||
"digits missing in \\x{} or \\o{}\0"
|
||||
"syntax error in (?(VERSION condition\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
|
|
@ -223,10 +223,10 @@ else
|
|||
#endif /* not HAVE_MEMMOVE */
|
||||
#endif /* not VPCOMPAT */
|
||||
|
||||
/* External (in the C sense) functions and tables that are private to the
|
||||
/* External (in the C sense) functions and tables that are private to the
|
||||
libraries are always referenced using the PRIV macro. This makes it possible
|
||||
for pcre2test.c to include some of the source files from the libraries using a
|
||||
different PRIV definition to avoid name clashes. It also makes it clear in the
|
||||
different PRIV definition to avoid name clashes. It also makes it clear in the
|
||||
code that a non-static object is being referenced. */
|
||||
|
||||
#ifndef PRIV
|
||||
|
@ -387,10 +387,10 @@ other. NOTE: The values also appear in pcre2_jit_compile.c. */
|
|||
|
||||
#ifndef EBCDIC
|
||||
|
||||
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
|
||||
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
|
||||
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
|
||||
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
|
||||
space. However, in many other sources it is listed as a space and has been in
|
||||
PCRE for a long time. */
|
||||
PCRE for a long time. */
|
||||
|
||||
#define HSPACE_LIST \
|
||||
CHAR_HT, CHAR_SPACE, 0xa0, \
|
||||
|
@ -532,6 +532,8 @@ bytes in a code unit in that mode. */
|
|||
#define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */
|
||||
#define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */
|
||||
#define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */
|
||||
#define PCRE2_NOTEMPTY_SET 0x00010000 /* (*NOTEMPTY) used ) keep */
|
||||
#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */
|
||||
|
||||
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||
|
||||
|
@ -895,25 +897,28 @@ a positive value. */
|
|||
#define STRING_xdigit "xdigit"
|
||||
|
||||
#define STRING_DEFINE "DEFINE"
|
||||
#define STRING_VERSION "VERSION"
|
||||
#define STRING_WEIRD_STARTWORD "[:<:]]"
|
||||
#define STRING_WEIRD_ENDWORD "[:>:]]"
|
||||
|
||||
#define STRING_CR_RIGHTPAR "CR)"
|
||||
#define STRING_LF_RIGHTPAR "LF)"
|
||||
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
||||
#define STRING_ANY_RIGHTPAR "ANY)"
|
||||
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
||||
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
||||
#define STRING_UTF16_RIGHTPAR "UTF16)"
|
||||
#define STRING_UTF32_RIGHTPAR "UTF32)"
|
||||
#define STRING_UTF_RIGHTPAR "UTF)"
|
||||
#define STRING_UCP_RIGHTPAR "UCP)"
|
||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
|
||||
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||
#define STRING_CR_RIGHTPAR "CR)"
|
||||
#define STRING_LF_RIGHTPAR "LF)"
|
||||
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
||||
#define STRING_ANY_RIGHTPAR "ANY)"
|
||||
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
||||
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
||||
#define STRING_UTF16_RIGHTPAR "UTF16)"
|
||||
#define STRING_UTF32_RIGHTPAR "UTF32)"
|
||||
#define STRING_UTF_RIGHTPAR "UTF)"
|
||||
#define STRING_UCP_RIGHTPAR "UCP)"
|
||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
|
||||
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
|
||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
|
||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||
|
||||
#else /* SUPPORT_UTF */
|
||||
|
||||
|
@ -1161,25 +1166,28 @@ only. */
|
|||
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
|
||||
|
||||
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
|
||||
#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N
|
||||
#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||
#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||
|
||||
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
|
@ -1517,39 +1525,47 @@ enum {
|
|||
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
|
||||
OP_RREF, /* 143 Used to hold a recursion number as condition */
|
||||
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
|
||||
OP_DEF, /* 145 The DEFINE condition */
|
||||
OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */
|
||||
OP_TRUE, /* 146 Always true (used by VERSION) */
|
||||
|
||||
OP_BRAZERO, /* 146 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 147 order. */
|
||||
OP_BRAPOSZERO, /* 148 */
|
||||
OP_BRAZERO, /* 147 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 148 order. */
|
||||
OP_BRAPOSZERO, /* 149 */
|
||||
|
||||
/* These are backtracking control verbs */
|
||||
|
||||
OP_MARK, /* 149 always has an argument */
|
||||
OP_PRUNE, /* 150 */
|
||||
OP_PRUNE_ARG, /* 151 same, but with argument */
|
||||
OP_SKIP, /* 152 */
|
||||
OP_SKIP_ARG, /* 153 same, but with argument */
|
||||
OP_THEN, /* 154 */
|
||||
OP_THEN_ARG, /* 155 same, but with argument */
|
||||
OP_COMMIT, /* 156 */
|
||||
OP_MARK, /* 150 always has an argument */
|
||||
OP_PRUNE, /* 151 */
|
||||
OP_PRUNE_ARG, /* 152 same, but with argument */
|
||||
OP_SKIP, /* 153 */
|
||||
OP_SKIP_ARG, /* 154 same, but with argument */
|
||||
OP_THEN, /* 155 */
|
||||
OP_THEN_ARG, /* 156 same, but with argument */
|
||||
OP_COMMIT, /* 157 */
|
||||
|
||||
/* These are forced failure and success verbs */
|
||||
|
||||
OP_FAIL, /* 157 */
|
||||
OP_ACCEPT, /* 158 */
|
||||
OP_ASSERT_ACCEPT, /* 159 Used inside assertions */
|
||||
OP_CLOSE, /* 160 Used before OP_ACCEPT to close open captures */
|
||||
OP_FAIL, /* 158 */
|
||||
OP_ACCEPT, /* 159 */
|
||||
OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
|
||||
OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
|
||||
|
||||
/* This is used to skip a subpattern with a {0} quantifier */
|
||||
|
||||
OP_SKIPZERO, /* 161 */
|
||||
OP_SKIPZERO, /* 162 */
|
||||
|
||||
/* This is used to identify a DEFINE group during compilation so that it can
|
||||
be checked for having only one branch. It is changed to OP_FALSE before
|
||||
compilation finishes. */
|
||||
|
||||
OP_DEFINE, /* 163 */
|
||||
|
||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||
are the correct length, in order to catch updating errors - there have been
|
||||
some in the past. */
|
||||
|
||||
OP_TABLE_LENGTH
|
||||
|
||||
};
|
||||
|
||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||
|
@ -1594,12 +1610,13 @@ some cases doesn't actually use these names at all). */
|
|||
"Cond", \
|
||||
"SBra", "SBraPos", "SCBra", "SCBraPos", \
|
||||
"SCond", \
|
||||
"Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", "Cond def", \
|
||||
"Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", \
|
||||
"Cond false", "Cond true", \
|
||||
"Brazero", "Braminzero", "Braposzero", \
|
||||
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
|
||||
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
|
||||
"*ACCEPT", "*ASSERT_ACCEPT", \
|
||||
"Close", "Skip zero"
|
||||
"Close", "Skip zero", "Define"
|
||||
|
||||
|
||||
/* This macro defines the length of fixed length operations in the compiled
|
||||
|
@ -1684,14 +1701,15 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
|||
1+LINK_SIZE, /* SCOND */ \
|
||||
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
|
||||
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
|
||||
1, /* DEF */ \
|
||||
1, 1, /* FALSE, TRUE */ \
|
||||
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
|
||||
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
|
||||
1, 3, /* SKIP, SKIP_ARG */ \
|
||||
1, 3, /* THEN, THEN_ARG */ \
|
||||
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
||||
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */
|
||||
|
||||
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
|
||||
1 /* DEFINE */
|
||||
|
||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||
|
||||
#define RREF_ANY 0xffff
|
||||
|
@ -1757,7 +1775,7 @@ typedef struct {
|
|||
|
||||
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
|
||||
|
||||
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not
|
||||
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not
|
||||
defined, so the following items are omitted. */
|
||||
|
||||
#ifdef PCRE2_CODE_UNIT_WIDTH
|
||||
|
@ -1776,11 +1794,11 @@ However, UTF-8 tables are needed only when compiling the 8-bit library. */
|
|||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
extern const int PRIV(utf8_table1)[];
|
||||
extern const int PRIV(utf8_table1_size);
|
||||
extern const int PRIV(utf8_table1_size);
|
||||
extern const int PRIV(utf8_table2)[];
|
||||
extern const int PRIV(utf8_table3)[];
|
||||
extern const uint8_t PRIV(utf8_table4)[];
|
||||
#endif
|
||||
extern const uint8_t PRIV(utf8_table4)[];
|
||||
#endif
|
||||
|
||||
#define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_)
|
||||
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
|
||||
|
@ -1857,7 +1875,7 @@ is available. */
|
|||
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, const compile_block *);
|
||||
extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
|
||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
||||
BOOL);
|
||||
extern size_t _pcre2_jit_get_size(void *);
|
||||
extern void _pcre2_match_context_init(pcre2_match_context *, BOOL);
|
||||
|
@ -1870,7 +1888,7 @@ extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
|
|||
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
|
||||
extern int _pcre2_study(pcre2_real_code *);
|
||||
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
|
||||
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
||||
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
||||
BOOL);
|
||||
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
||||
|
|
|
@ -1363,8 +1363,12 @@ for (;;)
|
|||
}
|
||||
break;
|
||||
|
||||
case OP_DEF: /* DEFINE - always false */
|
||||
case OP_FALSE:
|
||||
break;
|
||||
|
||||
case OP_TRUE:
|
||||
condition = TRUE;
|
||||
break;
|
||||
|
||||
/* The condition is an assertion. Call match() to evaluate it - setting
|
||||
mb->match_function_type to MATCH_CONDASSERT causes it to stop at the end
|
||||
|
@ -6362,6 +6366,24 @@ if (re->magic_number != MAGIC_NUMBER)
|
|||
|
||||
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
||||
return PCRE2_ERROR_BADMODE;
|
||||
|
||||
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||
options variable for this function. Users of PCRE2 who are not calling the
|
||||
function directly would like to have a way of setting these flags, in the same
|
||||
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
|
||||
constructions like (*NO_AUTO_POSSESS). To enable this, (*NOTEMPTY) and
|
||||
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which can now be
|
||||
transferred to the options for this function. The bits are guaranteed to be
|
||||
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||
that the match-time bits are not more significant than the flag bits. If by
|
||||
accident this is not the case, a compile-time division by zero error will
|
||||
occur. */
|
||||
|
||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
||||
#undef FF
|
||||
#undef OO
|
||||
|
||||
/* A NULL match context means "use a default context" */
|
||||
|
||||
|
|
|
@ -422,8 +422,12 @@ for(;;)
|
|||
}
|
||||
break;
|
||||
|
||||
case OP_DEF:
|
||||
fprintf(f, " Cond def");
|
||||
case OP_FALSE:
|
||||
fprintf(f, " Cond false");
|
||||
break;
|
||||
|
||||
case OP_TRUE:
|
||||
fprintf(f, " Cond true");
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
|
|
|
@ -181,7 +181,8 @@ for (;;)
|
|||
case OP_DNCREF:
|
||||
case OP_RREF:
|
||||
case OP_DNRREF:
|
||||
case OP_DEF:
|
||||
case OP_FALSE:
|
||||
case OP_TRUE:
|
||||
case OP_CALLOUT:
|
||||
case OP_SOD:
|
||||
case OP_SOM:
|
||||
|
@ -792,7 +793,8 @@ do
|
|||
case OP_COMMIT:
|
||||
case OP_COND:
|
||||
case OP_CREF:
|
||||
case OP_DEF:
|
||||
case OP_FALSE:
|
||||
case OP_TRUE:
|
||||
case OP_DNCREF:
|
||||
case OP_DNREF:
|
||||
case OP_DNREFI:
|
||||
|
|
|
@ -4070,4 +4070,40 @@ a random value. /Ix
|
|||
/abc(?=abcde)(?=ab)/allusedtext
|
||||
abcabcdefg
|
||||
|
||||
/a*?b*?/
|
||||
ab
|
||||
|
||||
/(*NOTEMPTY)a*?b*?/
|
||||
ab
|
||||
ba
|
||||
cb
|
||||
|
||||
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
|
||||
ab
|
||||
cdab
|
||||
|
||||
/(?(VERSION>=10.0)yes|no)/I
|
||||
yesno
|
||||
|
||||
/(?(VERSION=8)yes){3}/BI,aftertext
|
||||
yesno
|
||||
|
||||
/(?(VERSION=8)yes|no){3}/I
|
||||
yesnononoyes
|
||||
** Failers
|
||||
yesno
|
||||
|
||||
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
|
||||
abcyes
|
||||
xyzno
|
||||
** Failers
|
||||
abcno
|
||||
xyzyes
|
||||
|
||||
/(?(VERSION<10)yes|no)/
|
||||
|
||||
/(?(VERSION>10)yes|no)/
|
||||
|
||||
/(?(VERSION>=10.0.0)yes|no)/
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -4798,4 +4798,16 @@
|
|||
/abc(?=abcde)(?=ab)/allusedtext
|
||||
abcabcdefg
|
||||
|
||||
/a*?b*?/
|
||||
ab
|
||||
|
||||
/(*NOTEMPTY)a*?b*?/
|
||||
ab
|
||||
ba
|
||||
cb
|
||||
|
||||
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
|
||||
ab
|
||||
cdab
|
||||
|
||||
# End of testinput6
|
||||
|
|
|
@ -9357,7 +9357,7 @@ Partial match at offset 3: +ab
|
|||
Recurse
|
||||
Recurse
|
||||
Cond
|
||||
Cond def
|
||||
Cond false
|
||||
CBra 1
|
||||
<
|
||||
[^m]
|
||||
|
@ -9379,7 +9379,7 @@ Partial match at offset 3: +ab
|
|||
Recurse
|
||||
Recurse
|
||||
Cond
|
||||
Cond def
|
||||
Cond false
|
||||
CBra 1
|
||||
<
|
||||
[\x00-/:-\xff] (neg)
|
||||
|
@ -10095,7 +10095,7 @@ No match
|
|||
Recurse
|
||||
KetRpos
|
||||
Cond
|
||||
Cond def
|
||||
Cond false
|
||||
CBra 1
|
||||
Any
|
||||
Ket
|
||||
|
@ -10114,7 +10114,7 @@ No match
|
|||
Recurse
|
||||
KetRmax
|
||||
Cond
|
||||
Cond def
|
||||
Cond false
|
||||
CBra 1
|
||||
Any
|
||||
Ket
|
||||
|
@ -11058,7 +11058,7 @@ Matched, but too many substrings
|
|||
------------------------------------------------------------------
|
||||
Bra
|
||||
Cond
|
||||
Cond def
|
||||
Cond false
|
||||
CBra 1
|
||||
a
|
||||
Ket
|
||||
|
@ -13720,4 +13720,84 @@ No match
|
|||
0: abcabcde
|
||||
>>>>>
|
||||
|
||||
/a*?b*?/
|
||||
ab
|
||||
0:
|
||||
|
||||
/(*NOTEMPTY)a*?b*?/
|
||||
ab
|
||||
0: a
|
||||
ba
|
||||
0: b
|
||||
cb
|
||||
0: b
|
||||
|
||||
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
|
||||
ab
|
||||
0: a
|
||||
0+ b
|
||||
cdab
|
||||
0:
|
||||
0+ dab
|
||||
|
||||
/(?(VERSION>=10.0)yes|no)/I
|
||||
Capturing subpattern count = 0
|
||||
Subject length lower bound = 2
|
||||
yesno
|
||||
0: yes
|
||||
|
||||
/(?(VERSION=8)yes){3}/BI,aftertext
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Cond
|
||||
Cond false
|
||||
yes
|
||||
Ket
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
May match empty string
|
||||
Subject length lower bound = 0
|
||||
yesno
|
||||
0:
|
||||
0+ yesno
|
||||
|
||||
/(?(VERSION=8)yes|no){3}/I
|
||||
Capturing subpattern count = 0
|
||||
Subject length lower bound = 6
|
||||
yesnononoyes
|
||||
0: nonono
|
||||
** Failers
|
||||
No match
|
||||
yesno
|
||||
No match
|
||||
|
||||
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
|
||||
Capturing subpattern count = 1
|
||||
Named capturing subpatterns:
|
||||
VERSION 1
|
||||
Starting code units: a x
|
||||
Subject length lower bound = 5
|
||||
abcyes
|
||||
0: abcyes
|
||||
1: abc
|
||||
xyzno
|
||||
0: xyzno
|
||||
** Failers
|
||||
No match
|
||||
abcno
|
||||
No match
|
||||
xyzyes
|
||||
No match
|
||||
|
||||
/(?(VERSION<10)yes|no)/
|
||||
Failed: error 179 at offset 10: syntax error in (?(VERSION condition
|
||||
|
||||
/(?(VERSION>10)yes|no)/
|
||||
Failed: error 179 at offset 11: syntax error in (?(VERSION condition
|
||||
|
||||
/(?(VERSION>=10.0.0)yes|no)/
|
||||
Failed: error 179 at offset 16: syntax error in (?(VERSION condition
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -7689,4 +7689,28 @@ Matched, but offsets vector is too small to show all matches
|
|||
0: abcabcde
|
||||
>>>>>
|
||||
|
||||
/a*?b*?/
|
||||
ab
|
||||
0: ab
|
||||
1: a
|
||||
2:
|
||||
|
||||
/(*NOTEMPTY)a*?b*?/
|
||||
ab
|
||||
0: ab
|
||||
1: a
|
||||
ba
|
||||
0: b
|
||||
cb
|
||||
0: b
|
||||
|
||||
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
|
||||
ab
|
||||
0: ab
|
||||
0+
|
||||
1: a
|
||||
cdab
|
||||
0:
|
||||
0+ dab
|
||||
|
||||
# End of testinput6
|
||||
|
|
Loading…
Reference in New Issue