Implement (*NOTEMPTY) and (?(VERSION= features.
This commit is contained in:
parent
ac70cacd29
commit
7858fa702d
|
@ -130,8 +130,8 @@ functions, so take care not to define synonyms by mistake. */
|
||||||
|
|
||||||
#define PCRE2_NOTBOL 0x00000001u
|
#define PCRE2_NOTBOL 0x00000001u
|
||||||
#define PCRE2_NOTEOL 0x00000002u
|
#define PCRE2_NOTEOL 0x00000002u
|
||||||
#define PCRE2_NOTEMPTY 0x00000004u
|
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u
|
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||||
|
|
||||||
|
|
|
@ -566,7 +566,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
||||||
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
||||||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78 };
|
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79 };
|
||||||
|
|
||||||
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
||||||
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
||||||
|
@ -574,6 +574,7 @@ compatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is
|
||||||
generic and always supported. */
|
generic and always supported. */
|
||||||
|
|
||||||
enum { PSO_OPT, /* Value is an option bit */
|
enum { PSO_OPT, /* Value is an option bit */
|
||||||
|
PSO_FLG, /* Value is a flag bit */
|
||||||
PSO_NL, /* Value is a newline type */
|
PSO_NL, /* Value is a newline type */
|
||||||
PSO_BSR, /* Value is a \R type */
|
PSO_BSR, /* Value is a \R type */
|
||||||
PSO_LIMM, /* Read integer value for match limit */
|
PSO_LIMM, /* Read integer value for match limit */
|
||||||
|
@ -592,6 +593,8 @@ static pso pso_list[] = {
|
||||||
{ (uint8_t *)STRING_UTFn_RIGHTPAR, PSO_OPT, PCRE2_UTF },
|
{ (uint8_t *)STRING_UTFn_RIGHTPAR, PSO_OPT, PCRE2_UTF },
|
||||||
{ (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF },
|
{ (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF },
|
||||||
{ (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP },
|
{ (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP },
|
||||||
|
{ (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET },
|
||||||
|
{ (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET },
|
||||||
{ (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
|
{ (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
|
||||||
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
|
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
|
||||||
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
|
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
|
||||||
|
@ -854,7 +857,8 @@ for (;;)
|
||||||
case OP_CLOSE:
|
case OP_CLOSE:
|
||||||
case OP_COMMIT:
|
case OP_COMMIT:
|
||||||
case OP_CREF:
|
case OP_CREF:
|
||||||
case OP_DEF:
|
case OP_FALSE:
|
||||||
|
case OP_TRUE:
|
||||||
case OP_DNCREF:
|
case OP_DNCREF:
|
||||||
case OP_DNRREF:
|
case OP_DNRREF:
|
||||||
case OP_DOLL:
|
case OP_DOLL:
|
||||||
|
@ -1118,7 +1122,8 @@ for (;;)
|
||||||
case OP_DNCREF:
|
case OP_DNCREF:
|
||||||
case OP_RREF:
|
case OP_RREF:
|
||||||
case OP_DNRREF:
|
case OP_DNRREF:
|
||||||
case OP_DEF:
|
case OP_FALSE:
|
||||||
|
case OP_TRUE:
|
||||||
code += PRIV(OP_lengths)[*code];
|
code += PRIV(OP_lengths)[*code];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -4449,10 +4454,12 @@ for (;; ptr++)
|
||||||
PCRE2_UCHAR *bralink = NULL;
|
PCRE2_UCHAR *bralink = NULL;
|
||||||
PCRE2_UCHAR *brazeroptr = NULL;
|
PCRE2_UCHAR *brazeroptr = NULL;
|
||||||
|
|
||||||
/* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
|
/* Repeating a DEFINE group (or any group where the condition is always
|
||||||
we just ignore the repeat. */
|
FALSE and there is only one branch) is pointless, but Perl allows the
|
||||||
|
syntax, so we just ignore the repeat. */
|
||||||
|
|
||||||
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
|
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
|
||||||
|
previous[GET(previous, 1)] != OP_ALT)
|
||||||
goto END_REPEAT;
|
goto END_REPEAT;
|
||||||
|
|
||||||
/* There is no sense in actually repeating assertions. The only potential
|
/* There is no sense in actually repeating assertions. The only potential
|
||||||
|
@ -5159,10 +5166,66 @@ for (;; ptr++)
|
||||||
namelen = -1; /* => not a name; must set to avoid warning */
|
namelen = -1; /* => not a name; must set to avoid warning */
|
||||||
name = NULL; /* Always set to avoid warning */
|
name = NULL; /* Always set to avoid warning */
|
||||||
recno = 0; /* Always set to avoid warning */
|
recno = 0; /* Always set to avoid warning */
|
||||||
|
|
||||||
|
/* Point at character after (?( */
|
||||||
|
|
||||||
|
ptr++;
|
||||||
|
|
||||||
|
/* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect
|
||||||
|
users of PCRE2 via an application can discover which release of PCRE2
|
||||||
|
is being used. */
|
||||||
|
|
||||||
|
if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
|
||||||
|
ptr[7] != CHAR_RIGHT_PARENTHESIS)
|
||||||
|
{
|
||||||
|
BOOL ge = FALSE;
|
||||||
|
int major = 0;
|
||||||
|
int minor = 0;
|
||||||
|
|
||||||
|
ptr += 7;
|
||||||
|
if (*ptr == CHAR_GREATER_THAN_SIGN)
|
||||||
|
{
|
||||||
|
ge = TRUE;
|
||||||
|
ptr++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
|
||||||
|
references its argument twice. */
|
||||||
|
|
||||||
|
if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR79;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0';
|
||||||
|
if (*ptr == CHAR_DOT)
|
||||||
|
{
|
||||||
|
ptr++;
|
||||||
|
while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*ptr != CHAR_RIGHT_PARENTHESIS)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR79;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ge)
|
||||||
|
code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) ||
|
||||||
|
(PCRE2_MAJOR == major && PCRE2_MINOR >= minor))?
|
||||||
|
OP_TRUE : OP_FALSE;
|
||||||
|
else
|
||||||
|
code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)?
|
||||||
|
OP_TRUE : OP_FALSE;
|
||||||
|
|
||||||
|
ptr++;
|
||||||
|
skipbytes = 1;
|
||||||
|
break; /* End of condition processing */
|
||||||
|
}
|
||||||
|
|
||||||
/* Check for a test for recursion in a named group. */
|
/* Check for a test for recursion in a named group. */
|
||||||
|
|
||||||
ptr++;
|
|
||||||
if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
|
if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
|
||||||
{
|
{
|
||||||
terminator = -1;
|
terminator = -1;
|
||||||
|
@ -5338,11 +5401,13 @@ for (;; ptr++)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Similarly, check for the (?(DEFINE) "condition", which is always
|
/* Similarly, check for the (?(DEFINE) "condition", which is always
|
||||||
false. */
|
false. During compilation we set OP_DEFINE to distinguish this from
|
||||||
|
other OP_FALSE conditions so that it can be checked for having only one
|
||||||
|
branch, but after that the opcode is changed to OP_FALSE. */
|
||||||
|
|
||||||
else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
|
else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
|
||||||
{
|
{
|
||||||
code[1+LINK_SIZE] = OP_DEF;
|
code[1+LINK_SIZE] = OP_DEFINE;
|
||||||
skipbytes = 1;
|
skipbytes = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6065,16 +6130,18 @@ for (;; ptr++)
|
||||||
while (*tc != OP_KET);
|
while (*tc != OP_KET);
|
||||||
|
|
||||||
/* A DEFINE group is never obeyed inline (the "condition" is always
|
/* A DEFINE group is never obeyed inline (the "condition" is always
|
||||||
false). It must have only one branch. */
|
false). It must have only one branch. Having checked this, change the
|
||||||
|
opcode to OP_FALSE. */
|
||||||
|
|
||||||
if (code[LINK_SIZE+1] == OP_DEF)
|
if (code[LINK_SIZE+1] == OP_DEFINE)
|
||||||
{
|
{
|
||||||
if (condcount > 1)
|
if (condcount > 1)
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR54;
|
*errorcodeptr = ERR54;
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
}
|
}
|
||||||
bravalue = OP_DEF; /* Just a flag to suppress char handling below */
|
code[LINK_SIZE+1] = OP_FALSE;
|
||||||
|
bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* A "normal" conditional group. If there is just one branch, we must not
|
/* A "normal" conditional group. If there is just one branch, we must not
|
||||||
|
@ -6127,7 +6194,7 @@ for (;; ptr++)
|
||||||
/* For a DEFINE group, required and first character settings are not
|
/* For a DEFINE group, required and first character settings are not
|
||||||
relevant. */
|
relevant. */
|
||||||
|
|
||||||
if (bravalue == OP_DEF) break;
|
if (bravalue == OP_DEFINE) break;
|
||||||
|
|
||||||
/* Handle updating of the required and first characters for other types of
|
/* Handle updating of the required and first characters for other types of
|
||||||
group. Update for normal brackets of all kinds, and conditions with two
|
group. Update for normal brackets of all kinds, and conditions with two
|
||||||
|
@ -7011,7 +7078,8 @@ do {
|
||||||
case OP_DNCREF:
|
case OP_DNCREF:
|
||||||
case OP_RREF:
|
case OP_RREF:
|
||||||
case OP_DNRREF:
|
case OP_DNRREF:
|
||||||
case OP_DEF:
|
case OP_FALSE:
|
||||||
|
case OP_TRUE:
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
||||||
default: /* Assertion */
|
default: /* Assertion */
|
||||||
|
@ -7413,6 +7481,10 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
|
||||||
case PSO_OPT:
|
case PSO_OPT:
|
||||||
cb.external_options |= p->value;
|
cb.external_options |= p->value;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PSO_FLG:
|
||||||
|
setflags |= p->value;
|
||||||
|
break;
|
||||||
|
|
||||||
case PSO_NL:
|
case PSO_NL:
|
||||||
newline = p->value;
|
newline = p->value;
|
||||||
|
|
|
@ -177,12 +177,12 @@ static const uint8_t coptable[] = {
|
||||||
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
||||||
0, 0, /* CREF, DNCREF */
|
0, 0, /* CREF, DNCREF */
|
||||||
0, 0, /* RREF, DNRREF */
|
0, 0, /* RREF, DNRREF */
|
||||||
0, /* DEF */
|
0, 0, /* FALSE, TRUE */
|
||||||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||||
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
||||||
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
|
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
|
||||||
0, 0 /* CLOSE, SKIPZERO */
|
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* This table identifies those opcodes that inspect a character. It is used to
|
/* This table identifies those opcodes that inspect a character. It is used to
|
||||||
|
@ -249,12 +249,12 @@ static const uint8_t poptable[] = {
|
||||||
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
||||||
0, 0, /* CREF, DNCREF */
|
0, 0, /* CREF, DNCREF */
|
||||||
0, 0, /* RREF, DNRREF */
|
0, 0, /* RREF, DNRREF */
|
||||||
0, /* DEF */
|
0, 0, /* FALSE, TRUE */
|
||||||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||||
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
||||||
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
|
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
|
||||||
0, 0 /* CLOSE, SKIPZERO */
|
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
|
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
|
||||||
|
@ -2642,8 +2642,13 @@ for (;;)
|
||||||
|
|
||||||
/* The DEFINE condition is always false */
|
/* An always-false condition (DEFINE, or a VERSION test that failed) */
|
||||||
|
|
||||||
if (condcode == OP_DEF)
|
if (condcode == OP_FALSE)
|
||||||
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||||
|
|
||||||
|
/* There is also an always-true condition */
|
||||||
|
|
||||||
|
if (condcode == OP_TRUE)
|
||||||
|
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
|
||||||
|
|
||||||
/* The only supported version of OP_RREF is for the value RREF_ANY,
|
/* The only supported version of OP_RREF is for the value RREF_ANY,
|
||||||
which means "test if in any recursion". We can't test for specifically
|
which means "test if in any recursion". We can't test for specifically
|
||||||
|
@ -3115,6 +3120,24 @@ if (re->magic_number != MAGIC_NUMBER)
|
||||||
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
||||||
return PCRE2_ERROR_BADMODE;
|
return PCRE2_ERROR_BADMODE;
|
||||||
|
|
||||||
|
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||||
|
options variable for this function. Users of PCRE2 who are not calling the
|
||||||
|
function directly would like to have a way of setting these flags, in the same
|
||||||
|
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
|
||||||
|
constructions like (*NO_AUTO_POSSESS). To enable this, (*NOTEMPTY) and
|
||||||
|
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which can now be
|
||||||
|
transferred to the options for this function. The bits are guaranteed to be
|
||||||
|
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||||
|
that the match-time bits are not more significant than the flag bits. If by
|
||||||
|
accident this is not the case, a compile-time division by zero error will
|
||||||
|
occur. */
|
||||||
|
|
||||||
|
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||||
|
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||||
|
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
||||||
|
#undef FF
|
||||||
|
#undef OO
|
||||||
|
|
||||||
/* A NULL match context means "use a default context" */
|
/* A NULL match context means "use a default context" */
|
||||||
|
|
||||||
if (mcontext == NULL)
|
if (mcontext == NULL)
|
||||||
|
|
|
@ -157,7 +157,8 @@ static const char compile_error_texts[] =
|
||||||
"using UCP is disabled by the application\0"
|
"using UCP is disabled by the application\0"
|
||||||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||||
"character code point value in \\u.... sequence is too large\0"
|
"character code point value in \\u.... sequence is too large\0"
|
||||||
"digits missing in \\x{} or \\o{}\0"
|
"digits missing in \\x{} or \\o{}\0"
|
||||||
|
"syntax error in (?(VERSION condition\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
/* Match-time and UTF error texts are in the same format. */
|
/* Match-time and UTF error texts are in the same format. */
|
||||||
|
|
|
@ -223,10 +223,10 @@ else
|
||||||
#endif /* not HAVE_MEMMOVE */
|
#endif /* not HAVE_MEMMOVE */
|
||||||
#endif /* not VPCOMPAT */
|
#endif /* not VPCOMPAT */
|
||||||
|
|
||||||
/* External (in the C sense) functions and tables that are private to the
|
/* External (in the C sense) functions and tables that are private to the
|
||||||
libraries are always referenced using the PRIV macro. This makes it possible
|
libraries are always referenced using the PRIV macro. This makes it possible
|
||||||
for pcre2test.c to include some of the source files from the libraries using a
|
for pcre2test.c to include some of the source files from the libraries using a
|
||||||
different PRIV definition to avoid name clashes. It also makes it clear in the
|
different PRIV definition to avoid name clashes. It also makes it clear in the
|
||||||
code that a non-static object is being referenced. */
|
code that a non-static object is being referenced. */
|
||||||
|
|
||||||
#ifndef PRIV
|
#ifndef PRIV
|
||||||
|
@ -387,10 +387,10 @@ other. NOTE: The values also appear in pcre2_jit_compile.c. */
|
||||||
|
|
||||||
#ifndef EBCDIC
|
#ifndef EBCDIC
|
||||||
|
|
||||||
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
|
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
|
||||||
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
|
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
|
||||||
space. However, in many other sources it is listed as a space and has been in
|
space. However, in many other sources it is listed as a space and has been in
|
||||||
PCRE for a long time. */
|
PCRE for a long time. */
|
||||||
|
|
||||||
#define HSPACE_LIST \
|
#define HSPACE_LIST \
|
||||||
CHAR_HT, CHAR_SPACE, 0xa0, \
|
CHAR_HT, CHAR_SPACE, 0xa0, \
|
||||||
|
@ -532,6 +532,8 @@ bytes in a code unit in that mode. */
|
||||||
#define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */
|
#define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */
|
||||||
#define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */
|
#define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */
|
||||||
#define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */
|
#define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */
|
||||||
|
#define PCRE2_NOTEMPTY_SET 0x00010000 /* (*NOTEMPTY) used ) keep */
|
||||||
|
#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */
|
||||||
|
|
||||||
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||||
|
|
||||||
|
@ -895,25 +897,28 @@ a positive value. */
|
||||||
#define STRING_xdigit "xdigit"
|
#define STRING_xdigit "xdigit"
|
||||||
|
|
||||||
#define STRING_DEFINE "DEFINE"
|
#define STRING_DEFINE "DEFINE"
|
||||||
|
#define STRING_VERSION "VERSION"
|
||||||
#define STRING_WEIRD_STARTWORD "[:<:]]"
|
#define STRING_WEIRD_STARTWORD "[:<:]]"
|
||||||
#define STRING_WEIRD_ENDWORD "[:>:]]"
|
#define STRING_WEIRD_ENDWORD "[:>:]]"
|
||||||
|
|
||||||
#define STRING_CR_RIGHTPAR "CR)"
|
#define STRING_CR_RIGHTPAR "CR)"
|
||||||
#define STRING_LF_RIGHTPAR "LF)"
|
#define STRING_LF_RIGHTPAR "LF)"
|
||||||
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
||||||
#define STRING_ANY_RIGHTPAR "ANY)"
|
#define STRING_ANY_RIGHTPAR "ANY)"
|
||||||
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
||||||
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
||||||
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
||||||
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
||||||
#define STRING_UTF16_RIGHTPAR "UTF16)"
|
#define STRING_UTF16_RIGHTPAR "UTF16)"
|
||||||
#define STRING_UTF32_RIGHTPAR "UTF32)"
|
#define STRING_UTF32_RIGHTPAR "UTF32)"
|
||||||
#define STRING_UTF_RIGHTPAR "UTF)"
|
#define STRING_UTF_RIGHTPAR "UTF)"
|
||||||
#define STRING_UCP_RIGHTPAR "UCP)"
|
#define STRING_UCP_RIGHTPAR "UCP)"
|
||||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
|
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
|
||||||
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
|
||||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
|
||||||
|
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||||
|
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||||
|
|
||||||
#else /* SUPPORT_UTF */
|
#else /* SUPPORT_UTF */
|
||||||
|
|
||||||
|
@ -1161,25 +1166,28 @@ only. */
|
||||||
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
|
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
|
||||||
|
|
||||||
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
|
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
|
||||||
|
#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N
|
||||||
#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||||
#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||||
|
|
||||||
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
|
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
|
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
|
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
|
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
|
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
|
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
|
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||||
|
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||||
|
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UTF */
|
||||||
|
|
||||||
|
@ -1517,39 +1525,47 @@ enum {
|
||||||
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
|
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
|
||||||
OP_RREF, /* 143 Used to hold a recursion number as condition */
|
OP_RREF, /* 143 Used to hold a recursion number as condition */
|
||||||
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
|
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
|
||||||
OP_DEF, /* 145 The DEFINE condition */
|
OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */
|
||||||
|
OP_TRUE, /* 146 Always true (used by VERSION) */
|
||||||
|
|
||||||
OP_BRAZERO, /* 146 These two must remain together and in this */
|
OP_BRAZERO, /* 147 These two must remain together and in this */
|
||||||
OP_BRAMINZERO, /* 147 order. */
|
OP_BRAMINZERO, /* 148 order. */
|
||||||
OP_BRAPOSZERO, /* 148 */
|
OP_BRAPOSZERO, /* 149 */
|
||||||
|
|
||||||
/* These are backtracking control verbs */
|
/* These are backtracking control verbs */
|
||||||
|
|
||||||
OP_MARK, /* 149 always has an argument */
|
OP_MARK, /* 150 always has an argument */
|
||||||
OP_PRUNE, /* 150 */
|
OP_PRUNE, /* 151 */
|
||||||
OP_PRUNE_ARG, /* 151 same, but with argument */
|
OP_PRUNE_ARG, /* 152 same, but with argument */
|
||||||
OP_SKIP, /* 152 */
|
OP_SKIP, /* 153 */
|
||||||
OP_SKIP_ARG, /* 153 same, but with argument */
|
OP_SKIP_ARG, /* 154 same, but with argument */
|
||||||
OP_THEN, /* 154 */
|
OP_THEN, /* 155 */
|
||||||
OP_THEN_ARG, /* 155 same, but with argument */
|
OP_THEN_ARG, /* 156 same, but with argument */
|
||||||
OP_COMMIT, /* 156 */
|
OP_COMMIT, /* 157 */
|
||||||
|
|
||||||
/* These are forced failure and success verbs */
|
/* These are forced failure and success verbs */
|
||||||
|
|
||||||
OP_FAIL, /* 157 */
|
OP_FAIL, /* 158 */
|
||||||
OP_ACCEPT, /* 158 */
|
OP_ACCEPT, /* 159 */
|
||||||
OP_ASSERT_ACCEPT, /* 159 Used inside assertions */
|
OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
|
||||||
OP_CLOSE, /* 160 Used before OP_ACCEPT to close open captures */
|
OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
|
||||||
|
|
||||||
/* This is used to skip a subpattern with a {0} quantifier */
|
/* This is used to skip a subpattern with a {0} quantifier */
|
||||||
|
|
||||||
OP_SKIPZERO, /* 161 */
|
OP_SKIPZERO, /* 162 */
|
||||||
|
|
||||||
|
/* This is used to identify a DEFINE group during compilation so that it can
|
||||||
|
be checked for having only one branch. It is changed to OP_FALSE before
|
||||||
|
compilation finishes. */
|
||||||
|
|
||||||
|
OP_DEFINE, /* 163 */
|
||||||
|
|
||||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||||
are the correct length, in order to catch updating errors - there have been
|
are the correct length, in order to catch updating errors - there have been
|
||||||
some in the past. */
|
some in the past. */
|
||||||
|
|
||||||
OP_TABLE_LENGTH
|
OP_TABLE_LENGTH
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||||
|
@ -1594,12 +1610,13 @@ some cases doesn't actually use these names at all). */
|
||||||
"Cond", \
|
"Cond", \
|
||||||
"SBra", "SBraPos", "SCBra", "SCBraPos", \
|
"SBra", "SBraPos", "SCBra", "SCBraPos", \
|
||||||
"SCond", \
|
"SCond", \
|
||||||
"Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", "Cond def", \
|
"Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", \
|
||||||
|
"Cond false", "Cond true", \
|
||||||
"Brazero", "Braminzero", "Braposzero", \
|
"Brazero", "Braminzero", "Braposzero", \
|
||||||
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
|
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
|
||||||
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
|
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
|
||||||
"*ACCEPT", "*ASSERT_ACCEPT", \
|
"*ACCEPT", "*ASSERT_ACCEPT", \
|
||||||
"Close", "Skip zero"
|
"Close", "Skip zero", "Define"
|
||||||
|
|
||||||
|
|
||||||
/* This macro defines the length of fixed length operations in the compiled
|
/* This macro defines the length of fixed length operations in the compiled
|
||||||
|
@ -1684,14 +1701,15 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||||
1+LINK_SIZE, /* SCOND */ \
|
1+LINK_SIZE, /* SCOND */ \
|
||||||
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
|
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
|
||||||
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
|
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
|
||||||
1, /* DEF */ \
|
1, 1, /* FALSE, TRUE */ \
|
||||||
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
|
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
|
||||||
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
|
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
|
||||||
1, 3, /* SKIP, SKIP_ARG */ \
|
1, 3, /* SKIP, SKIP_ARG */ \
|
||||||
1, 3, /* THEN, THEN_ARG */ \
|
1, 3, /* THEN, THEN_ARG */ \
|
||||||
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
||||||
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */
|
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
|
||||||
|
1 /* DEFINE */
|
||||||
|
|
||||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||||
|
|
||||||
#define RREF_ANY 0xffff
|
#define RREF_ANY 0xffff
|
||||||
|
@ -1757,7 +1775,7 @@ typedef struct {
|
||||||
|
|
||||||
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
|
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
|
||||||
|
|
||||||
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not
|
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not
|
||||||
defined, so the following items are omitted. */
|
defined, so the following items are omitted. */
|
||||||
|
|
||||||
#ifdef PCRE2_CODE_UNIT_WIDTH
|
#ifdef PCRE2_CODE_UNIT_WIDTH
|
||||||
|
@ -1776,11 +1794,11 @@ However, UTF-8 tables are needed only when compiling the 8-bit library. */
|
||||||
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
extern const int PRIV(utf8_table1)[];
|
extern const int PRIV(utf8_table1)[];
|
||||||
extern const int PRIV(utf8_table1_size);
|
extern const int PRIV(utf8_table1_size);
|
||||||
extern const int PRIV(utf8_table2)[];
|
extern const int PRIV(utf8_table2)[];
|
||||||
extern const int PRIV(utf8_table3)[];
|
extern const int PRIV(utf8_table3)[];
|
||||||
extern const uint8_t PRIV(utf8_table4)[];
|
extern const uint8_t PRIV(utf8_table4)[];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_)
|
#define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_)
|
||||||
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
|
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
|
||||||
|
@ -1857,7 +1875,7 @@ is available. */
|
||||||
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, const compile_block *);
|
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, const compile_block *);
|
||||||
extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
|
extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
|
||||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
||||||
BOOL);
|
BOOL);
|
||||||
extern size_t _pcre2_jit_get_size(void *);
|
extern size_t _pcre2_jit_get_size(void *);
|
||||||
extern void _pcre2_match_context_init(pcre2_match_context *, BOOL);
|
extern void _pcre2_match_context_init(pcre2_match_context *, BOOL);
|
||||||
|
@ -1870,7 +1888,7 @@ extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
|
||||||
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
|
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
|
||||||
extern int _pcre2_study(pcre2_real_code *);
|
extern int _pcre2_study(pcre2_real_code *);
|
||||||
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
|
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
|
||||||
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
||||||
BOOL);
|
BOOL);
|
||||||
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
|
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
|
||||||
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
||||||
|
|
|
@ -1363,8 +1363,12 @@ for (;;)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_DEF: /* DEFINE - always false */
|
case OP_FALSE:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OP_TRUE:
|
||||||
|
condition = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
/* The condition is an assertion. Call match() to evaluate it - setting
|
/* The condition is an assertion. Call match() to evaluate it - setting
|
||||||
mb->match_function_type to MATCH_CONDASSERT causes it to stop at the end
|
mb->match_function_type to MATCH_CONDASSERT causes it to stop at the end
|
||||||
|
@ -6362,6 +6366,24 @@ if (re->magic_number != MAGIC_NUMBER)
|
||||||
|
|
||||||
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
||||||
return PCRE2_ERROR_BADMODE;
|
return PCRE2_ERROR_BADMODE;
|
||||||
|
|
||||||
|
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||||
|
options variable for this function. Users of PCRE2 who are not calling the
|
||||||
|
function directly would like to have a way of setting these flags, in the same
|
||||||
|
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
|
||||||
|
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
||||||
|
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which can now be
|
||||||
|
transferred to the options for this function. The bits are guaranteed to be
|
||||||
|
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||||
|
that the match-time bits are not more significant than the flag bits. If by
|
||||||
|
accident this is not the case, a compile-time division by zero error will
|
||||||
|
occur. */
|
||||||
|
|
||||||
|
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||||
|
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||||
|
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
||||||
|
#undef FF
|
||||||
|
#undef OO
|
||||||
|
|
||||||
/* A NULL match context means "use a default context" */
|
/* A NULL match context means "use a default context" */
|
||||||
|
|
||||||
|
|
|
@ -422,8 +422,12 @@ for(;;)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_DEF:
|
case OP_FALSE:
|
||||||
fprintf(f, " Cond def");
|
fprintf(f, " Cond false");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_TRUE:
|
||||||
|
fprintf(f, " Cond true");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_STARI:
|
case OP_STARI:
|
||||||
|
|
|
@ -181,7 +181,8 @@ for (;;)
|
||||||
case OP_DNCREF:
|
case OP_DNCREF:
|
||||||
case OP_RREF:
|
case OP_RREF:
|
||||||
case OP_DNRREF:
|
case OP_DNRREF:
|
||||||
case OP_DEF:
|
case OP_FALSE:
|
||||||
|
case OP_TRUE:
|
||||||
case OP_CALLOUT:
|
case OP_CALLOUT:
|
||||||
case OP_SOD:
|
case OP_SOD:
|
||||||
case OP_SOM:
|
case OP_SOM:
|
||||||
|
@ -792,7 +793,8 @@ do
|
||||||
case OP_COMMIT:
|
case OP_COMMIT:
|
||||||
case OP_COND:
|
case OP_COND:
|
||||||
case OP_CREF:
|
case OP_CREF:
|
||||||
case OP_DEF:
|
case OP_FALSE:
|
||||||
|
case OP_TRUE:
|
||||||
case OP_DNCREF:
|
case OP_DNCREF:
|
||||||
case OP_DNREF:
|
case OP_DNREF:
|
||||||
case OP_DNREFI:
|
case OP_DNREFI:
|
||||||
|
|
|
@ -4070,4 +4070,40 @@ a random value. /Ix
|
||||||
/abc(?=abcde)(?=ab)/allusedtext
|
/abc(?=abcde)(?=ab)/allusedtext
|
||||||
abcabcdefg
|
abcabcdefg
|
||||||
|
|
||||||
|
/a*?b*?/
|
||||||
|
ab
|
||||||
|
|
||||||
|
/(*NOTEMPTY)a*?b*?/
|
||||||
|
ab
|
||||||
|
ba
|
||||||
|
cb
|
||||||
|
|
||||||
|
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
|
||||||
|
ab
|
||||||
|
cdab
|
||||||
|
|
||||||
|
/(?(VERSION>=10.0)yes|no)/I
|
||||||
|
yesno
|
||||||
|
|
||||||
|
/(?(VERSION=8)yes){3}/BI,aftertext
|
||||||
|
yesno
|
||||||
|
|
||||||
|
/(?(VERSION=8)yes|no){3}/I
|
||||||
|
yesnononoyes
|
||||||
|
** Failers
|
||||||
|
yesno
|
||||||
|
|
||||||
|
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
|
||||||
|
abcyes
|
||||||
|
xyzno
|
||||||
|
** Failers
|
||||||
|
abcno
|
||||||
|
xyzyes
|
||||||
|
|
||||||
|
/(?(VERSION<10)yes|no)/
|
||||||
|
|
||||||
|
/(?(VERSION>10)yes|no)/
|
||||||
|
|
||||||
|
/(?(VERSION>=10.0.0)yes|no)/
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -4798,4 +4798,16 @@
|
||||||
/abc(?=abcde)(?=ab)/allusedtext
|
/abc(?=abcde)(?=ab)/allusedtext
|
||||||
abcabcdefg
|
abcabcdefg
|
||||||
|
|
||||||
|
/a*?b*?/
|
||||||
|
ab
|
||||||
|
|
||||||
|
/(*NOTEMPTY)a*?b*?/
|
||||||
|
ab
|
||||||
|
ba
|
||||||
|
cb
|
||||||
|
|
||||||
|
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
|
||||||
|
ab
|
||||||
|
cdab
|
||||||
|
|
||||||
# End of testinput6
|
# End of testinput6
|
||||||
|
|
|
@ -9357,7 +9357,7 @@ Partial match at offset 3: +ab
|
||||||
Recurse
|
Recurse
|
||||||
Recurse
|
Recurse
|
||||||
Cond
|
Cond
|
||||||
Cond def
|
Cond false
|
||||||
CBra 1
|
CBra 1
|
||||||
<
|
<
|
||||||
[^m]
|
[^m]
|
||||||
|
@ -9379,7 +9379,7 @@ Partial match at offset 3: +ab
|
||||||
Recurse
|
Recurse
|
||||||
Recurse
|
Recurse
|
||||||
Cond
|
Cond
|
||||||
Cond def
|
Cond false
|
||||||
CBra 1
|
CBra 1
|
||||||
<
|
<
|
||||||
[\x00-/:-\xff] (neg)
|
[\x00-/:-\xff] (neg)
|
||||||
|
@ -10095,7 +10095,7 @@ No match
|
||||||
Recurse
|
Recurse
|
||||||
KetRpos
|
KetRpos
|
||||||
Cond
|
Cond
|
||||||
Cond def
|
Cond false
|
||||||
CBra 1
|
CBra 1
|
||||||
Any
|
Any
|
||||||
Ket
|
Ket
|
||||||
|
@ -10114,7 +10114,7 @@ No match
|
||||||
Recurse
|
Recurse
|
||||||
KetRmax
|
KetRmax
|
||||||
Cond
|
Cond
|
||||||
Cond def
|
Cond false
|
||||||
CBra 1
|
CBra 1
|
||||||
Any
|
Any
|
||||||
Ket
|
Ket
|
||||||
|
@ -11058,7 +11058,7 @@ Matched, but too many substrings
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
Cond
|
Cond
|
||||||
Cond def
|
Cond false
|
||||||
CBra 1
|
CBra 1
|
||||||
a
|
a
|
||||||
Ket
|
Ket
|
||||||
|
@ -13720,4 +13720,84 @@ No match
|
||||||
0: abcabcde
|
0: abcabcde
|
||||||
>>>>>
|
>>>>>
|
||||||
|
|
||||||
|
/a*?b*?/
|
||||||
|
ab
|
||||||
|
0:
|
||||||
|
|
||||||
|
/(*NOTEMPTY)a*?b*?/
|
||||||
|
ab
|
||||||
|
0: a
|
||||||
|
ba
|
||||||
|
0: b
|
||||||
|
cb
|
||||||
|
0: b
|
||||||
|
|
||||||
|
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
|
||||||
|
ab
|
||||||
|
0: a
|
||||||
|
0+ b
|
||||||
|
cdab
|
||||||
|
0:
|
||||||
|
0+ dab
|
||||||
|
|
||||||
|
/(?(VERSION>=10.0)yes|no)/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Subject length lower bound = 2
|
||||||
|
yesno
|
||||||
|
0: yes
|
||||||
|
|
||||||
|
/(?(VERSION=8)yes){3}/BI,aftertext
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
Cond
|
||||||
|
Cond false
|
||||||
|
yes
|
||||||
|
Ket
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
May match empty string
|
||||||
|
Subject length lower bound = 0
|
||||||
|
yesno
|
||||||
|
0:
|
||||||
|
0+ yesno
|
||||||
|
|
||||||
|
/(?(VERSION=8)yes|no){3}/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Subject length lower bound = 6
|
||||||
|
yesnononoyes
|
||||||
|
0: nonono
|
||||||
|
** Failers
|
||||||
|
No match
|
||||||
|
yesno
|
||||||
|
No match
|
||||||
|
|
||||||
|
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
Named capturing subpatterns:
|
||||||
|
VERSION 1
|
||||||
|
Starting code units: a x
|
||||||
|
Subject length lower bound = 5
|
||||||
|
abcyes
|
||||||
|
0: abcyes
|
||||||
|
1: abc
|
||||||
|
xyzno
|
||||||
|
0: xyzno
|
||||||
|
** Failers
|
||||||
|
No match
|
||||||
|
abcno
|
||||||
|
No match
|
||||||
|
xyzyes
|
||||||
|
No match
|
||||||
|
|
||||||
|
/(?(VERSION<10)yes|no)/
|
||||||
|
Failed: error 179 at offset 10: syntax error in (?(VERSION condition
|
||||||
|
|
||||||
|
/(?(VERSION>10)yes|no)/
|
||||||
|
Failed: error 179 at offset 11: syntax error in (?(VERSION condition
|
||||||
|
|
||||||
|
/(?(VERSION>=10.0.0)yes|no)/
|
||||||
|
Failed: error 179 at offset 16: syntax error in (?(VERSION condition
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -7689,4 +7689,28 @@ Matched, but offsets vector is too small to show all matches
|
||||||
0: abcabcde
|
0: abcabcde
|
||||||
>>>>>
|
>>>>>
|
||||||
|
|
||||||
|
/a*?b*?/
|
||||||
|
ab
|
||||||
|
0: ab
|
||||||
|
1: a
|
||||||
|
2:
|
||||||
|
|
||||||
|
/(*NOTEMPTY)a*?b*?/
|
||||||
|
ab
|
||||||
|
0: ab
|
||||||
|
1: a
|
||||||
|
ba
|
||||||
|
0: b
|
||||||
|
cb
|
||||||
|
0: b
|
||||||
|
|
||||||
|
/(*NOTEMPTY_ATSTART)a*?b*?/aftertext
|
||||||
|
ab
|
||||||
|
0: ab
|
||||||
|
0+
|
||||||
|
1: a
|
||||||
|
cdab
|
||||||
|
0:
|
||||||
|
0+ dab
|
||||||
|
|
||||||
# End of testinput6
|
# End of testinput6
|
||||||
|
|
Loading…
Reference in New Issue