Fix incorrect compiling when [Aa] etc. are quantified
This commit is contained in:
parent
419e3c68a3
commit
fdd9479108
|
@ -86,6 +86,12 @@ Clarke PR#72.
|
||||||
21. A user discovered that the library names in CMakeLists.txt for MSVC
|
21. A user discovered that the library names in CMakeLists.txt for MSVC
|
||||||
debugger (PDB) files were incorrect - perhaps never tried for PCRE2?
|
debugger (PDB) files were incorrect - perhaps never tried for PCRE2?
|
||||||
|
|
||||||
|
22. An item such as [Aa] is optimized into a caseless single character match.
|
||||||
|
When this was quantified (e.g. [Aa]{2}) and was also the last literal item in a
|
||||||
|
pattern, the optimizing "must be present for a match" character check was not
|
||||||
|
being flagged as caseless, causing some matches that should have succeeded to
|
||||||
|
fail.
|
||||||
|
|
||||||
|
|
||||||
Version 10.39 29-October-2021
|
Version 10.39 29-October-2021
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
|
@ -2115,17 +2115,17 @@ if (c == CHAR_LEFT_CURLY_BRACKET)
|
||||||
{
|
{
|
||||||
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
|
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
|
||||||
c = *ptr++;
|
c = *ptr++;
|
||||||
while (c == '_' || c == '-' || isspace(c))
|
while (c == '_' || c == '-' || isspace(c))
|
||||||
{
|
{
|
||||||
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
|
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
|
||||||
c = *ptr++;
|
c = *ptr++;
|
||||||
}
|
}
|
||||||
if (c == CHAR_NUL) goto ERROR_RETURN;
|
if (c == CHAR_NUL) goto ERROR_RETURN;
|
||||||
if (c == CHAR_RIGHT_CURLY_BRACKET) break;
|
if (c == CHAR_RIGHT_CURLY_BRACKET) break;
|
||||||
name[i] = tolower(c);
|
name[i] = tolower(c);
|
||||||
if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i;
|
if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
|
if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
|
||||||
name[i] = 0;
|
name[i] = 0;
|
||||||
}
|
}
|
||||||
|
@ -2159,16 +2159,16 @@ another property can be diagnosed. */
|
||||||
if (vptr != NULL)
|
if (vptr != NULL)
|
||||||
{
|
{
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
PCRE2_UCHAR sname[8];
|
PCRE2_UCHAR sname[8];
|
||||||
|
|
||||||
*vptr = 0; /* Terminate property name */
|
*vptr = 0; /* Terminate property name */
|
||||||
if (PRIV(strcmp_c8)(name, STRING_bidiclass) == 0 ||
|
if (PRIV(strcmp_c8)(name, STRING_bidiclass) == 0 ||
|
||||||
PRIV(strcmp_c8)(name, STRING_bc) == 0)
|
PRIV(strcmp_c8)(name, STRING_bc) == 0)
|
||||||
{
|
{
|
||||||
offset = 4;
|
offset = 4;
|
||||||
sname[0] = CHAR_b;
|
sname[0] = CHAR_b;
|
||||||
sname[1] = CHAR_i; /* There is no strcpy_c8 function */
|
sname[1] = CHAR_i; /* There is no strcpy_c8 function */
|
||||||
sname[2] = CHAR_d;
|
sname[2] = CHAR_d;
|
||||||
sname[3] = CHAR_i;
|
sname[3] = CHAR_i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7023,14 +7023,19 @@ for (;; pptr++)
|
||||||
#endif /* MAYBE_UTF_MULTI */
|
#endif /* MAYBE_UTF_MULTI */
|
||||||
|
|
||||||
/* Handle the case of a single code unit - either with no UTF support, or
|
/* Handle the case of a single code unit - either with no UTF support, or
|
||||||
with UTF disabled, or for a single-code-unit UTF character. */
|
with UTF disabled, or for a single-code-unit UTF character. In the latter
|
||||||
|
case, for a repeated positive match, get the caseless flag for the
|
||||||
|
required code unit from the previous character, because a class like [Aa]
|
||||||
|
sets a caseless A but by now the req_caseopt flag has been reset. */
|
||||||
|
|
||||||
{
|
{
|
||||||
mcbuffer[0] = code[-1];
|
mcbuffer[0] = code[-1];
|
||||||
mclength = 1;
|
mclength = 1;
|
||||||
if (op_previous <= OP_CHARI && repeat_min > 1)
|
if (op_previous <= OP_CHARI && repeat_min > 1)
|
||||||
{
|
{
|
||||||
reqcu = mcbuffer[0];
|
reqcu = mcbuffer[0];
|
||||||
reqcuflags = req_caseopt | cb->req_varyopt;
|
reqcuflags = cb->req_varyopt;
|
||||||
|
if (op_previous == OP_CHARI) reqcuflags |= REQ_CASELESS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
|
goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
|
||||||
|
|
|
@ -5923,4 +5923,13 @@ a)"xI
|
||||||
|
|
||||||
# ---------
|
# ---------
|
||||||
|
|
||||||
|
/[Aa]{2}/BI
|
||||||
|
aabcd
|
||||||
|
|
||||||
|
/A{2}/iBI
|
||||||
|
aabcd
|
||||||
|
|
||||||
|
/[Aa]{2,3}/BI
|
||||||
|
aabcd
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -17702,6 +17702,50 @@ Failed: error -51: NULL argument passed with non-zero length
|
||||||
|
|
||||||
# ---------
|
# ---------
|
||||||
|
|
||||||
|
/[Aa]{2}/BI
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
/i A{2}
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capture group count = 0
|
||||||
|
First code unit = 'A' (caseless)
|
||||||
|
Last code unit = 'A' (caseless)
|
||||||
|
Subject length lower bound = 2
|
||||||
|
aabcd
|
||||||
|
0: aa
|
||||||
|
|
||||||
|
/A{2}/iBI
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
/i A{2}
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capture group count = 0
|
||||||
|
Options: caseless
|
||||||
|
First code unit = 'A' (caseless)
|
||||||
|
Last code unit = 'A' (caseless)
|
||||||
|
Subject length lower bound = 2
|
||||||
|
aabcd
|
||||||
|
0: aa
|
||||||
|
|
||||||
|
/[Aa]{2,3}/BI
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
/i A{2}
|
||||||
|
/i A?+
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capture group count = 0
|
||||||
|
First code unit = 'A' (caseless)
|
||||||
|
Last code unit = 'A' (caseless)
|
||||||
|
Subject length lower bound = 2
|
||||||
|
aabcd
|
||||||
|
0: aa
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue