Implement #newline_default and adjust testdata and scripts to use it.
This commit is contained in:
parent
6119920f26
commit
bb68c64c40
|
@ -172,6 +172,13 @@ commas.
|
|||
49. Fixed two issues in JIT. These were found by Karl Skomski with a custom
|
||||
LLVM fuzzer.
|
||||
|
||||
50. The pcre2test program has been extended by adding the #newline_default
|
||||
command. This has made it possible to run the standard tests when PCRE2 is
|
||||
compiled with either CR or CRLF as the default newline convention. As part of
|
||||
this work, the new command was added to several test files and the testing
|
||||
scripts were modified. The pcre2grep tests can now also be run when there is no
|
||||
LF in the default newline convention.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
|
19
RunGrepTest
19
RunGrepTest
|
@ -19,12 +19,18 @@ unset cp ls mv rm
|
|||
|
||||
builddir=`pwd`
|
||||
pcre2grep=$builddir/pcre2grep
|
||||
pcre2test=$builddir/pcre2test
|
||||
|
||||
if [ ! -x $pcre2grep ] ; then
|
||||
echo "** $pcre2grep does not exist or is not executable."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -x $pcre2test ] ; then
|
||||
echo "** $pcre2test does not exist or is not executable."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
valgrind=
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
|
@ -34,7 +40,6 @@ while [ $# -gt 0 ] ; do
|
|||
shift
|
||||
done
|
||||
|
||||
echo " "
|
||||
pcre2grep_version=`$pcre2grep -V`
|
||||
if [ "$valgrind" = "" ] ; then
|
||||
echo "Testing $pcre2grep_version"
|
||||
|
@ -69,14 +74,22 @@ fi
|
|||
|
||||
# Check for the availability of UTF-8 support
|
||||
|
||||
./pcre2test -C unicode >/dev/null
|
||||
$pcre2test -C unicode >/dev/null
|
||||
utf8=$?
|
||||
|
||||
# Check default newline convention. If it does not include LF, force LF.
|
||||
|
||||
nl=`$pcre2test -C newline`
|
||||
if [ "$nl" != "LF" -a "$nl" != "ANY" -a "$nl" != "ANYCRLF" ]; then
|
||||
pcre2grep="$pcre2grep -N LF"
|
||||
echo "Default newline setting forced to LF"
|
||||
fi
|
||||
|
||||
# ------ Function to run and check a special pcre2grep arguments test -------
|
||||
|
||||
checkspecial()
|
||||
{
|
||||
$valgrind ./pcre2grep $1 >>testtrygrep 2>&1
|
||||
$valgrind $pcre2grep $1 >>testtrygrep 2>&1
|
||||
if [ $? -ne $2 ] ; then
|
||||
echo "** pcre2grep $1 failed - check testtrygrep"
|
||||
exit 1
|
||||
|
|
10
RunTest
10
RunTest
|
@ -75,7 +75,7 @@ title17="Test 17: JIT-specific features when JIT is available"
|
|||
title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP"
|
||||
title19="Test 19: Tests of the POSIX interface with UTF/UCP"
|
||||
title20="Test 20: Serialization tests"
|
||||
maxtest=18
|
||||
maxtest=20
|
||||
|
||||
if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||
echo $title0
|
||||
|
@ -699,8 +699,12 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
|
||||
if [ $do14 = yes ] ; then
|
||||
echo $title14
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput14 testtry
|
||||
checkresult $? 14-$bits ""
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput14 testtry
|
||||
checkresult $? 14-$bits ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test non-JIT match and recursion limits
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "03 September 2015" "PCRE 10.21"
|
||||
.TH PCRE2TEST 1 "08 September 2015" "PCRE 10.21"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -259,6 +259,34 @@ described in the section entitled "Saving and restoring compiled patterns"
|
|||
.\" </a>
|
||||
below.
|
||||
.\"
|
||||
.sp
|
||||
#newline_default [<newline-list>]
|
||||
.sp
|
||||
When PCRE2 is built, a default newline convention can be specified. This
|
||||
determines which characters and/or character pairs are recognized as indicating
|
||||
a newline in a pattern or subject string. The default can be overridden when a
|
||||
pattern is compiled. The standard test files contain tests of various newline
|
||||
conventions, but the majority of the tests expect a single linefeed to be
|
||||
recognized as a newline by default. Without special action the tests would fail
|
||||
when PCRE2 is compiled with either CR or CRLF as the default newline.
|
||||
.P
|
||||
The #newline_default command specifies a list of newline types that are
|
||||
acceptable as the default. Each type must be one of CR, LF, CRLF, ANYCRLF, or
|
||||
ANY (in upper or lower case), for example:
|
||||
.sp
|
||||
#newline_default LF Any anyCRLF
|
||||
.sp
|
||||
If the default newline is in the list, this command has no effect. Otherwise,
|
||||
except when testing the POSIX API, a \fBnewline\fP modifier that specifies the
|
||||
first newline convention in the list (LF in the above example) is added to any
|
||||
pattern that does not already have a \fBnewline\fP modifier. If the newline
|
||||
list is empty, the feature is turned off. This command is present in a number
|
||||
of the standard test input files.
|
||||
.P
|
||||
When the POSIX API is being tested there is no way to override the default
|
||||
newline convention, though it is possible to set the newline convention from
|
||||
within the pattern. A warning is given if the \fBposix\fP modifier is used when
|
||||
\fB#newline_default\fP would set a default for the non-POSIX API.
|
||||
.sp
|
||||
#pattern <modifier-list>
|
||||
.sp
|
||||
|
@ -1457,6 +1485,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 03 September 2015
|
||||
Last updated: 08 September 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -131,33 +131,22 @@ runtest()
|
|||
./pcre2test -C
|
||||
fi
|
||||
|
||||
nl=`./pcre2test -C newline`
|
||||
if [ "$nl" = "LF" -o "$nl" = "ANY" -o "$nl" = "ANYCRLF" ]; then
|
||||
nlok=1
|
||||
else
|
||||
nlok=0
|
||||
fi
|
||||
|
||||
./pcre2test -C jit >/dev/null
|
||||
jit=$?
|
||||
./pcre2test -C pcre2-8 >/dev/null
|
||||
pcre2_8=$?
|
||||
|
||||
if [ $nlok -gt 0 ]; then
|
||||
echo "Running PCRE2 library tests $withvalgrind"
|
||||
$srcdir/RunTest $valgrind >teststdout 2>teststderr
|
||||
if [ $? -ne 0 -o -s teststderr ]; then
|
||||
echo " "
|
||||
echo "**** Test failed ****"
|
||||
cat teststderr
|
||||
if [ -s teststdout ] ; then cat teststdout; fi
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "Skipping PCRE2 library tests: newline is $nl"
|
||||
echo "Running PCRE2 library tests $withvalgrind"
|
||||
$srcdir/RunTest $valgrind >teststdout 2>teststderr
|
||||
if [ $? -ne 0 -o -s teststderr ]; then
|
||||
echo " "
|
||||
echo "**** Test failed ****"
|
||||
cat teststderr
|
||||
if [ -s teststdout ] ; then cat teststdout; fi
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $nlok -gt 0 -a $pcre2_8 -gt 0 ]; then
|
||||
if [ $pcre2_8 -gt 0 ]; then
|
||||
echo "Running pcre2grep tests $withvalgrind"
|
||||
$srcdir/RunGrepTest $valgrind >teststdout 2>teststderr
|
||||
if [ $? -ne 0 -o -s teststderr ]; then
|
||||
|
@ -167,10 +156,8 @@ runtest()
|
|||
cat teststdout
|
||||
exit 1
|
||||
fi
|
||||
elif [ $nlok -gt 0 ]; then
|
||||
echo "Skipping pcre2grep tests: 8-bit library not compiled"
|
||||
else
|
||||
echo "Skipping pcre2grep tests: newline is $nl"
|
||||
echo "Skipping pcre2grep tests: 8-bit library not compiled"
|
||||
fi
|
||||
|
||||
if [ "$jit" -gt 0 ]; then
|
||||
|
|
|
@ -336,17 +336,18 @@ typedef struct cmdstruct {
|
|||
int value;
|
||||
} cmdstruct;
|
||||
|
||||
enum { CMD_FORBID_UTF, CMD_LOAD, CMD_PATTERN, CMD_PERLTEST, CMD_POP, CMD_SAVE,
|
||||
CMD_SUBJECT, CMD_UNKNOWN };
|
||||
enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN,
|
||||
CMD_PERLTEST, CMD_POP, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN };
|
||||
|
||||
static cmdstruct cmdlist[] = {
|
||||
{ "forbid_utf", CMD_FORBID_UTF },
|
||||
{ "load", CMD_LOAD },
|
||||
{ "pattern", CMD_PATTERN },
|
||||
{ "perltest", CMD_PERLTEST },
|
||||
{ "pop", CMD_POP },
|
||||
{ "save", CMD_SAVE },
|
||||
{ "subject", CMD_SUBJECT }};
|
||||
{ "forbid_utf", CMD_FORBID_UTF },
|
||||
{ "load", CMD_LOAD },
|
||||
{ "newline_default", CMD_NEWLINE_DEFAULT },
|
||||
{ "pattern", CMD_PATTERN },
|
||||
{ "perltest", CMD_PERLTEST },
|
||||
{ "pop", CMD_POP },
|
||||
{ "save", CMD_SAVE },
|
||||
{ "subject", CMD_SUBJECT }};
|
||||
|
||||
#define cmdlistcount sizeof(cmdlist)/sizeof(cmdstruct)
|
||||
|
||||
|
@ -720,6 +721,8 @@ static uint32_t maxlookbehind;
|
|||
static uint32_t max_oveccount;
|
||||
static uint32_t callout_count;
|
||||
|
||||
static uint16_t local_newline_default = 0;
|
||||
|
||||
static VERSION_TYPE jittarget[VERSION_SIZE];
|
||||
static VERSION_TYPE version[VERSION_SIZE];
|
||||
static VERSION_TYPE uversion[VERSION_SIZE];
|
||||
|
@ -3420,7 +3423,7 @@ Returns: nothing
|
|||
static void
|
||||
show_controls(uint32_t controls, const char *before)
|
||||
{
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
before,
|
||||
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
||||
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
||||
|
@ -3428,6 +3431,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
|||
((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
|
||||
((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
|
||||
((controls & CTL_BINCODE) != 0)? " bincode" : "",
|
||||
((controls & CTL_BSR_SET) != 0)? " bsr" : "",
|
||||
((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
|
||||
((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
|
||||
((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
|
||||
|
@ -3442,6 +3446,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
|||
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
|
||||
((controls & CTL_MARK) != 0)? " mark" : "",
|
||||
((controls & CTL_MEMORY) != 0)? " memory" : "",
|
||||
((controls & CTL_NL_SET) != 0)? " newline" : "",
|
||||
((controls & CTL_POSIX) != 0)? " posix" : "",
|
||||
((controls & CTL_PUSH) != 0)? " push" : "",
|
||||
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
|
||||
|
@ -3775,8 +3780,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
|
||||
"any Unicode newline" : "CR, LF, or CRLF");
|
||||
|
||||
if ((pat_patctl.control & CTL_NL_SET) != 0 ||
|
||||
(FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
|
||||
if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
|
||||
{
|
||||
switch (newline_convention)
|
||||
{
|
||||
|
@ -3993,6 +3997,7 @@ FILE *f;
|
|||
PCRE2_SIZE serial_size;
|
||||
size_t i;
|
||||
int rc, cmd, cmdlen;
|
||||
uint16_t first_listed_newline;
|
||||
const char *cmdname;
|
||||
uint8_t *argptr, *serial;
|
||||
|
||||
|
@ -4047,6 +4052,31 @@ switch(cmd)
|
|||
(void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
|
||||
break;
|
||||
|
||||
/* Check the default newline, and if not one of those listed, set up the
|
||||
first one to be forced. An empty list unsets. */
|
||||
|
||||
case CMD_NEWLINE_DEFAULT:
|
||||
local_newline_default = 0; /* Unset */
|
||||
first_listed_newline = 0;
|
||||
for (;;)
|
||||
{
|
||||
while (isspace(*argptr)) argptr++;
|
||||
if (*argptr == 0) break;
|
||||
for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
|
||||
{
|
||||
size_t nlen = strlen(newlines[i]);
|
||||
if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
|
||||
isspace(argptr[nlen]))
|
||||
{
|
||||
if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
|
||||
if (first_listed_newline == 0) first_listed_newline = i;
|
||||
}
|
||||
}
|
||||
while (*argptr != 0 && !isspace(*argptr)) argptr++;
|
||||
}
|
||||
local_newline_default = first_listed_newline;
|
||||
break;
|
||||
|
||||
/* Pop a compiled pattern off the stack. Modifiers that do not affect the
|
||||
compiled pattern (e.g. to give information) are permitted. The default
|
||||
pattern modifiers are ignored. */
|
||||
|
@ -4372,6 +4402,8 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
|||
msg = "";
|
||||
}
|
||||
|
||||
if (local_newline_default != 0) prmsg(&msg, "#newline_default");
|
||||
|
||||
if (msg[0] == 0) fprintf(outfile, "\n");
|
||||
|
||||
/* Translate PCRE2 options to POSIX options and then compile. On success, set
|
||||
|
@ -4461,6 +4493,15 @@ if we had a hex pattern. */
|
|||
|
||||
if ((pat_patctl.control & CTL_HEXPAT) == 0) patlen = PCRE2_ZERO_TERMINATED;
|
||||
|
||||
/* If #newline_default has been used and the library was not compiled with an
|
||||
appropriate default newline setting, local_newline_default will be non-zero. We
|
||||
use this if there is no explicit newline modifier. */
|
||||
|
||||
if ((pat_patctl.control & CTL_NL_SET) == 0 && local_newline_default != 0)
|
||||
{
|
||||
SETFLD(pat_context, newline_convention, local_newline_default);
|
||||
}
|
||||
|
||||
/* Compile many times when timing. */
|
||||
|
||||
if (timeit > 0)
|
||||
|
@ -4551,6 +4592,14 @@ if (pat_patctl.jit != 0)
|
|||
}
|
||||
}
|
||||
|
||||
/* If an explicit newline modifier was given, set the information flag in the
|
||||
pattern so that it is preserved over push/pop. */
|
||||
|
||||
if ((pat_patctl.control & CTL_NL_SET) != 0)
|
||||
{
|
||||
SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
|
||||
}
|
||||
|
||||
/* Output code size and other information if requested. */
|
||||
|
||||
if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# 32-bit PCRE libraries, and also using the perltest.pl script.
|
||||
|
||||
#forbid_utf
|
||||
#newline_default lf any anycrlf
|
||||
#perltest
|
||||
|
||||
/the quick brown fox/
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# different, so they have separate output files.
|
||||
|
||||
#forbid_utf
|
||||
#newline_default LF ANY ANYCRLF
|
||||
|
||||
/a\Cb/
|
||||
aXb
|
||||
|
|
|
@ -38,11 +38,11 @@
|
|||
the quick brown fox
|
||||
The Quick Brown Fox
|
||||
|
||||
/abc.def/
|
||||
/(*LF)abc.def/
|
||||
*** Failers
|
||||
abc\ndef
|
||||
|
||||
/abc$/
|
||||
/(*LF)abc$/
|
||||
abc
|
||||
abc\n
|
||||
|
||||
|
@ -57,7 +57,7 @@
|
|||
aaaabbbbzzzz\=ovector=1
|
||||
aaaabbbbzzzz\=ovector=2
|
||||
|
||||
/ab.cd/
|
||||
/(*ANY)ab.cd/
|
||||
ab-cd
|
||||
ab=cd
|
||||
** Failers
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
# test 5.
|
||||
|
||||
#forbid_utf
|
||||
#newline_default lf any anycrlf
|
||||
|
||||
# Test binary zeroes in the pattern
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# some of the property tests may differ because of different versions of
|
||||
# Unicode in use by PCRE2 and Perl.
|
||||
|
||||
#newline_default lf anycrlf any
|
||||
#perltest
|
||||
|
||||
/a.b/utf
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#forbid_utf
|
||||
#subject dfa
|
||||
#newline_default lf anycrlf any
|
||||
|
||||
/abc/
|
||||
abc
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# used to force DFA matching for all tests.
|
||||
|
||||
#subject dfa
|
||||
#newline_default LF any anyCRLF
|
||||
|
||||
/\x{100}ab/utf
|
||||
\x{100}ab
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# UTF-8 or Unicode property support. */
|
||||
|
||||
#forbid_utf
|
||||
#newline_default lf any anycrlf
|
||||
|
||||
/a\Cb/
|
||||
aXb
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# 32-bit PCRE libraries, and also using the perltest.pl script.
|
||||
|
||||
#forbid_utf
|
||||
#newline_default lf any anycrlf
|
||||
#perltest
|
||||
|
||||
/the quick brown fox/
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# different, so they have separate output files.
|
||||
|
||||
#forbid_utf
|
||||
#newline_default LF ANY ANYCRLF
|
||||
|
||||
/a\Cb/
|
||||
aXb
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# different, so they have separate output files.
|
||||
|
||||
#forbid_utf
|
||||
#newline_default LF ANY ANYCRLF
|
||||
|
||||
/a\Cb/
|
||||
aXb
|
||||
|
|
|
@ -58,13 +58,13 @@ No match: POSIX code 17: match failed
|
|||
The Quick Brown Fox
|
||||
0: The Quick Brown Fox
|
||||
|
||||
/abc.def/
|
||||
/(*LF)abc.def/
|
||||
*** Failers
|
||||
No match: POSIX code 17: match failed
|
||||
abc\ndef
|
||||
No match: POSIX code 17: match failed
|
||||
|
||||
/abc$/
|
||||
/(*LF)abc$/
|
||||
abc
|
||||
0: abc
|
||||
abc\n
|
||||
|
@ -91,7 +91,7 @@ Matched without capture
|
|||
0: aaaabbbbzz
|
||||
1: bbbb
|
||||
|
||||
/ab.cd/
|
||||
/(*ANY)ab.cd/
|
||||
ab-cd
|
||||
0: ab-cd
|
||||
ab=cd
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
# test 5.
|
||||
|
||||
#forbid_utf
|
||||
#newline_default lf any anycrlf
|
||||
|
||||
# Test binary zeroes in the pattern
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# some of the property tests may differ because of different versions of
|
||||
# Unicode in use by PCRE2 and Perl.
|
||||
|
||||
#newline_default lf anycrlf any
|
||||
#perltest
|
||||
|
||||
/a.b/utf
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#forbid_utf
|
||||
#subject dfa
|
||||
#newline_default lf anycrlf any
|
||||
|
||||
/abc/
|
||||
abc
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# used to force DFA matching for all tests.
|
||||
|
||||
#subject dfa
|
||||
#newline_default LF any anyCRLF
|
||||
|
||||
/\x{100}ab/utf
|
||||
\x{100}ab
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# UTF-8 or Unicode property support. */
|
||||
|
||||
#forbid_utf
|
||||
#newline_default lf any anycrlf
|
||||
|
||||
/a\Cb/
|
||||
aXb
|
||||
|
|
Loading…
Reference in New Issue