Implement #newline_default and adjust testdata and scripts to use it.

This commit is contained in:
Philip.Hazel 2015-09-08 17:01:17 +00:00
parent 6119920f26
commit bb68c64c40
23 changed files with 153 additions and 50 deletions

View File

@ -172,6 +172,13 @@ commas.
49. Fixed two issues in JIT. These were found by Karl Skomski with a custom
LLVM fuzzer.
50. The pcre2test program has been extended by adding the #newline_default
command. This has made it possible to run the standard tests when PCRE2 is
compiled with either CR or CRLF as the default newline convention. As part of
this work, the new command was added to several test files and the testing
scripts were modified. The pcre2grep tests can now also be run when there is no
LF in the default newline convention.
Version 10.20 30-June-2015
--------------------------

View File

@ -19,12 +19,18 @@ unset cp ls mv rm
builddir=`pwd`
pcre2grep=$builddir/pcre2grep
pcre2test=$builddir/pcre2test
# Both helper programs are used by the tests that follow, so stop at once with
# a clear message if either one is missing or cannot be executed. The paths are
# quoted so that a build directory containing spaces is tested as one word.
if [ ! -x "$pcre2grep" ] ; then
  echo "** $pcre2grep does not exist or is not executable."
  exit 1
fi
if [ ! -x "$pcre2test" ] ; then
  echo "** $pcre2test does not exist or is not executable."
  exit 1
fi
valgrind=
while [ $# -gt 0 ] ; do
case $1 in
@ -34,7 +40,6 @@ while [ $# -gt 0 ] ; do
shift
done
echo " "
pcre2grep_version=`$pcre2grep -V`
if [ "$valgrind" = "" ] ; then
echo "Testing $pcre2grep_version"
@ -69,14 +74,22 @@ fi
# Check for the availability of UTF-8 support
./pcre2test -C unicode >/dev/null
$pcre2test -C unicode >/dev/null
utf8=$?
# Ask pcre2test which newline convention the library was built with. When that
# convention would not recognize a lone LF (anything other than LF, ANY or
# ANYCRLF), append "-N LF" to the pcre2grep command so LF is used as newline.
nl=`$pcre2test -C newline`
case "$nl" in
  LF|ANY|ANYCRLF)
    ;;
  *)
    pcre2grep="$pcre2grep -N LF"
    echo "Default newline setting forced to LF"
    ;;
esac
# ------ Function to run and check a special pcre2grep arguments test -------
checkspecial()
{
$valgrind ./pcre2grep $1 >>testtrygrep 2>&1
$valgrind $pcre2grep $1 >>testtrygrep 2>&1
if [ $? -ne $2 ] ; then
echo "** pcre2grep $1 failed - check testtrygrep"
exit 1

10
RunTest
View File

@ -75,7 +75,7 @@ title17="Test 17: JIT-specific features when JIT is available"
title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP"
title19="Test 19: Tests of the POSIX interface with UTF/UCP"
title20="Test 20: Serialization tests"
maxtest=18
maxtest=20
if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title0
@ -699,8 +699,12 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $do14 = yes ] ; then
echo $title14
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput14 testtry
checkresult $? 14-$bits ""
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput14 testtry
checkresult $? 14-$bits ""
fi
fi
# Test non-JIT match and recursion limits

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "03 September 2015" "PCRE 10.21"
.TH PCRE2TEST 1 "08 September 2015" "PCRE 10.21"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@ -259,6 +259,34 @@ described in the section entitled "Saving and restoring compiled patterns"
.\" </a>
below.
.\"
.sp
#newline_default [<newline-list>]
.sp
When PCRE2 is built, a default newline convention can be specified. This
determines which characters and/or character pairs are recognized as indicating
a newline in a pattern or subject string. The default can be overridden when a
pattern is compiled. The standard test files contain tests of various newline
conventions, but the majority of the tests expect a single linefeed to be
recognized as a newline by default. Without special action the tests would fail
when PCRE2 is compiled with either CR or CRLF as the default newline.
.P
The #newline_default command specifies a list of newline types that are
acceptable as the default. Each type must be one of CR, LF, CRLF, ANYCRLF, or
ANY (in upper or lower case), for example:
.sp
#newline_default LF Any anyCRLF
.sp
If the default newline is in the list, this command has no effect. Otherwise,
except when testing the POSIX API, a \fBnewline\fP modifier that specifies the
first newline convention in the list (LF in the above example) is added to any
pattern that does not already have a \fBnewline\fP modifier. If the newline
list is empty, the feature is turned off. This command is present in a number
of the standard test input files.
.P
When the POSIX API is being tested there is no way to override the default
newline convention, though it is possible to set the newline convention from
within the pattern. A warning is given if the \fBposix\fP modifier is used when
\fB#newline_default\fP would set a default for the non-POSIX API.
.sp
#pattern <modifier-list>
.sp
@ -1457,6 +1485,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 03 September 2015
Last updated: 08 September 2015
Copyright (c) 1997-2015 University of Cambridge.
.fi

View File

@ -131,33 +131,22 @@ runtest()
./pcre2test -C
fi
nl=`./pcre2test -C newline`
if [ "$nl" = "LF" -o "$nl" = "ANY" -o "$nl" = "ANYCRLF" ]; then
nlok=1
else
nlok=0
fi
./pcre2test -C jit >/dev/null
jit=$?
./pcre2test -C pcre2-8 >/dev/null
pcre2_8=$?
if [ $nlok -gt 0 ]; then
echo "Running PCRE2 library tests $withvalgrind"
$srcdir/RunTest $valgrind >teststdout 2>teststderr
if [ $? -ne 0 -o -s teststderr ]; then
echo " "
echo "**** Test failed ****"
cat teststderr
if [ -s teststdout ] ; then cat teststdout; fi
exit 1
fi
else
echo "Skipping PCRE2 library tests: newline is $nl"
echo "Running PCRE2 library tests $withvalgrind"
$srcdir/RunTest $valgrind >teststdout 2>teststderr
if [ $? -ne 0 -o -s teststderr ]; then
echo " "
echo "**** Test failed ****"
cat teststderr
if [ -s teststdout ] ; then cat teststdout; fi
exit 1
fi
if [ $nlok -gt 0 -a $pcre2_8 -gt 0 ]; then
if [ $pcre2_8 -gt 0 ]; then
echo "Running pcre2grep tests $withvalgrind"
$srcdir/RunGrepTest $valgrind >teststdout 2>teststderr
if [ $? -ne 0 -o -s teststderr ]; then
@ -167,10 +156,8 @@ runtest()
cat teststdout
exit 1
fi
elif [ $nlok -gt 0 ]; then
echo "Skipping pcre2grep tests: 8-bit library not compiled"
else
echo "Skipping pcre2grep tests: newline is $nl"
echo "Skipping pcre2grep tests: 8-bit library not compiled"
fi
if [ "$jit" -gt 0 ]; then

View File

@ -336,17 +336,18 @@ typedef struct cmdstruct {
int value;
} cmdstruct;
enum { CMD_FORBID_UTF, CMD_LOAD, CMD_PATTERN, CMD_PERLTEST, CMD_POP, CMD_SAVE,
CMD_SUBJECT, CMD_UNKNOWN };
enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN,
CMD_PERLTEST, CMD_POP, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN };
static cmdstruct cmdlist[] = {
{ "forbid_utf", CMD_FORBID_UTF },
{ "load", CMD_LOAD },
{ "pattern", CMD_PATTERN },
{ "perltest", CMD_PERLTEST },
{ "pop", CMD_POP },
{ "save", CMD_SAVE },
{ "subject", CMD_SUBJECT }};
{ "forbid_utf", CMD_FORBID_UTF },
{ "load", CMD_LOAD },
{ "newline_default", CMD_NEWLINE_DEFAULT },
{ "pattern", CMD_PATTERN },
{ "perltest", CMD_PERLTEST },
{ "pop", CMD_POP },
{ "save", CMD_SAVE },
{ "subject", CMD_SUBJECT }};
#define cmdlistcount sizeof(cmdlist)/sizeof(cmdstruct)
@ -720,6 +721,8 @@ static uint32_t maxlookbehind;
static uint32_t max_oveccount;
static uint32_t callout_count;
static uint16_t local_newline_default = 0;
static VERSION_TYPE jittarget[VERSION_SIZE];
static VERSION_TYPE version[VERSION_SIZE];
static VERSION_TYPE uversion[VERSION_SIZE];
@ -3420,7 +3423,7 @@ Returns: nothing
static void
show_controls(uint32_t controls, const char *before)
{
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
@ -3428,6 +3431,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
((controls & CTL_BINCODE) != 0)? " bincode" : "",
((controls & CTL_BSR_SET) != 0)? " bsr" : "",
((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
@ -3442,6 +3446,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
((controls & CTL_MARK) != 0)? " mark" : "",
((controls & CTL_MEMORY) != 0)? " memory" : "",
((controls & CTL_NL_SET) != 0)? " newline" : "",
((controls & CTL_POSIX) != 0)? " posix" : "",
((controls & CTL_PUSH) != 0)? " push" : "",
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
@ -3472,7 +3477,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
@ -3775,8 +3780,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
"any Unicode newline" : "CR, LF, or CRLF");
if ((pat_patctl.control & CTL_NL_SET) != 0 ||
(FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
{
switch (newline_convention)
{
@ -3993,6 +3997,7 @@ FILE *f;
PCRE2_SIZE serial_size;
size_t i;
int rc, cmd, cmdlen;
uint16_t first_listed_newline;
const char *cmdname;
uint8_t *argptr, *serial;
@ -4047,6 +4052,31 @@ switch(cmd)
(void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
break;
/* Check the default newline, and if not one of those listed, set up the
first one to be forced. An empty list unsets. */
case CMD_NEWLINE_DEFAULT:
local_newline_default = 0; /* Unset */
first_listed_newline = 0;
for (;;)
{
while (isspace(*argptr)) argptr++;
if (*argptr == 0) break;
for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
{
size_t nlen = strlen(newlines[i]);
if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
isspace(argptr[nlen]))
{
if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
if (first_listed_newline == 0) first_listed_newline = i;
}
}
while (*argptr != 0 && !isspace(*argptr)) argptr++;
}
local_newline_default = first_listed_newline;
break;
/* Pop a compiled pattern off the stack. Modifiers that do not affect the
compiled pattern (e.g. to give information) are permitted. The default
pattern modifiers are ignored. */
@ -4371,6 +4401,8 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, msg);
msg = "";
}
if (local_newline_default != 0) prmsg(&msg, "#newline_default");
if (msg[0] == 0) fprintf(outfile, "\n");
@ -4461,6 +4493,15 @@ if we had a hex pattern. */
if ((pat_patctl.control & CTL_HEXPAT) == 0) patlen = PCRE2_ZERO_TERMINATED;
/* If #newline_default has been used and the library was not compiled with an
appropriate default newline setting, local_newline_default will be non-zero. We
use this if there is no explicit newline modifier. */
if ((pat_patctl.control & CTL_NL_SET) == 0 && local_newline_default != 0)
{
SETFLD(pat_context, newline_convention, local_newline_default);
}
/* Compile many times when timing. */
if (timeit > 0)
@ -4551,6 +4592,14 @@ if (pat_patctl.jit != 0)
}
}
/* If an explicit newline modifier was given, set the information flag in the
pattern so that it is preserved over push/pop. */
if ((pat_patctl.control & CTL_NL_SET) != 0)
{
SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
}
/* Output code size and other information if requested. */
if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();

1
testdata/testinput1 vendored
View File

@ -3,6 +3,7 @@
# 32-bit PCRE libraries, and also using the perltest.pl script.
#forbid_utf
#newline_default lf any anycrlf
#perltest
/the quick brown fox/

View File

@ -4,6 +4,7 @@
# different, so they have separate output files.
#forbid_utf
#newline_default LF ANY ANYCRLF
/a\Cb/
aXb

View File

@ -38,11 +38,11 @@
the quick brown fox
The Quick Brown Fox
/abc.def/
/(*LF)abc.def/
*** Failers
abc\ndef
/abc$/
/(*LF)abc$/
abc
abc\n
@ -57,7 +57,7 @@
aaaabbbbzzzz\=ovector=1
aaaabbbbzzzz\=ovector=2
/ab.cd/
/(*ANY)ab.cd/
ab-cd
ab=cd
** Failers

1
testdata/testinput2 vendored
View File

@ -9,6 +9,7 @@
# test 5.
#forbid_utf
#newline_default lf any anycrlf
# Test binary zeroes in the pattern

1
testdata/testinput4 vendored
View File

@ -3,6 +3,7 @@
# some of the property tests may differ because of different versions of
# Unicode in use by PCRE2 and Perl.
#newline_default lf anycrlf any
#perltest
/a.b/utf

1
testdata/testinput6 vendored
View File

@ -4,6 +4,7 @@
#forbid_utf
#subject dfa
#newline_default lf anycrlf any
/abc/
abc

1
testdata/testinput7 vendored
View File

@ -3,6 +3,7 @@
# used to force DFA matching for all tests.
#subject dfa
#newline_default LF any anyCRLF
/\x{100}ab/utf
\x{100}ab

1
testdata/testinput9 vendored
View File

@ -2,6 +2,7 @@
# UTF-8 or Unicode property support. */
#forbid_utf
#newline_default lf any anycrlf
/a\Cb/
aXb

View File

@ -3,6 +3,7 @@
# 32-bit PCRE libraries, and also using the perltest.pl script.
#forbid_utf
#newline_default lf any anycrlf
#perltest
/the quick brown fox/

View File

@ -4,6 +4,7 @@
# different, so they have separate output files.
#forbid_utf
#newline_default LF ANY ANYCRLF
/a\Cb/
aXb

View File

@ -4,6 +4,7 @@
# different, so they have separate output files.
#forbid_utf
#newline_default LF ANY ANYCRLF
/a\Cb/
aXb

View File

@ -58,13 +58,13 @@ No match: POSIX code 17: match failed
The Quick Brown Fox
0: The Quick Brown Fox
/abc.def/
/(*LF)abc.def/
*** Failers
No match: POSIX code 17: match failed
abc\ndef
No match: POSIX code 17: match failed
/abc$/
/(*LF)abc$/
abc
0: abc
abc\n
@ -91,7 +91,7 @@ Matched without capture
0: aaaabbbbzz
1: bbbb
/ab.cd/
/(*ANY)ab.cd/
ab-cd
0: ab-cd
ab=cd

View File

@ -9,6 +9,7 @@
# test 5.
#forbid_utf
#newline_default lf any anycrlf
# Test binary zeroes in the pattern

View File

@ -3,6 +3,7 @@
# some of the property tests may differ because of different versions of
# Unicode in use by PCRE2 and Perl.
#newline_default lf anycrlf any
#perltest
/a.b/utf

View File

@ -4,6 +4,7 @@
#forbid_utf
#subject dfa
#newline_default lf anycrlf any
/abc/
abc

View File

@ -3,6 +3,7 @@
# used to force DFA matching for all tests.
#subject dfa
#newline_default LF any anyCRLF
/\x{100}ab/utf
\x{100}ab

View File

@ -2,6 +2,7 @@
# UTF-8 or Unicode property support. */
#forbid_utf
#newline_default lf any anycrlf
/a\Cb/
aXb