Add --allow-lookaround-bsk to pcre2grep

This commit is contained in:
Philip Hazel 2021-08-31 16:24:25 +01:00
parent edcc076bd8
commit 31a46200fa
5 changed files with 464 additions and 444 deletions

View File

@ -558,7 +558,7 @@ echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 107 -----------------------------" >>testtrygrep echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
echo "a" >testtemp1grep echo "a" >testtemp1grep
echo "aaaaa" >>testtemp1grep echo "aaaaa" >>testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1 (cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets --allow-lookaround-bsk '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 108 ------------------------------" >>testtrygrep echo "---------------------------- Test 108 ------------------------------" >>testtrygrep
@ -638,13 +638,13 @@ echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 125 -----------------------------" >>testtrygrep echo "---------------------------- Test 125 -----------------------------" >>testtrygrep
printf 'abcd\n' >testNinputgrep printf 'abcd\n' >testNinputgrep
$valgrind $vjs $pcre2grep --colour=always '(?<=\K.)' testNinputgrep >>testtrygrep $valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?<=\K.)' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep --colour=always '(?=.\K)' testNinputgrep >>testtrygrep $valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=.\K)' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep --colour=always '(?<=\K[ac])' testNinputgrep >>testtrygrep $valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?<=\K[ac])' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep --colour=always '(?=[ac]\K)' testNinputgrep >>testtrygrep $valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=[ac]\K)' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 126 -----------------------------" >>testtrygrep echo "---------------------------- Test 126 -----------------------------" >>testtrygrep
@ -701,7 +701,7 @@ if [ $utf8 -ne 0 ] ; then
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test U3 ------------------------------" >>testtrygrep echo "---------------------------- Test U3 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -u --newline=any '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -u --newline=any --allow-lookaround-bsk '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test U4 ------------------------------" >>testtrygrep echo "---------------------------- Test U4 ------------------------------" >>testtrygrep

View File

@ -188,6 +188,12 @@ Treat binary files as text. This is equivalent to
<b>--binary-files</b>=<i>text</i>. <b>--binary-files</b>=<i>text</i>.
</P> </P>
<P> <P>
<b>--allow-lookaround-bsk</b>
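Since release 10.38, PCRE2 forbids the use of \K in lookarounds by default, in
line with Perl. This option causes <b>pcre2grep</b> to set the
PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option, which re-enables the earlier behaviour
(\K is acted on in positive lookarounds and ignored in negative ones). This
usage is somewhat dangerous.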
</P>
<P>
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i> <b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
Output up to <i>number</i> lines of context before each matching line. Fewer Output up to <i>number</i> lines of context before each matching line. Fewer
lines are output if the previous match or the start of the file is within lines are output if the previous match or the start of the file is within
@ -1040,16 +1046,16 @@ because VMS does not distinguish between exit(0) and exit(1).
<P> <P>
Philip Hazel Philip Hazel
<br> <br>
University Computing Service Retired from University Computing Service
<br> <br>
Cambridge, England. Cambridge, England.
<br> <br>
</P> </P>
<br><a name="SEC16" href="#TOC1">REVISION</a><br> <br><a name="SEC16" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 04 October 2020 Last updated: 31 August 2021
<br> <br>
Copyright &copy; 1997-2020 University of Cambridge. Copyright &copy; 1997-2021 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE2 index page</a>. Return to the <a href="index.html">PCRE2 index page</a>.

View File

@ -1,4 +1,4 @@
.TH PCRE2GREP 1 "04 October 2020" "PCRE2 10.36" .TH PCRE2GREP 1 "31 August 2021" "PCRE2 10.38"
.SH NAME .SH NAME
pcre2grep - a grep with Perl-compatible regular expressions. pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS .SH SYNOPSIS
@ -157,6 +157,11 @@ is expected to be relatively small. When \fB-c\fP is used, \fB-A\fP is ignored.
Treat binary files as text. This is equivalent to Treat binary files as text. This is equivalent to
\fB--binary-files\fP=\fItext\fP. \fB--binary-files\fP=\fItext\fP.
.TP .TP
\fB--allow-lookaround-bsk\fP
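Since release 10.38, PCRE2 forbids the use of \eK in lookarounds by default, in
line with Perl. This option causes \fBpcre2grep\fP to set the
PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option, which re-enables the earlier behaviour
(\eK is acted on in positive lookarounds and ignored in negative ones). This
usage is somewhat dangerous.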
.TP
\fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP \fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP
Output up to \fInumber\fP lines of context before each matching line. Fewer Output up to \fInumber\fP lines of context before each matching line. Fewer
lines are output if the previous match or the start of the file is within lines are output if the previous match or the start of the file is within
@ -946,7 +951,7 @@ because VMS does not distinguish between exit(0) and exit(1).
.sp .sp
.nf .nf
Philip Hazel Philip Hazel
University Computing Service Retired from University Computing Service
Cambridge, England. Cambridge, England.
.fi .fi
. .
@ -955,6 +960,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 04 October 2020 Last updated: 31 August 2021
Copyright (c) 1997-2020 University of Cambridge. Copyright (c) 1997-2021 University of Cambridge.
.fi .fi

View File

@ -158,6 +158,12 @@ OPTIONS
Treat binary files as text. This is equivalent to --binary- Treat binary files as text. This is equivalent to --binary-
files=text. files=text.
--allow-lookaround-bsk
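Since release 10.38, PCRE2 forbids the use of \K in lookarounds by
default, in line with Perl. This option causes pcre2grep to set the
PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option, which re-enables the earlier
behaviour (\K is acted on in positive lookarounds and ignored in
negative ones). This usage is somewhat dangerous.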
-B number, --before-context=number -B number, --before-context=number
Output up to number lines of context before each matching Output up to number lines of context before each matching
line. Fewer lines are output if the previous match or the line. Fewer lines are output if the previous match or the
@ -1010,11 +1016,11 @@ SEE ALSO
AUTHOR AUTHOR
Philip Hazel Philip Hazel
University Computing Service Retired from University Computing Service
Cambridge, England. Cambridge, England.
REVISION REVISION
Last updated: 04 October 2020 Last updated: 31 August 2021
Copyright (c) 1997-2020 University of Cambridge. Copyright (c) 1997-2021 University of Cambridge.

View File

@ -423,6 +423,7 @@ used to identify them. */
#define N_OM_SEPARATOR (-22) #define N_OM_SEPARATOR (-22)
#define N_MAX_BUFSIZE (-23) #define N_MAX_BUFSIZE (-23)
#define N_OM_CAPTURE (-24) #define N_OM_CAPTURE (-24)
#define N_ALLABSK (-25)
static option_item optionlist[] = { static option_item optionlist[] = {
{ OP_NODATA, N_NULL, NULL, "", "terminate options" }, { OP_NODATA, N_NULL, NULL, "", "terminate options" },
@ -490,6 +491,7 @@ static option_item optionlist[] = {
{ OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" }, { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
{ OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" }, { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
{ OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" }, { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
{ OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" },
{ OP_NODATA, 0, NULL, NULL, NULL } { OP_NODATA, 0, NULL, NULL, NULL }
}; };
@ -3544,7 +3546,7 @@ return rc;
/************************************************* /*************************************************
* Handle a single-letter, no data option * * Handle a no-data option *
*************************************************/ *************************************************/
static int static int
@ -3557,6 +3559,7 @@ switch(letter)
case N_LBUFFER: line_buffered = TRUE; break; case N_LBUFFER: line_buffered = TRUE; break;
case N_LOFFSETS: line_offsets = number = TRUE; break; case N_LOFFSETS: line_offsets = number = TRUE; break;
case N_NOJIT: use_jit = FALSE; break; case N_NOJIT: use_jit = FALSE; break;
case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
case 'a': binary_files = BIN_TEXT; break; case 'a': binary_files = BIN_TEXT; break;
case 'c': count_only = TRUE; break; case 'c': count_only = TRUE; break;
case 'F': options |= PCRE2_LITERAL; break; case 'F': options |= PCRE2_LITERAL; break;