Implement PCRE2_NEWLINE_NUL.
This commit is contained in:
parent
772d857f69
commit
3d80fa4fc2
|
@ -160,7 +160,7 @@ SET(PCRE2GREP_MAX_BUFSIZE "1048576" CACHE STRING
|
|||
"Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_NEWLINE "LF" CACHE STRING
|
||||
"What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")
|
||||
"What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL).")
|
||||
|
||||
SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL
|
||||
"Obsolete option: do not use")
|
||||
|
@ -344,6 +344,9 @@ ENDIF(PCRE2_NEWLINE STREQUAL "ANY")
|
|||
IF(PCRE2_NEWLINE STREQUAL "ANYCRLF")
|
||||
SET(NEWLINE_DEFAULT "5")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "ANYCRLF")
|
||||
IF(PCRE2_NEWLINE STREQUAL "NUL")
|
||||
SET(NEWLINE_DEFAULT "6")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "NUL")
|
||||
|
||||
IF(NEWLINE_DEFAULT STREQUAL "")
|
||||
# NEWLINE_DEFAULT is still empty here, so PCRE2_NEWLINE matched none of the
# accepted spellings; list every accepted value, including "NUL".
MESSAGE(FATAL_ERROR "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\", \"NUL\".")
|
||||
|
|
|
@ -169,6 +169,7 @@ all the tests can run with clang's sanitizing options.
|
|||
33. Implement extra compile options in the compile context and add the first
|
||||
one: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
|
||||
|
||||
34. Implement newline type PCRE2_NEWLINE_NUL.
|
||||
|
||||
|
||||
Version 10.23 14-February-2017
|
||||
|
|
|
@ -662,6 +662,11 @@ $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >
|
|||
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "abc\0def" >testNinputgrep
|
||||
|
||||
printf "%c--------------------------- Test N7 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/' >>testtrygrep
|
||||
|
||||
$cf $srcdir/testdata/grepoutputN testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
|
|
|
@ -189,6 +189,10 @@ AC_ARG_ENABLE(newline-is-any,
|
|||
AS_HELP_STRING([--enable-newline-is-any],
|
||||
[use any valid Unicode newline sequence]),
|
||||
ac_pcre2_newline=any)
|
||||
AC_ARG_ENABLE(newline-is-nul,
|
||||
AS_HELP_STRING([--enable-newline-is-nul],
|
||||
[use NUL (binary zero) as newline character]),
|
||||
ac_pcre2_newline=nul)
|
||||
enable_newline="$ac_pcre2_newline"
|
||||
|
||||
# Handle --enable-bsr-anycrlf
|
||||
|
@ -360,6 +364,7 @@ case "$enable_newline" in
|
|||
crlf) ac_pcre2_newline_value=3 ;;
|
||||
any) ac_pcre2_newline_value=4 ;;
|
||||
anycrlf) ac_pcre2_newline_value=5 ;;
|
||||
nul) ac_pcre2_newline_value=6 ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument \"$enable_newline\" to --enable-newline option])
|
||||
;;
|
||||
|
@ -658,7 +663,7 @@ AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
|
|||
The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
|
||||
and 5 (ANYCRLF).])
|
||||
5 (ANYCRLF), and 6 (NUL).])
|
||||
|
||||
if test "$enable_bsr_anycrlf" = "yes"; then
|
||||
AC_DEFINE([BSR_ANYCRLF], [], [
|
||||
|
|
|
@ -57,6 +57,7 @@ point to a uint32_t integer variable. The available codes are:
|
|||
PCRE2_NEWLINE_CRLF
|
||||
PCRE2_NEWLINE_ANY
|
||||
PCRE2_NEWLINE_ANYCRLF
|
||||
PCRE2_NEWLINE_NUL
|
||||
PCRE2_CONFIG_PARENSLIMIT Default parentheses nesting limit
|
||||
PCRE2_CONFIG_RECURSIONLIMIT Obsolete: use PCRE2_CONFIG_DEPTHLIMIT
|
||||
PCRE2_CONFIG_STACKRECURSE Obsolete: always returns 0
|
||||
|
|
|
@ -71,6 +71,7 @@ request are as follows:
|
|||
PCRE2_NEWLINE_CRLF
|
||||
PCRE2_NEWLINE_ANY
|
||||
PCRE2_NEWLINE_ANYCRLF
|
||||
PCRE2_NEWLINE_NUL
|
||||
PCRE2_INFO_RECURSIONLIMIT Obsolete synonym for PCRE2_INFO_DEPTHLIMIT
|
||||
PCRE2_INFO_SIZE Size of compiled pattern
|
||||
</pre>
|
||||
|
|
|
@ -35,6 +35,7 @@ matching patterns. The second argument must be one of:
|
|||
PCRE2_NEWLINE_CRLF CR followed by LF only
|
||||
PCRE2_NEWLINE_ANYCRLF Any of the above
|
||||
PCRE2_NEWLINE_ANY Any Unicode newline sequence
|
||||
PCRE2_NEWLINE_NUL The NUL character (binary zero)
|
||||
</pre>
|
||||
The result is zero for success or PCRE2_ERROR_BADDATA if the second argument is
|
||||
invalid.
|
||||
|
|
|
@ -783,8 +783,9 @@ PCRE2_SIZE variable can hold, which is effectively unlimited.
|
|||
This specifies which characters or character sequences are to be recognized as
|
||||
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
|
||||
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
|
||||
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
|
||||
PCRE2_NEWLINE_ANY (any Unicode newline sequence).
|
||||
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above),
|
||||
PCRE2_NEWLINE_ANY (any Unicode newline sequence), or PCRE2_NEWLINE_NUL (the
|
||||
NUL character, that is a binary zero).
|
||||
</P>
|
||||
<P>
|
||||
A pattern can override the value set in the compile context by starting with a
|
||||
|
@ -1106,6 +1107,7 @@ sequence that is recognized as meaning "newline". The values are:
|
|||
PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF)
|
||||
PCRE2_NEWLINE_ANY Any Unicode line ending
|
||||
PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF
|
||||
PCRE2_NEWLINE_NUL The NUL character (binary zero)
|
||||
</pre>
|
||||
The default should normally correspond to the standard sequence for your
|
||||
operating system.
|
||||
|
@ -2121,6 +2123,7 @@ The output is one of the following <b>uint32_t</b> values:
|
|||
PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF)
|
||||
PCRE2_NEWLINE_ANY Any Unicode line ending
|
||||
PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF
|
||||
PCRE2_NEWLINE_NUL The NUL character (binary zero)
|
||||
</pre>
|
||||
This identifies the character sequence that will be recognized as meaning
|
||||
"newline" while matching.
|
||||
|
@ -3468,7 +3471,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 17 May 2017
|
||||
Last updated: 26 May 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -142,9 +142,11 @@ standard input is always so treated.
|
|||
<br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
|
||||
<P>
|
||||
By default, a file that contains a binary zero byte within the first 1024 bytes
|
||||
is identified as a binary file, and is processed specially. (GNU grep also
|
||||
identifies binary files in this manner.) See the <b>--binary-files</b> option
|
||||
for a means of changing the way binary files are handled.
|
||||
is identified as a binary file, and is processed specially. (GNU grep
|
||||
identifies binary files in this manner.) However, if the newline type is
|
||||
specified as "nul", that is, the line terminator is a binary zero, the test for
|
||||
a binary file is not applied. See the <b>--binary-files</b> option for a means
|
||||
of changing the way binary files are handled.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">OPTIONS</a><br>
|
||||
<P>
|
||||
|
@ -934,7 +936,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 11 April 2017
|
||||
Last updated: 26 May 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -214,10 +214,10 @@ amount of system stack that is used.
|
|||
Newline conventions
|
||||
</b><br>
|
||||
<P>
|
||||
PCRE2 supports five different conventions for indicating line breaks in
|
||||
PCRE2 supports six different conventions for indicating line breaks in
|
||||
strings: a single CR (carriage return) character, a single LF (linefeed)
|
||||
character, the two-character sequence CRLF, any of the three preceding, or any
|
||||
Unicode newline sequence. The
|
||||
character, the two-character sequence CRLF, any of the three preceding, any
|
||||
Unicode newline sequence, or the NUL character (binary zero). The
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page has
|
||||
<a href="pcre2api.html#newlines">further discussion</a>
|
||||
|
@ -226,13 +226,14 @@ about newlines, and shows how to set the newline convention when calling
|
|||
</P>
|
||||
<P>
|
||||
It is also possible to specify a newline convention by starting a pattern
|
||||
string with one of the following five sequences:
|
||||
string with one of the following sequences:
|
||||
<pre>
|
||||
(*CR) carriage return
|
||||
(*LF) linefeed
|
||||
(*CRLF) carriage return, followed by linefeed
|
||||
(*ANYCRLF) any of the three above
|
||||
(*ANY) all Unicode newline sequences
|
||||
(*NUL) the NUL character (binary zero)
|
||||
</pre>
|
||||
These override the default and the options given to the compiling function. For
|
||||
example, on a Unix system where LF is the default newline sequence, the pattern
|
||||
|
@ -3444,7 +3445,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 18 April 2017
|
||||
Last updated: 26 May 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -468,6 +468,7 @@ settings with a similar syntax.
|
|||
(*CRLF) carriage return followed by linefeed
|
||||
(*ANYCRLF) all three of the above
|
||||
(*ANY) any Unicode newline sequence
|
||||
(*NUL) the NUL character (binary zero)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC18" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||
|
@ -598,7 +599,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 18 April 2017
|
||||
Last updated: 26 May 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -182,7 +182,7 @@ following options output the value and set the exit code as indicated:
|
|||
linksize the configured internal link size (2, 3, or 4)
|
||||
exit code is set to the link size
|
||||
newline the default newline setting:
|
||||
CR, LF, CRLF, ANYCRLF, or ANY
|
||||
CR, LF, CRLF, ANYCRLF, ANY, or NUL
|
||||
exit code is always 0
|
||||
bsr the default setting for what \R matches:
|
||||
ANYCRLF or ANY
|
||||
|
@ -367,8 +367,8 @@ when PCRE2 is compiled with either CR or CRLF as the default newline.
|
|||
</P>
|
||||
<P>
|
||||
The #newline_default command specifies a list of newline types that are
|
||||
acceptable as the default. The types must be one of CR, LF, CRLF, ANYCRLF, or
|
||||
ANY (in upper or lower case), for example:
|
||||
acceptable as the default. The types must be one of CR, LF, CRLF, ANYCRLF,
|
||||
ANY, or NUL (in upper or lower case), for example:
|
||||
<pre>
|
||||
#newline_default LF Any anyCRLF
|
||||
</pre>
|
||||
|
@ -655,7 +655,7 @@ is built, with the default default being Unicode.
|
|||
<P>
|
||||
The <b>newline</b> modifier specifies which characters are to be interpreted as
|
||||
newlines, both in the pattern and in subject lines. The type must be one of CR,
|
||||
LF, CRLF, ANYCRLF, or ANY (in upper or lower case).
|
||||
LF, CRLF, ANYCRLF, ANY, or NUL (in upper or lower case).
|
||||
</P>
|
||||
<br><b>
|
||||
Information about a pattern
|
||||
|
@ -1816,7 +1816,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 17 May 2017
|
||||
Last updated: 26 May 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -817,7 +817,8 @@ PCRE2 CONTEXTS
|
|||
nized as newlines. The value must be one of PCRE2_NEWLINE_CR (carriage
|
||||
return only), PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the
|
||||
two-character sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any
|
||||
of the above), or PCRE2_NEWLINE_ANY (any Unicode newline sequence).
|
||||
of the above), PCRE2_NEWLINE_ANY (any Unicode newline sequence), or
|
||||
PCRE2_NEWLINE_NUL (the NUL character, that is a binary zero).
|
||||
|
||||
A pattern can override the value set in the compile context by starting
|
||||
with a sequence such as (*CRLF). See the pcre2pattern page for details.
|
||||
|
@ -1110,6 +1111,7 @@ CHECKING BUILD-TIME OPTIONS
|
|||
PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF)
|
||||
PCRE2_NEWLINE_ANY Any Unicode line ending
|
||||
PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF
|
||||
PCRE2_NEWLINE_NUL The NUL character (binary zero)
|
||||
|
||||
The default should normally correspond to the standard sequence for
|
||||
your operating system.
|
||||
|
@ -2098,6 +2100,7 @@ INFORMATION ABOUT A COMPILED PATTERN
|
|||
PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF)
|
||||
PCRE2_NEWLINE_ANY Any Unicode line ending
|
||||
PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF
|
||||
PCRE2_NEWLINE_NUL The NUL character (binary zero)
|
||||
|
||||
This identifies the character sequence that will be recognized as mean-
|
||||
ing "newline" while matching.
|
||||
|
@ -3347,7 +3350,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 17 May 2017
|
||||
Last updated: 26 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -5756,21 +5759,22 @@ SPECIAL START-OF-PATTERN ITEMS
|
|||
|
||||
Newline conventions
|
||||
|
||||
PCRE2 supports five different conventions for indicating line breaks in
|
||||
PCRE2 supports six different conventions for indicating line breaks in
|
||||
strings: a single CR (carriage return) character, a single LF (line-
|
||||
feed) character, the two-character sequence CRLF, any of the three pre-
|
||||
ceding, or any Unicode newline sequence. The pcre2api page has further
|
||||
discussion about newlines, and shows how to set the newline convention
|
||||
when calling pcre2_compile().
|
||||
ceding, any Unicode newline sequence, or the NUL character (binary
|
||||
zero). The pcre2api page has further discussion about newlines, and
|
||||
shows how to set the newline convention when calling pcre2_compile().
|
||||
|
||||
It is also possible to specify a newline convention by starting a pat-
|
||||
tern string with one of the following five sequences:
|
||||
tern string with one of the following sequences:
|
||||
|
||||
(*CR) carriage return
|
||||
(*LF) linefeed
|
||||
(*CRLF) carriage return, followed by linefeed
|
||||
(*ANYCRLF) any of the three above
|
||||
(*ANY) all Unicode newline sequences
|
||||
(*NUL) the NUL character (binary zero)
|
||||
|
||||
These override the default and the options given to the compiling func-
|
||||
tion. For example, on a Unix system where LF is the default newline
|
||||
|
@ -8682,7 +8686,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 18 April 2017
|
||||
Last updated: 26 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -9773,6 +9777,7 @@ NEWLINE CONVENTION
|
|||
(*CRLF) carriage return followed by linefeed
|
||||
(*ANYCRLF) all three of the above
|
||||
(*ANY) any Unicode newline sequence
|
||||
(*NUL) the NUL character (binary zero)
|
||||
|
||||
|
||||
WHAT \R MATCHES
|
||||
|
@ -9901,7 +9906,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 18 April 2017
|
||||
Last updated: 26 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_CONFIG 3 "11 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_CONFIG 3 "26 May 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -47,6 +47,7 @@ point to a uint32_t integer variable. The available codes are:
|
|||
PCRE2_NEWLINE_CRLF
|
||||
PCRE2_NEWLINE_ANY
|
||||
PCRE2_NEWLINE_ANYCRLF
|
||||
PCRE2_NEWLINE_NUL
|
||||
PCRE2_CONFIG_PARENSLIMIT Default parentheses nesting limit
|
||||
PCRE2_CONFIG_RECURSIONLIMIT Obsolete: use PCRE2_CONFIG_DEPTHLIMIT
|
||||
PCRE2_CONFIG_STACKRECURSE Obsolete: always returns 0
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_PATTERN_INFO 3 "11 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_PATTERN_INFO 3 "26 May 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -71,6 +71,7 @@ request are as follows:
|
|||
PCRE2_NEWLINE_CRLF
|
||||
PCRE2_NEWLINE_ANY
|
||||
PCRE2_NEWLINE_ANYCRLF
|
||||
PCRE2_NEWLINE_NUL
|
||||
PCRE2_INFO_RECURSIONLIMIT Obsolete synonym for PCRE2_INFO_DEPTHLIMIT
|
||||
PCRE2_INFO_SIZE Size of compiled pattern
|
||||
.sp
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_SET_NEWLINE 3 "22 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2_SET_NEWLINE 3 "26 May 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -23,6 +23,7 @@ matching patterns. The second argument must be one of:
|
|||
PCRE2_NEWLINE_CRLF CR followed by LF only
|
||||
PCRE2_NEWLINE_ANYCRLF Any of the above
|
||||
PCRE2_NEWLINE_ANY Any Unicode newline sequence
|
||||
PCRE2_NEWLINE_NUL The NUL character (binary zero)
|
||||
.sp
|
||||
The result is zero for success or PCRE2_ERROR_BADDATA if the second argument is
|
||||
invalid.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "17 May 2017" "PCRE2 10.30"
|
||||
.TH PCRE2API 3 "26 May 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -724,8 +724,9 @@ PCRE2_SIZE variable can hold, which is effectively unlimited.
|
|||
This specifies which characters or character sequences are to be recognized as
|
||||
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
|
||||
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
|
||||
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
|
||||
PCRE2_NEWLINE_ANY (any Unicode newline sequence).
|
||||
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above),
|
||||
PCRE2_NEWLINE_ANY (any Unicode newline sequence), or PCRE2_NEWLINE_NUL (the
|
||||
NUL character, that is a binary zero).
|
||||
.P
|
||||
A pattern can override the value set in the compile context by starting with a
|
||||
sequence such as (*CRLF). See the
|
||||
|
@ -1039,6 +1040,7 @@ sequence that is recognized as meaning "newline". The values are:
|
|||
PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF)
|
||||
PCRE2_NEWLINE_ANY Any Unicode line ending
|
||||
PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF
|
||||
PCRE2_NEWLINE_NUL The NUL character (binary zero)
|
||||
.sp
|
||||
The default should normally correspond to the standard sequence for your
|
||||
operating system.
|
||||
|
@ -2090,6 +2092,7 @@ The output is one of the following \fBuint32_t\fP values:
|
|||
PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF)
|
||||
PCRE2_NEWLINE_ANY Any Unicode line ending
|
||||
PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF
|
||||
PCRE2_NEWLINE_NUL The NUL character (binary zero)
|
||||
.sp
|
||||
This identifies the character sequence that will be recognized as meaning
|
||||
"newline" while matching.
|
||||
|
@ -3488,6 +3491,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 17 May 2017
|
||||
Last updated: 26 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2GREP 1 "11 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2GREP 1 "26 May 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
pcre2grep - a grep with Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -112,9 +112,11 @@ standard input is always so treated.
|
|||
.rs
|
||||
.sp
|
||||
By default, a file that contains a binary zero byte within the first 1024 bytes
|
||||
is identified as a binary file, and is processed specially. (GNU grep also
|
||||
identifies binary files in this manner.) See the \fB--binary-files\fP option
|
||||
for a means of changing the way binary files are handled.
|
||||
is identified as a binary file, and is processed specially. (GNU grep
|
||||
identifies binary files in this manner.) However, if the newline type is
|
||||
specified as "nul", that is, the line terminator is a binary zero, the test for
|
||||
a binary file is not applied. See the \fB--binary-files\fP option for a means
|
||||
of changing the way binary files are handled.
|
||||
.
|
||||
.
|
||||
.SH OPTIONS
|
||||
|
@ -848,6 +850,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 11 April 2017
|
||||
Last updated: 26 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -114,7 +114,9 @@ BINARY FILES
|
|||
|
||||
By default, a file that contains a binary zero byte within the first
|
||||
1024 bytes is identified as a binary file, and is processed specially.
|
||||
(GNU grep also identifies binary files in this manner.) See the
|
||||
(GNU grep identifies binary files in this manner.) However, if the new-
|
||||
line type is specified as "nul", that is, the line terminator is a
|
||||
binary zero, the test for a binary file is not applied. See the
|
||||
--binary-files option for a means of changing the way binary files are
|
||||
handled.
|
||||
|
||||
|
@ -915,5 +917,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 11 April 2017
|
||||
Last updated: 26 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2PATTERN 3 "18 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2PATTERN 3 "26 May 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
||||
|
@ -180,10 +180,10 @@ amount of system stack that is used.
|
|||
.SS "Newline conventions"
|
||||
.rs
|
||||
.sp
|
||||
PCRE2 supports five different conventions for indicating line breaks in
|
||||
PCRE2 supports six different conventions for indicating line breaks in
|
||||
strings: a single CR (carriage return) character, a single LF (linefeed)
|
||||
character, the two-character sequence CRLF, any of the three preceding, or any
|
||||
Unicode newline sequence. The
|
||||
character, the two-character sequence CRLF, any of the three preceding, any
|
||||
Unicode newline sequence, or the NUL character (binary zero). The
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
|
@ -196,13 +196,14 @@ about newlines, and shows how to set the newline convention when calling
|
|||
\fBpcre2_compile()\fP.
|
||||
.P
|
||||
It is also possible to specify a newline convention by starting a pattern
|
||||
string with one of the following five sequences:
|
||||
string with one of the following sequences:
|
||||
.sp
|
||||
(*CR) carriage return
|
||||
(*LF) linefeed
|
||||
(*CRLF) carriage return, followed by linefeed
|
||||
(*ANYCRLF) any of the three above
|
||||
(*ANY) all Unicode newline sequences
|
||||
(*NUL) the NUL character (binary zero)
|
||||
.sp
|
||||
These override the default and the options given to the compiling function. For
|
||||
example, on a Unix system where LF is the default newline sequence, the pattern
|
||||
|
@ -3474,6 +3475,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 18 April 2017
|
||||
Last updated: 26 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2SYNTAX 3 "18 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2SYNTAX 3 "26 May 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY"
|
||||
|
@ -447,6 +447,7 @@ settings with a similar syntax.
|
|||
(*CRLF) carriage return followed by linefeed
|
||||
(*ANYCRLF) all three of the above
|
||||
(*ANY) any Unicode newline sequence
|
||||
(*NUL) the NUL character (binary zero)
|
||||
.
|
||||
.
|
||||
.SH "WHAT \eR MATCHES"
|
||||
|
@ -587,6 +588,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 18 April 2017
|
||||
Last updated: 26 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "17 May 2017" "PCRE 10.30"
|
||||
.TH PCRE2TEST 1 "26 May 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -150,7 +150,7 @@ following options output the value and set the exit code as indicated:
|
|||
linksize the configured internal link size (2, 3, or 4)
|
||||
exit code is set to the link size
|
||||
newline the default newline setting:
|
||||
CR, LF, CRLF, ANYCRLF, or ANY
|
||||
CR, LF, CRLF, ANYCRLF, ANY, or NUL
|
||||
exit code is always 0
|
||||
bsr the default setting for what \eR matches:
|
||||
ANYCRLF or ANY
|
||||
|
@ -320,8 +320,8 @@ recognized as a newline by default. Without special action the tests would fail
|
|||
when PCRE2 is compiled with either CR or CRLF as the default newline.
|
||||
.P
|
||||
The #newline_default command specifies a list of newline types that are
|
||||
acceptable as the default. The types must be one of CR, LF, CRLF, ANYCRLF, or
|
||||
ANY (in upper or lower case), for example:
|
||||
acceptable as the default. The types must be one of CR, LF, CRLF, ANYCRLF,
|
||||
ANY, or NUL (in upper or lower case), for example:
|
||||
.sp
|
||||
#newline_default LF Any anyCRLF
|
||||
.sp
|
||||
|
@ -617,7 +617,7 @@ is built, with the default default being Unicode.
|
|||
.P
|
||||
The \fBnewline\fP modifier specifies which characters are to be interpreted as
|
||||
newlines, both in the pattern and in subject lines. The type must be one of CR,
|
||||
LF, CRLF, ANYCRLF, or ANY (in upper or lower case).
|
||||
LF, CRLF, ANYCRLF, ANY, or NUL (in upper or lower case).
|
||||
.
|
||||
.
|
||||
.SS "Information about a pattern"
|
||||
|
@ -1792,6 +1792,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 17 May 2017
|
||||
Last updated: 26 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -141,7 +141,7 @@ COMMAND LINE OPTIONS
|
|||
linksize the configured internal link size (2, 3, or 4)
|
||||
exit code is set to the link size
|
||||
newline the default newline setting:
|
||||
CR, LF, CRLF, ANYCRLF, or ANY
|
||||
CR, LF, CRLF, ANYCRLF, ANY, or NUL
|
||||
exit code is always 0
|
||||
bsr the default setting for what \R matches:
|
||||
ANYCRLF or ANY
|
||||
|
@ -306,7 +306,7 @@ COMMAND LINES
|
|||
|
||||
The #newline_default command specifies a list of newline types that are
|
||||
acceptable as the default. The types must be one of CR, LF, CRLF, ANY-
|
||||
CRLF, or ANY (in upper or lower case), for example:
|
||||
CRLF, ANY, or NUL (in upper or lower case), for example:
|
||||
|
||||
#newline_default LF Any anyCRLF
|
||||
|
||||
|
@ -594,7 +594,7 @@ PATTERN MODIFIERS
|
|||
|
||||
The newline modifier specifies which characters are to be interpreted
|
||||
as newlines, both in the pattern and in subject lines. The type must be
|
||||
one of CR, LF, CRLF, ANYCRLF, or ANY (in upper or lower case).
|
||||
one of CR, LF, CRLF, ANYCRLF, ANY, or NUL (in upper or lower case).
|
||||
|
||||
Information about a pattern
|
||||
|
||||
|
@ -1650,5 +1650,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 17 May 2017
|
||||
Last updated: 26 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
|
|
|
@ -179,8 +179,8 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
||||
(ANYCRLF). */
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
|
||||
(ANYCRLF), and 6 (NUL). */
|
||||
#undef NEWLINE_DEFAULT
|
||||
|
||||
/* Name of package */
|
||||
|
|
|
@ -201,6 +201,7 @@ greater than zero. */
|
|||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
#define PCRE2_NEWLINE_NUL 6
|
||||
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
|
|
@ -201,6 +201,7 @@ greater than zero. */
|
|||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
#define PCRE2_NEWLINE_NUL 6
|
||||
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
|
|
@ -760,6 +760,7 @@ static pso pso_list[] = {
|
|||
{ (uint8_t *)STRING_LF_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_LF },
|
||||
{ (uint8_t *)STRING_CRLF_RIGHTPAR, 5, PSO_NL, PCRE2_NEWLINE_CRLF },
|
||||
{ (uint8_t *)STRING_ANY_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_ANY },
|
||||
{ (uint8_t *)STRING_NUL_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_NUL },
|
||||
{ (uint8_t *)STRING_ANYCRLF_RIGHTPAR, 8, PSO_NL, PCRE2_NEWLINE_ANYCRLF },
|
||||
{ (uint8_t *)STRING_BSR_ANYCRLF_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_ANYCRLF },
|
||||
{ (uint8_t *)STRING_BSR_UNICODE_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_UNICODE }
|
||||
|
@ -1911,7 +1912,7 @@ if (c == CHAR_LEFT_CURLY_BRACKET)
|
|||
{
|
||||
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
|
||||
c = *ptr++;
|
||||
if (c == CHAR_NULL) goto ERROR_RETURN;
|
||||
if (c == CHAR_NUL) goto ERROR_RETURN;
|
||||
if (c == CHAR_RIGHT_CURLY_BRACKET) break;
|
||||
name[i] = c;
|
||||
}
|
||||
|
@ -6150,7 +6151,7 @@ for (;; pptr++)
|
|||
}
|
||||
else *callout_string++ = *pp++;
|
||||
}
|
||||
*callout_string++ = CHAR_NULL;
|
||||
*callout_string++ = CHAR_NUL;
|
||||
|
||||
/* Set the length of the entire item, then advance to its end. */
|
||||
|
||||
|
@ -9160,6 +9161,11 @@ switch(newline)
|
|||
cb.nl[0] = CHAR_NL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
cb.nllen = 1;
|
||||
cb.nl[0] = CHAR_NUL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
cb.nllen = 2;
|
||||
cb.nl[0] = CHAR_CR;
|
||||
|
|
|
@ -359,6 +359,7 @@ switch(newline)
|
|||
case PCRE2_NEWLINE_CRLF:
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
ccontext->newline_convention = newline;
|
||||
return 0;
|
||||
|
||||
|
|
|
@ -505,7 +505,7 @@ class_index = 0;
|
|||
|
||||
while (TRUE)
|
||||
{
|
||||
if (*class_ptr == CHAR_NULL) return 0;
|
||||
if (*class_ptr == CHAR_NUL) return 0;
|
||||
|
||||
pattern = start;
|
||||
|
||||
|
@ -1021,7 +1021,7 @@ if (result == 0 || result == ERROR_NO_SLASH_Z)
|
|||
if (in_atomic)
|
||||
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
|
||||
|
||||
convert_glob_write(&out, CHAR_NULL);
|
||||
convert_glob_write(&out, CHAR_NUL);
|
||||
result = 0;
|
||||
|
||||
if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
|
||||
|
|
|
@ -3269,6 +3269,11 @@ switch(re->newline_convention)
|
|||
mb->nl[0] = CHAR_NL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
mb->nllen = 1;
|
||||
mb->nl[0] = CHAR_NUL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
mb->nllen = 2;
|
||||
mb->nl[0] = CHAR_CR;
|
||||
|
|
|
@ -307,8 +307,8 @@ else /* Invalid error number */
|
|||
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
while (*message++ != CHAR_NULL) {};
|
||||
if (*message == CHAR_NULL) return PCRE2_ERROR_BADDATA;
|
||||
while (*message++ != CHAR_NUL) {};
|
||||
if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
|
||||
for (i = 0; *message != 0; i++)
|
||||
|
|
|
@ -678,7 +678,7 @@ a positive value. */
|
|||
|
||||
/* The remaining definitions work in both environments. */
|
||||
|
||||
#define CHAR_NULL '\0'
|
||||
#define CHAR_NUL '\0'
|
||||
#define CHAR_HT '\t'
|
||||
#define CHAR_VT '\v'
|
||||
#define CHAR_FF '\f'
|
||||
|
@ -919,6 +919,7 @@ a positive value. */
|
|||
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
||||
#define STRING_ANY_RIGHTPAR "ANY)"
|
||||
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
||||
#define STRING_NUL_RIGHTPAR "NUL)"
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
||||
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
||||
|
@ -956,7 +957,7 @@ only. */
|
|||
#define CHAR_ESC '\033'
|
||||
#define CHAR_DEL '\177'
|
||||
|
||||
#define CHAR_NULL '\0'
|
||||
#define CHAR_NUL '\0'
|
||||
#define CHAR_SPACE '\040'
|
||||
#define CHAR_EXCLAMATION_MARK '\041'
|
||||
#define CHAR_QUOTATION_MARK '\042'
|
||||
|
@ -1194,6 +1195,7 @@ only. */
|
|||
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NUL_RIGHTPAR STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
||||
|
|
|
@ -6246,6 +6246,11 @@ switch(re->newline_convention)
|
|||
mb->nl[0] = CHAR_NL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
mb->nllen = 1;
|
||||
mb->nl[0] = CHAR_NUL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
mb->nllen = 2;
|
||||
mb->nl[0] = CHAR_CR;
|
||||
|
|
|
@ -404,7 +404,7 @@ static option_item optionlist[] = {
|
|||
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
|
||||
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
|
||||
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
|
||||
{ OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
|
||||
{ OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
|
||||
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
|
||||
#ifdef SUPPORT_PCRE2GREP_JIT
|
||||
{ OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
|
||||
|
@ -439,7 +439,7 @@ static option_item optionlist[] = {
|
|||
of PCRE2_NEWLINE_xx in pcre2.h. */
|
||||
|
||||
static const char *newlines[] = {
|
||||
"DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF" };
|
||||
"DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
|
||||
|
||||
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
|
||||
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
|
||||
|
@ -1337,6 +1337,16 @@ switch(endlinetype)
|
|||
*lenptr = 0;
|
||||
return endptr;
|
||||
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
while (p < endptr && *p != '\0') p++;
|
||||
if (p < endptr)
|
||||
{
|
||||
*lenptr = 1;
|
||||
return p + 1;
|
||||
}
|
||||
*lenptr = 0;
|
||||
return endptr;
|
||||
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
for (;;)
|
||||
{
|
||||
|
@ -1488,6 +1498,11 @@ switch(endlinetype)
|
|||
while (p > startptr && p[-1] != '\n') p--;
|
||||
return p;
|
||||
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
p--;
|
||||
while (p > startptr && p[-1] != '\0') p--;
|
||||
return p;
|
||||
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
for (;;)
|
||||
{
|
||||
|
@ -2319,12 +2334,13 @@ endptr = main_buffer + bufflength;
|
|||
|
||||
/* Unless binary-files=text, see if we have a binary file. This uses the same
|
||||
rule as GNU grep, namely, a search for a binary zero byte near the start of the
|
||||
file. */
|
||||
file. However, when the newline convention is binary zero, we can't do this. */
|
||||
|
||||
if (binary_files != BIN_TEXT)
|
||||
{
|
||||
binary =
|
||||
memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
|
||||
if (endlinetype != PCRE2_NEWLINE_NUL)
|
||||
binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
|
||||
!= NULL;
|
||||
if (binary && binary_files == BIN_NOMATCH) return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -390,7 +390,7 @@ static cmdstruct cmdlist[] = {
|
|||
of PCRE2_NEWLINE_xx in pcre2.h. */
|
||||
|
||||
static const char *newlines[] = {
|
||||
"DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF" };
|
||||
"DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
|
||||
|
||||
/* Structure and table for handling pattern conversion types. */
|
||||
|
||||
|
@ -4453,6 +4453,10 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
fprintf(outfile, "Forced newline is any Unicode newline\n");
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
fprintf(outfile, "Forced newline is NUL\n");
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -7635,7 +7639,7 @@ printf(" ebcdic compiled for EBCDIC character code [0,1]\n");
|
|||
printf(" ebcdic-nl NL code if compiled for EBCDIC\n");
|
||||
printf(" jit just-in-time compiler supported [0, 1]\n");
|
||||
printf(" linksize internal link size [2, 3, 4]\n");
|
||||
printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
|
||||
printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n");
|
||||
printf(" pcre2-8 8 bit library support enabled [0, 1]\n");
|
||||
printf(" pcre2-16 16 bit library support enabled [0, 1]\n");
|
||||
printf(" pcre2-32 32 bit library support enabled [0, 1]\n");
|
||||
|
|
|
@ -13,4 +13,5 @@ jkl---------------------------- Test N5 ------------------------------
|
|||
4:jkl---------------------------- Test N6 ------------------------------
|
||||
1:abc
2:def
|
||||
3:ghi
|
||||
4:jkl
|
||||
4:jkl---------------------------- Test N7 ------------------------------
|
||||
1:abcZERO2:def
|
|
@ -2258,6 +2258,24 @@
|
|||
/(*CRLF).*/g
|
||||
abc\r\ndef
|
||||
|
||||
/(*NUL)^.*/
|
||||
a\nb\x00ccc
|
||||
|
||||
/(*NUL)^.*/s
|
||||
a\nb\x00ccc
|
||||
|
||||
/^x/m,newline=NUL
|
||||
ab\x00xy
|
||||
|
||||
/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex
|
||||
x\nyz
|
||||
|
||||
/(*NUL)^X\NY/
|
||||
X\nY
|
||||
X\rY
|
||||
\= Expect no match
|
||||
X\x00Y
|
||||
|
||||
/a\Rb/I,bsr=anycrlf
|
||||
a\rb
|
||||
a\nb
|
||||
|
|
|
@ -4911,4 +4911,22 @@
|
|||
\= Expect no match
|
||||
xyzabcdef
|
||||
|
||||
/(*NUL)^.*/
|
||||
a\nb\x00ccc
|
||||
|
||||
/(*NUL)^.*/s
|
||||
a\nb\x00ccc
|
||||
|
||||
/^x/m,newline=nul
|
||||
ab\x00xy
|
||||
|
||||
/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex
|
||||
x\nyz
|
||||
|
||||
/(*NUL)^X\NY/
|
||||
X\nY
|
||||
X\rY
|
||||
\= Expect no match
|
||||
X\x00Y
|
||||
|
||||
# End of testinput6
|
||||
|
|
|
@ -8314,6 +8314,31 @@ No match
|
|||
0: def
|
||||
0:
|
||||
|
||||
/(*NUL)^.*/
|
||||
a\nb\x00ccc
|
||||
0: a\x0ab
|
||||
|
||||
/(*NUL)^.*/s
|
||||
a\nb\x00ccc
|
||||
0: a\x0ab\x00ccc
|
||||
|
||||
/^x/m,newline=NUL
|
||||
ab\x00xy
|
||||
0: x
|
||||
|
||||
/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex
|
||||
x\nyz
|
||||
0: x\x0ay
|
||||
|
||||
/(*NUL)^X\NY/
|
||||
X\nY
|
||||
0: X\x0aY
|
||||
X\rY
|
||||
0: X\x0dY
|
||||
\= Expect no match
|
||||
X\x00Y
|
||||
No match
|
||||
|
||||
/a\Rb/I,bsr=anycrlf
|
||||
Capturing subpattern count = 0
|
||||
\R matches CR, LF, or CRLF
|
||||
|
|
|
@ -7721,4 +7721,29 @@ Failed: error -34: bad option value
|
|||
xyzabcdef
|
||||
No match
|
||||
|
||||
/(*NUL)^.*/
|
||||
a\nb\x00ccc
|
||||
0: a\x0ab
|
||||
|
||||
/(*NUL)^.*/s
|
||||
a\nb\x00ccc
|
||||
0: a\x0ab\x00ccc
|
||||
|
||||
/^x/m,newline=nul
|
||||
ab\x00xy
|
||||
0: x
|
||||
|
||||
/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex
|
||||
x\nyz
|
||||
0: x\x0ay
|
||||
|
||||
/(*NUL)^X\NY/
|
||||
X\nY
|
||||
0: X\x0aY
|
||||
X\rY
|
||||
0: X\x0dY
|
||||
\= Expect no match
|
||||
X\x00Y
|
||||
No match
|
||||
|
||||
# End of testinput6
|
||||
|
|
Loading…
Reference in New Issue