Documentation detrail and make HTML for 10.22-RC1.
This commit is contained in:
parent
431d41cb2d
commit
921636f6fc
|
@ -511,10 +511,16 @@ line in which the match ended. If the matched string ends with a newline
|
|||
sequence the output ends at the end of that line.
|
||||
<br>
|
||||
<br>
|
||||
When this option is set, the PCRE2 library is called in "multiline" mode.
|
||||
However, <b>pcre2grep</b> still processes the input line by line. The difference
|
||||
is that a matched string may extend past the end of a line and continue on
|
||||
one or more subsequent lines. The newline sequence must be matched as part of
|
||||
When this option is set, the PCRE2 library is called in "multiline" mode. This
|
||||
allows a matched string to extend past the end of a line and continue on one or
|
||||
more subsequent lines. However, <b>pcre2grep</b> still processes the input line
|
||||
by line. Once a match has been handled, scanning restarts at the beginning of
|
||||
the next line, just as it does when <b>-M</b> is not present. This means that it
|
||||
is possible for the second or subsequent lines in a multiline match to be
|
||||
output again as part of another match.
|
||||
<br>
|
||||
<br>
|
||||
The newline sequence that separates multiple lines must be matched as part of
|
||||
the pattern. For example, to find the phrase "regular expression" in a file
|
||||
where "regular" might be at the end of a line and "expression" at the start of
|
||||
the next line, you could use this command:
|
||||
|
@ -825,7 +831,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 06 April 2016
|
||||
Last updated: 19 June 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -1256,17 +1256,22 @@ build PCRE2 with the use of \C permanently disabled.
|
|||
<P>
|
||||
PCRE2 does not allow \C to appear in lookbehind assertions
|
||||
<a href="#lookbehind">(described below)</a>
|
||||
in a UTF mode, because this would make it impossible to calculate the length of
|
||||
the lookbehind. Neither the alternative matching function
|
||||
<b>pcre2_dfa_match()</b> nor the JIT optimizer support \C in a UTF mode. The
|
||||
former gives a match-time error; the latter fails to optimize and so the match
|
||||
is always run using the interpreter.
|
||||
in UTF-8 or UTF-16 modes, because this would make it impossible to calculate
|
||||
the length of the lookbehind. Neither the alternative matching function
|
||||
<b>pcre2_dfa_match()</b> nor the JIT optimizer supports \C in these UTF modes.
|
||||
The former gives a match-time error; the latter fails to optimize and so the
|
||||
match is always run using the interpreter.
|
||||
</P>
|
||||
<P>
|
||||
In the 32-bit library, however, \C is always supported (when not explicitly
|
||||
locked out) because it always matches a single code unit, whether or not UTF-32
|
||||
is specified.
|
||||
</P>
|
||||
<P>
|
||||
In general, the \C escape sequence is best avoided. However, one way of using
|
||||
it that avoids the problem of malformed UTF characters is to use a lookahead to
|
||||
check the length of the next character, as in this pattern, which could be used
|
||||
with a UTF-8 string (ignore white space and line breaks):
|
||||
it that avoids the problem of malformed UTF-8 or UTF-16 characters is to use a
|
||||
lookahead to check the length of the next character, as in this pattern, which
|
||||
could be used with a UTF-8 string (ignore white space and line breaks):
|
||||
<pre>
|
||||
(?| (?=[\x00-\x7f])(\C) |
|
||||
(?=[\x80-\x{7ff}])(\C)(\C) |
|
||||
|
@ -3388,9 +3393,9 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 13 November 2015
|
||||
Last updated: 20 June 2016
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
|
|
@ -6338,17 +6338,21 @@ MATCHING A SINGLE CODE UNIT
|
|||
possible to build PCRE2 with the use of \C permanently disabled.
|
||||
|
||||
PCRE2 does not allow \C to appear in lookbehind assertions (described
|
||||
below) in a UTF mode, because this would make it impossible to calcu-
|
||||
late the length of the lookbehind. Neither the alternative matching
|
||||
function pcre2_dfa_match() nor the JIT optimizer support \C in a UTF
|
||||
mode. The former gives a match-time error; the latter fails to optimize
|
||||
and so the match is always run using the interpreter.
|
||||
below) in UTF-8 or UTF-16 modes, because this would make it impossible
|
||||
to calculate the length of the lookbehind. Neither the alternative
|
||||
matching function pcre2_dfa_match() nor the JIT optimizer supports \C in
|
||||
these UTF modes. The former gives a match-time error; the latter fails
|
||||
to optimize and so the match is always run using the interpreter.
|
||||
|
||||
In the 32-bit library, however, \C is always supported (when not
|
||||
explicitly locked out) because it always matches a single code unit,
|
||||
whether or not UTF-32 is specified.
|
||||
|
||||
In general, the \C escape sequence is best avoided. However, one way of
|
||||
using it that avoids the problem of malformed UTF characters is to use
|
||||
a lookahead to check the length of the next character, as in this pat-
|
||||
tern, which could be used with a UTF-8 string (ignore white space and
|
||||
line breaks):
|
||||
using it that avoids the problem of malformed UTF-8 or UTF-16 charac-
|
||||
ters is to use a lookahead to check the length of the next character,
|
||||
as in this pattern, which could be used with a UTF-8 string (ignore
|
||||
white space and line breaks):
|
||||
|
||||
(?| (?=[\x00-\x7f])(\C) |
|
||||
(?=[\x80-\x{7ff}])(\C)(\C) |
|
||||
|
@ -8363,8 +8367,8 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 13 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
Last updated: 20 June 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
|
@ -493,14 +493,20 @@ OPTIONS
|
|||
end of that line.
|
||||
|
||||
When this option is set, the PCRE2 library is called in "mul-
|
||||
tiline" mode. However, pcre2grep still processes the input
|
||||
line by line. The difference is that a matched string may
|
||||
extend past the end of a line and continue on one or more
|
||||
subsequent lines. The newline sequence must be matched as
|
||||
part of the pattern. For example, to find the phrase "regular
|
||||
expression" in a file where "regular" might be at the end of
|
||||
a line and "expression" at the start of the next line, you
|
||||
could use this command:
|
||||
tiline" mode. This allows a matched string to extend past the
|
||||
end of a line and continue on one or more subsequent lines.
|
||||
However, pcre2grep still processes the input line by line.
|
||||
Once a match has been handled, scanning restarts at the
|
||||
beginning of the next line, just as it does when -M is not
|
||||
present. This means that it is possible for the second or
|
||||
subsequent lines in a multiline match to be output again as
|
||||
part of another match.
|
||||
|
||||
The newline sequence that separates multiple lines must be
|
||||
matched as part of the pattern. For example, to find the
|
||||
phrase "regular expression" in a file where "regular" might
|
||||
be at the end of a line and "expression" at the start of the
|
||||
next line, you could use this command:
|
||||
|
||||
pcre2grep -M 'regular\s+expression' <file>
|
||||
|
||||
|
@ -816,5 +822,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 06 April 2016
|
||||
Last updated: 19 June 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
|
|
|
@ -111,6 +111,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
/* #undef HAVE_SYS_TYPES_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/wait.h> header file. */
|
||||
/* #undef HAVE_SYS_WAIT_H */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
/* #undef HAVE_UNISTD_H */
|
||||
|
||||
|
@ -203,7 +206,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.21"
|
||||
#define PACKAGE_STRING "PCRE2 10.22-RC1"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -212,7 +215,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.21"
|
||||
#define PACKAGE_VERSION "10.22-RC1"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -271,6 +274,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
is able to handle .gz files. */
|
||||
/* #undef SUPPORT_LIBZ */
|
||||
|
||||
/* Define to any value to enable callout script support in pcre2grep. */
|
||||
/* #undef SUPPORT_PCRE2GREP_CALLOUT */
|
||||
|
||||
/* Define to any value to enable JIT support in pcre2grep. */
|
||||
/* #undef SUPPORT_PCRE2GREP_JIT */
|
||||
|
||||
|
@ -293,7 +299,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.21"
|
||||
#define VERSION "10.22-RC1"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
|
|
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 21
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2016-01-12
|
||||
#define PCRE2_MINOR 22
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2016-06-29
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
@ -146,7 +146,8 @@ sanity checks). */
|
|||
#define PCRE2_DFA_RESTART 0x00000040u
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||
|
||||
/* These are additional options for pcre2_substitute(). */
|
||||
/* These are additional options for pcre2_substitute(), which passes any others
|
||||
through to pcre2_match(). */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
|
||||
|
@ -154,6 +155,11 @@ sanity checks). */
|
|||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
|
||||
|
||||
/* A further option for pcre2_match(), not allowed for pcre2_dfa_match(),
|
||||
ignored for pcre2_jit_match(). */
|
||||
|
||||
#define PCRE2_NO_JIT 0x00002000u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h, and all values in both sets must be
|
||||
greater than zero. */
|
||||
|
@ -245,6 +251,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -436,7 +443,9 @@ PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
|||
PCRE2_EXP_DECL \
|
||||
pcre2_code *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, \
|
||||
int *, PCRE2_SIZE *, pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *);
|
||||
PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_code *pcre2_code_copy(const pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */
|
||||
|
@ -585,6 +594,7 @@ pcre2_compile are called by application code. */
|
|||
/* Functions: the complete list in alphabetical order */
|
||||
|
||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||
|
|
Loading…
Reference in New Issue