API documentation and a lot of little related changes to the code.
This commit is contained in:
parent
de4f203346
commit
eee8530add
|
@ -149,8 +149,8 @@ SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
|
|||
SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL
|
||||
"Enable use of Just-in-time compiling in pcre2grep.")
|
||||
|
||||
SET(PCRE2_SUPPORT_UTF OFF CACHE BOOL
|
||||
"Enable support for Unicode Transformation Format (UTF-8/UTF-16/UTF-32) encoding.")
|
||||
SET(PCRE2_SUPPORT_UNICODE OFF CACHE BOOL
|
||||
"Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
|
||||
|
||||
SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
||||
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
|
||||
|
@ -245,9 +245,9 @@ IF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
|||
SET(BSR_ANYCRLF 1)
|
||||
ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||
|
||||
IF(PCRE2_SUPPORT_UTF)
|
||||
SET(SUPPORT_UTF 1)
|
||||
ENDIF(PCRE2_SUPPORT_UTF)
|
||||
IF(PCRE2_SUPPORT_UNICODE)
|
||||
SET(SUPPORT_UNICODE 1)
|
||||
ENDIF(PCRE2_SUPPORT_UNICODE)
|
||||
|
||||
IF(PCRE2_SUPPORT_JIT)
|
||||
SET(SUPPORT_JIT 1)
|
||||
|
@ -709,7 +709,7 @@ IF(PCRE2_SHOW_REPORT)
|
|||
MESSAGE(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE16}")
|
||||
MESSAGE(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE32}")
|
||||
MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}")
|
||||
MESSAGE(STATUS " Enable UTF support .............. : ${PCRE2_SUPPORT_UTF}")
|
||||
MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
|
||||
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}")
|
||||
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
|
||||
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}")
|
||||
|
|
|
@ -76,7 +76,10 @@ AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
|
|||
# doc/html/pcreunicode.html
|
||||
|
||||
# FIXME
|
||||
#dist_man_MANS = \
|
||||
dist_man_MANS = \
|
||||
doc/pcre2api.3
|
||||
|
||||
|
||||
# doc/pcre2-config.1 \
|
||||
# doc/pcre2.3 \
|
||||
# doc/pcre2-16.3 \
|
||||
|
@ -108,7 +111,6 @@ AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
|
|||
# doc/pcre2_utf16_to_host_byte_order.3 \
|
||||
# doc/pcre2_utf32_to_host_byte_order.3 \
|
||||
# doc/pcre2_version.3 \
|
||||
# doc/pcre2api.3 \
|
||||
# doc/pcre2build.3 \
|
||||
# doc/pcre2callout.3 \
|
||||
# doc/pcre2compat.3 \
|
||||
|
|
7
RunTest
7
RunTest
|
@ -314,10 +314,11 @@ else
|
|||
fi
|
||||
fi
|
||||
|
||||
# UTF support always applies to all bit sizes if both are supported; we can't
|
||||
# have UTF-8 support without UTF-16 or UTF-32 support.
|
||||
# UTF support is implied by Unicode support, and it always applies to all bit
|
||||
# sizes if both are supported; we can't have UTF-8 support without UTF-16 or
|
||||
# UTF-32 support.
|
||||
|
||||
$sim ./pcre2test -C utf >/dev/null
|
||||
$sim ./pcre2test -C unicode >/dev/null
|
||||
utf=$?
|
||||
|
||||
jitopt=
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
#cmakedefine SUPPORT_JIT 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
|
||||
#cmakedefine SUPPORT_UTF 1
|
||||
#cmakedefine SUPPORT_UNICODE 1
|
||||
#cmakedefine SUPPORT_VALGRIND 1
|
||||
|
||||
#cmakedefine BSR_ANYCRLF 1
|
||||
|
|
32
configure.ac
32
configure.ac
|
@ -137,11 +137,11 @@ AC_ARG_ENABLE(rebuild-chartables,
|
|||
[rebuild character tables in current locale]),
|
||||
, enable_rebuild_chartables=no)
|
||||
|
||||
# Handle --enable-utf (disabled by default)
|
||||
AC_ARG_ENABLE(utf,
|
||||
AS_HELP_STRING([--enable-utf],
|
||||
[enable UTF-8/16/32 support (incompatible with --enable-ebcdic)]),
|
||||
, enable_utf=unset)
|
||||
# Handle --enable-unicode (disabled by default)
|
||||
AC_ARG_ENABLE(unicode,
|
||||
AS_HELP_STRING([--enable-unicode],
|
||||
[enable Unicode support (incompatible with --enable-ebcdic)]),
|
||||
, enable_unicode=unset)
|
||||
|
||||
# Handle newline options
|
||||
ac_pcre2_newline=lf
|
||||
|
@ -288,10 +288,10 @@ then
|
|||
AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled])
|
||||
fi
|
||||
|
||||
# enable_utf is disabled by default.
|
||||
if test "x$enable_utf" = "xunset"
|
||||
# enable_unicode is disabled by default.
|
||||
if test "x$enable_unicode" = "xunset"
|
||||
then
|
||||
enable_utf=no
|
||||
enable_unicode=no
|
||||
fi
|
||||
|
||||
# Convert the newline identifier into the appropriate integer value. These must
|
||||
|
@ -320,8 +320,8 @@ fi
|
|||
#
|
||||
if test "x$enable_ebcdic" = "xyes"; then
|
||||
enable_rebuild_chartables=yes
|
||||
if test "x$enable_utf" = "xyes"; then
|
||||
AC_MSG_ERROR([support for EBCDIC and UTF-8/16/32 cannot be enabled at the same time])
|
||||
if test "x$enable_unicode" = "xyes"; then
|
||||
AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time])
|
||||
fi
|
||||
fi
|
||||
|
||||
|
@ -372,7 +372,7 @@ AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes")
|
|||
AM_CONDITIONAL(WITH_PCRE32, test "x$enable_pcre32" = "xyes")
|
||||
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
||||
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
|
||||
AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes")
|
||||
AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
|
||||
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
|
||||
|
||||
# Checks for typedefs, structures, and compiler characteristics.
|
||||
|
@ -513,12 +513,12 @@ if test "$enable_pcre2grep_jit" = "yes"; then
|
|||
Define to any value to enable JIT support in pcre2grep.])
|
||||
fi
|
||||
|
||||
if test "$enable_utf" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_UTF], [], [
|
||||
Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
||||
if test "$enable_unicode" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_UNICODE], [], [
|
||||
Define to any value to enable support for Unicode and UTF encoding.
|
||||
This will work even in an EBCDIC environment, but it is incompatible
|
||||
with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
|
||||
code *or* ASCII/UTF-8/16/32, but not both at once.])
|
||||
code *or* ASCII/Unicode, but not both at once.])
|
||||
fi
|
||||
|
||||
if test "$enable_stack_for_recursion" = "no"; then
|
||||
|
@ -854,7 +854,7 @@ $PACKAGE-$VERSION configuration summary:
|
|||
Build 16-bit pcre2 library ...... : ${enable_pcre16}
|
||||
Build 32-bit pcre2 library ...... : ${enable_pcre32}
|
||||
Enable JIT compiling support .... : ${enable_jit}
|
||||
Enable UTF-8/16/32 support ...... : ${enable_utf}
|
||||
Enable Unicode support .......... : ${enable_unicode}
|
||||
Newline char/sequence ........... : ${enable_newline}
|
||||
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
||||
EBCDIC coding ................... : ${enable_ebcdic}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -120,7 +120,7 @@ to the same value:
|
|||
pcre16 the 16-bit library was built
|
||||
pcre32 the 32-bit library was built
|
||||
pcre8 the 8-bit library was built
|
||||
utf UTF and Unicode property support is available
|
||||
unicode Unicode support is available
|
||||
.sp
|
||||
If an unknown option is given, an error message is output; the exit code is 0.
|
||||
.TP 10
|
||||
|
|
|
@ -0,0 +1,254 @@
|
|||
.TH PCRE2UNICODE 3 "16 September 2014" "PCRE2 10.00"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "UNICODE AND UTF SUPPORT"
|
||||
.rs
|
||||
.sp
|
||||
When PCRE2 is built with Unicode support, it acquires knowledge of Unicode
|
||||
character properties and can process text strings in UTF-8, UTF-16, or UTF-32
|
||||
format (depending on the code unit width). By default, PCRE2 assumes that one
|
||||
code unit is one character. To process a pattern as a UTF string, where a
|
||||
character may require more than one code unit, you must call
|
||||
.\" HREF
|
||||
\fBpcre2_compile()\fP
|
||||
.\"
|
||||
with the PCRE2_UTF option flag, or the pattern must start with the sequence
|
||||
(*UTF). When either of these is the case, both the pattern and any subject
|
||||
strings that are matched against it are treated as UTF strings instead of
|
||||
strings of individual one-code-unit characters.
|
||||
.P
|
||||
If you build PCRE2 with Unicode support, the library will be bigger, but the
|
||||
additional run time overhead is limited to testing the PCRE2_UTF flag
|
||||
occasionally, so should not be very much.
|
||||
.
|
||||
.
|
||||
.SH "UNICODE PROPERTY SUPPORT"
|
||||
.rs
|
||||
.sp
|
||||
When PCRE2 is built with Unicode support, the escape sequences \ep{..},
|
||||
\eP{..}, and \eX can be used. The Unicode properties that can be tested are
|
||||
limited to the general category properties such as Lu for an upper case letter
|
||||
or Nd for a decimal number, the Unicode script names such as Arabic or Han, and
|
||||
the derived properties Any and L&. Full lists are given in the
|
||||
.\" HREF
|
||||
\fBpcre2pattern\fP
|
||||
.\"
|
||||
and
|
||||
.\" HREF
|
||||
\fBpcre2syntax\fP
|
||||
.\"
|
||||
documentation. Only the short names for properties are supported. For example,
|
||||
\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
|
||||
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
|
||||
compatibility with Perl 5.6. PCRE2 does not support this.
|
||||
.
|
||||
.
|
||||
.SH "WIDE CHARACTERS AND UTF MODES"
|
||||
.rs
|
||||
.sp
|
||||
Codepoints less than 256 can be specified in patterns by either braced or
|
||||
unbraced hexadecimal escape sequences (for example, \ex{b3} or \exb3). Larger
|
||||
values have to use braced sequences. Unbraced octal code points up to \e777 are
|
||||
also recognized; larger ones can be coded using \eo{...}.
|
||||
.P
|
||||
In UTF modes, repeat quantifiers apply to complete UTF characters, not to
|
||||
individual code units.
|
||||
.P
|
||||
In UTF modes, the dot metacharacter matches one UTF character instead of a
|
||||
single code unit.
|
||||
.P
|
||||
The escape sequence \eC can be used to match a single code unit, in a UTF mode,
|
||||
but its use can lead to some strange effects because it breaks up multi-unit
|
||||
characters (see the description of \eC in the
|
||||
.\" HREF
|
||||
\fBpcre2pattern\fP
|
||||
.\"
|
||||
documentation). The use of \eC is not supported in the alternative matching
|
||||
function \fBpcre2_dfa_match()\fP, nor is it supported in UTF mode by the JIT
|
||||
optimization. If JIT optimization is requested for a UTF pattern that contains
|
||||
\eC, it will not succeed, and so the matching will be carried out by the normal
|
||||
interpretive function.
|
||||
.P
|
||||
The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly test
|
||||
characters of any code value, but, by default, the characters that PCRE2
|
||||
recognizes as digits, spaces, or word characters remain the same set as in
|
||||
non-UTF mode, all with code points less than 256. This remains true even when
|
||||
PCRE2 is built to include Unicode support, because to do otherwise would slow
|
||||
down matching in many common cases. Note that this also applies to \eb
|
||||
and \eB, because they are defined in terms of \ew and \eW. If you want
|
||||
to test for a wider sense of, say, "digit", you can use explicit Unicode
|
||||
property tests such as \ep{Nd}. Alternatively, if you set the PCRE2_UCP option,
|
||||
the way that the character escapes work is changed so that Unicode properties
|
||||
are used to determine which characters match. There are more details in the
|
||||
section on
|
||||
.\" HTML <a href="pcre2pattern.html#genericchartypes">
|
||||
.\" </a>
|
||||
generic character types
|
||||
.\"
|
||||
in the
|
||||
.\" HREF
|
||||
\fBpcre2pattern\fP
|
||||
.\"
|
||||
documentation.
|
||||
.P
|
||||
Similarly, characters that match the POSIX named character classes are all
|
||||
low-valued characters, unless the PCRE2_UCP option is set.
|
||||
.P
|
||||
However, the special horizontal and vertical white space matching escapes (\eh,
|
||||
\eH, \ev, and \eV) do match all the appropriate Unicode characters, whether or
|
||||
not PCRE2_UCP is set.
|
||||
.P
|
||||
Case-insensitive matching in UTF mode makes use of Unicode properties. A few
|
||||
Unicode characters such as Greek sigma have more than two codepoints that are
|
||||
case-equivalent, and these are treated as such.
|
||||
.
|
||||
.
|
||||
.SH "VALIDITY OF UTF STRINGS"
|
||||
.rs
|
||||
.sp
|
||||
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
|
||||
are (by default) checked for validity on entry to the relevant functions.
|
||||
If an invalid UTF string is passed, an error return is given.
|
||||
.P
|
||||
UTF-16 and UTF-32 strings can indicate their endianness by a special code known
|
||||
as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
|
||||
strings to be in host byte order.
|
||||
.P
|
||||
The entire string is checked before any other processing takes place. In
|
||||
addition to checking the format of the string, there is a check to ensure that
|
||||
all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area.
|
||||
The so-called "non-character" code points are not excluded because Unicode
|
||||
corrigendum #9 makes it clear that they should not be.
|
||||
.P
|
||||
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
|
||||
where they are used in pairs to encode code points with values greater than
|
||||
0xFFFF. The code points that are encoded by UTF-16 pairs are available
|
||||
independently in the UTF-8 and UTF-32 encodings. (In other words, the whole
|
||||
surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
|
||||
UTF-32.)
|
||||
.P
|
||||
In some situations, you may already know that your strings are valid, and
|
||||
therefore want to skip these checks in order to improve performance, for
|
||||
example in the case of a long subject string that is being scanned repeatedly.
|
||||
If you set the PCRE2_NO_UTF_CHECK flag at compile time or at run time, PCRE2
|
||||
assumes that the pattern or subject it is given (respectively) contains only
|
||||
valid UTF code unit sequences.
|
||||
.P
|
||||
Passing PCRE2_NO_UTF_CHECK to \fBpcre2_compile()\fP just disables the check for
|
||||
the pattern; it does not also apply to subject strings. If you want to disable
|
||||
the check for a subject string you must pass this option to \fBpcre2_match()\fP
|
||||
or \fBpcre2_dfa_match()\fP.
|
||||
.P
|
||||
If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
|
||||
is undefined and your program may crash or loop indefinitely.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="utf8strings"></a>
|
||||
.SS "Errors in UTF-8 strings"
|
||||
.rs
|
||||
.sp
|
||||
The following negative error codes are given for invalid UTF-8 strings:
|
||||
.sp
|
||||
PCRE2_ERROR_UTF8_ERR1
|
||||
PCRE2_ERROR_UTF8_ERR2
|
||||
PCRE2_ERROR_UTF8_ERR3
|
||||
PCRE2_ERROR_UTF8_ERR4
|
||||
PCRE2_ERROR_UTF8_ERR5
|
||||
.sp
|
||||
The string ends with a truncated UTF-8 character; the code specifies how many
|
||||
bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be
|
||||
no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279)
|
||||
allows for up to 6 bytes, and this is checked first; hence the possibility of
|
||||
4 or 5 missing bytes.
|
||||
.sp
|
||||
PCRE2_ERROR_UTF8_ERR6
|
||||
PCRE2_ERROR_UTF8_ERR7
|
||||
PCRE2_ERROR_UTF8_ERR8
|
||||
PCRE2_ERROR_UTF8_ERR9
|
||||
PCRE2_ERROR_UTF8_ERR10
|
||||
.sp
|
||||
The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the
|
||||
character do not have the binary value 0b10 (that is, either the most
|
||||
significant bit is 0, or the next bit is 1).
|
||||
.sp
|
||||
PCRE2_ERROR_UTF8_ERR11
|
||||
PCRE2_ERROR_UTF8_ERR12
|
||||
.sp
|
||||
A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long;
|
||||
these code points are excluded by RFC 3629.
|
||||
.sp
|
||||
PCRE2_ERROR_UTF8_ERR13
|
||||
.sp
|
||||
A 4-byte character has a value greater than 0x10ffff; these code points are
|
||||
excluded by RFC 3629.
|
||||
.sp
|
||||
PCRE2_ERROR_UTF8_ERR14
|
||||
.sp
|
||||
A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of
|
||||
code points is reserved by RFC 3629 for use with UTF-16, and so is excluded
|
||||
from UTF-8.
|
||||
.sp
|
||||
PCRE2_ERROR_UTF8_ERR15
|
||||
PCRE2_ERROR_UTF8_ERR16
|
||||
PCRE2_ERROR_UTF8_ERR17
|
||||
PCRE2_ERROR_UTF8_ERR18
|
||||
PCRE2_ERROR_UTF8_ERR19
|
||||
.sp
|
||||
A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a
|
||||
value that can be represented by fewer bytes, which is invalid. For example,
|
||||
the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just
|
||||
one byte.
|
||||
.sp
|
||||
PCRE2_ERROR_UTF8_ERR20
|
||||
.sp
|
||||
The two most significant bits of the first byte of a character have the binary
|
||||
value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a
|
||||
byte can only validly occur as the second or subsequent byte of a multi-byte
|
||||
character.
|
||||
.sp
|
||||
PCRE2_ERROR_UTF8_ERR21
|
||||
.sp
|
||||
The first byte of a character has the value 0xfe or 0xff. These values can
|
||||
never occur in a valid UTF-8 string.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="utf16strings"></a>
|
||||
.SS "Errors in UTF-16 strings"
|
||||
.rs
|
||||
.sp
|
||||
The following negative error codes are given for invalid UTF-16 strings:
|
||||
.sp
|
||||
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at end of string
|
||||
PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate follows high surrogate
|
||||
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
|
||||
.sp
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="utf32strings"></a>
|
||||
.SS "Errors in UTF-32 strings"
|
||||
.rs
|
||||
.sp
|
||||
The following negative error codes are given for invalid UTF-32 strings:
|
||||
.sp
|
||||
PCRE2_ERROR_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff)
|
||||
PCRE2_ERROR_UTF32_ERR2 Code point is greater than 0x10ffff
|
||||
.sp
|
||||
.
|
||||
.
|
||||
.SH AUTHOR
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Philip Hazel
|
||||
University Computing Service
|
||||
Cambridge CB2 3QH, England.
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH REVISION
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 16 September 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
.fi
|
|
@ -202,7 +202,7 @@ if [ $ISGCC -ne 0 -a $usemain -ne 0 ]; then
|
|||
echo "---------- Maximally configured test with -O2 ----------"
|
||||
# Remember the incoming CFLAGS; the restore after runtest reads $SAVECFLAGS.
SAVECFLAGS="$CFLAGS"
|
||||
CFLAGS="$CFLAGS -O2"
|
||||
opts="--disable-shared --enable-utf $enable_jit --enable-pcre16 --enable-pcre32"
|
||||
opts="--disable-shared --enable-unicode $enable_jit --enable-pcre16 --enable-pcre32"
|
||||
runtest
|
||||
CFLAGS="$SAVECFLAGS"
|
||||
fi
|
||||
|
@ -211,23 +211,23 @@ if [ $usemain -ne 0 ]; then
|
|||
echo "---------- Non-JIT tests in the current directory ----------"
|
||||
for opts in \
|
||||
"" \
|
||||
"--enable-utf --disable-static" \
|
||||
"--enable-unicode --disable-static" \
|
||||
"--disable-stack-for-recursion --disable-shared" \
|
||||
"--enable-utf --disable-shared" \
|
||||
"--enable-utf --disable-stack-for-recursion --disable-shared" \
|
||||
"--enable-utf --with-link-size=3 --disable-shared" \
|
||||
"--enable-unicode --disable-shared" \
|
||||
"--enable-unicode --disable-stack-for-recursion --disable-shared" \
|
||||
"--enable-unicode --with-link-size=3 --disable-shared" \
|
||||
"--enable-rebuild-chartables --disable-shared" \
|
||||
"--enable-newline-is-any --disable-shared" \
|
||||
"--enable-newline-is-cr --disable-shared" \
|
||||
"--enable-newline-is-crlf --disable-shared" \
|
||||
"--enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" \
|
||||
"--enable-utf --enable-newline-is-any --disable-stack-for-recursion --disable-static" \
|
||||
"--enable-unicode --enable-newline-is-any --disable-stack-for-recursion --disable-static" \
|
||||
"--enable-pcre16" \
|
||||
"--enable-pcre16 --disable-stack-for-recursion --disable-shared" \
|
||||
"--enable-pcre16 --enable-utf --disable-stack-for-recursion --disable-shared" \
|
||||
"--enable-pcre16 --enable-unicode --disable-stack-for-recursion --disable-shared" \
|
||||
"--enable-pcre32" \
|
||||
"--enable-pcre32 --disable-stack-for-recursion --disable-shared" \
|
||||
"--enable-pcre32 --enable-utf --disable-stack-for-recursion --disable-shared" \
|
||||
"--enable-pcre32 --enable-unicode --disable-stack-for-recursion --disable-shared" \
|
||||
"--enable-pcre32 --enable-pcre16 --disable-shared" \
|
||||
"--enable-pcre32 --enable-pcre16 --disable-pcre8 --disable-shared"
|
||||
do
|
||||
|
@ -241,18 +241,18 @@ if [ $usejit -ne 0 ]; then
|
|||
echo "---------- JIT tests in the current directory ----------"
|
||||
for opts in \
|
||||
"--enable-jit --disable-shared" \
|
||||
"--enable-jit --enable-utf --disable-shared" \
|
||||
"--enable-jit --enable-utf --with-link-size=3 --disable-shared" \
|
||||
"--enable-jit --enable-pcre16 --enable-utf --disable-shared" \
|
||||
"--enable-jit --enable-unicode --disable-shared" \
|
||||
"--enable-jit --enable-unicode --with-link-size=3 --disable-shared" \
|
||||
"--enable-jit --enable-pcre16 --enable-unicode --disable-shared" \
|
||||
"--enable-jit --enable-pcre16 --disable-pcre8 --disable-shared" \
|
||||
"--enable-jit --enable-pcre16 --disable-pcre8 --enable-utf --disable-shared" \
|
||||
"--enable-jit --enable-pcre16 --enable-utf --with-link-size=3 --disable-shared" \
|
||||
"--enable-jit --enable-pcre16 --enable-utf --with-link-size=4 --disable-shared" \
|
||||
"--enable-jit --enable-pcre32 --enable-utf --disable-shared" \
|
||||
"--enable-jit --enable-pcre16 --disable-pcre8 --enable-unicode --disable-shared" \
|
||||
"--enable-jit --enable-pcre16 --enable-unicode --with-link-size=3 --disable-shared" \
|
||||
"--enable-jit --enable-pcre16 --enable-unicode --with-link-size=4 --disable-shared" \
|
||||
"--enable-jit --enable-pcre32 --enable-unicode --disable-shared" \
|
||||
"--enable-jit --enable-pcre32 --disable-pcre8 --disable-shared" \
|
||||
"--enable-jit --enable-pcre32 --disable-pcre8 --enable-utf --disable-shared" \
|
||||
"--enable-jit --enable-pcre32 --enable-utf --with-link-size=4 --disable-shared" \
|
||||
"--enable-jit --enable-pcre32 --enable-pcre16 --disable-pcre8 --enable-utf --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared"
|
||||
"--enable-jit --enable-pcre32 --disable-pcre8 --enable-unicode --disable-shared" \
|
||||
"--enable-jit --enable-pcre32 --enable-unicode --with-link-size=4 --disable-shared" \
|
||||
"--enable-jit --enable-pcre32 --enable-pcre16 --disable-pcre8 --enable-unicode --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared"
|
||||
do
|
||||
runtest
|
||||
done
|
||||
|
@ -267,8 +267,8 @@ if [ $usevalgrind -ne 0 ]; then
|
|||
withvalgrind="with valgrind"
|
||||
|
||||
for opts in \
|
||||
"--enable-utf --disable-stack-for-recursion --disable-shared" \
|
||||
"--enable-utf --with-link-size=3 --disable-shared" \
|
||||
"--enable-unicode --disable-stack-for-recursion --disable-shared" \
|
||||
"--enable-unicode --with-link-size=3 --disable-shared" \
|
||||
"--disable-shared"
|
||||
do
|
||||
opts="--enable-valgrind $opts"
|
||||
|
@ -277,8 +277,8 @@ if [ $usevalgrind -ne 0 ]; then
|
|||
|
||||
if [ $usejit -ne 0 ]; then
|
||||
for opts in \
|
||||
"--enable-jit --enable-utf --disable-shared" \
|
||||
"--enable-jit --enable-pcre16 --enable-pcre32 --enable-utf"
|
||||
"--enable-jit --enable-unicode --disable-shared" \
|
||||
"--enable-jit --enable-pcre16 --enable-pcre32 --enable-unicode"
|
||||
do
|
||||
opts="--enable-valgrind $opts"
|
||||
runtest
|
||||
|
@ -324,7 +324,7 @@ fi
|
|||
|
||||
if [ $usetmp -ne 0 ]; then
|
||||
for opts in \
|
||||
"--enable-utf --disable-shared"
|
||||
"--enable-unicode --disable-shared"
|
||||
do
|
||||
runtest
|
||||
done
|
||||
|
|
|
@ -472,7 +472,7 @@ print("condition to cut out the tables when not needed. But don't leave")
|
|||
print("a totally empty module because some compilers barf at that.")
|
||||
print("Instead, just supply small dummy tables. */")
|
||||
print()
|
||||
print("#ifndef SUPPORT_UTF")
|
||||
print("#ifndef SUPPORT_UNICODE")
|
||||
print("const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};")
|
||||
print("const uint8_t PRIV(ucd_stage1)[] = {0};")
|
||||
print("const uint16_t PRIV(ucd_stage2)[] = {0};")
|
||||
|
@ -507,7 +507,7 @@ print_table(min_stage2, 'PRIV(ucd_stage2)', min_block_size)
|
|||
print("#if UCD_BLOCK_SIZE != %d" % min_block_size)
|
||||
print("#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h")
|
||||
print("#endif")
|
||||
print("#endif /* SUPPORT_UTF */")
|
||||
print("#endif /* SUPPORT_UNICODE */")
|
||||
print()
|
||||
print("#endif /* PCRE2_PCRE2TEST */")
|
||||
|
||||
|
|
|
@ -19,8 +19,8 @@ one. */
|
|||
#include "../src/config.h"
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
#define SUPPORT_UTF
|
||||
#ifndef SUPPORT_UNICODE
|
||||
#define SUPPORT_UNICODE
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
|
|
|
@ -278,11 +278,11 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE8 */
|
||||
|
||||
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
||||
This will work even in an EBCDIC environment, but it is incompatible with
|
||||
the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/UTF-8/16/32, but not both at once. */
|
||||
/* #undef SUPPORT_UTF */
|
||||
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||
will work even in an EBCDIC environment, but it is incompatible with the
|
||||
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/Unicode, but not both at once. */
|
||||
/* #undef SUPPORT_UNICODE */
|
||||
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
/* #undef SUPPORT_VALGRIND */
|
||||
|
|
|
@ -193,32 +193,32 @@ must all be greater than zero. */
|
|||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Error codes for pcre2[_dfa]_match() */
|
||||
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, and
|
||||
context functions. */
|
||||
|
||||
#define PCRE2_ERROR_BADCOUNT (-29)
|
||||
#define PCRE2_ERROR_BADENDIANNESS (-30)
|
||||
#define PCRE2_ERROR_BADLENGTH (-31)
|
||||
#define PCRE2_ERROR_BADMAGIC (-32)
|
||||
#define PCRE2_ERROR_BADMODE (-33)
|
||||
#define PCRE2_ERROR_BADOFFSET (-34)
|
||||
#define PCRE2_ERROR_BADOPTION (-35)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-41)
|
||||
#define PCRE2_ERROR_DFA_UMLIMIT (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NULL (-50)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
||||
#define PCRE2_ERROR_BADDATA (-29)
|
||||
#define PCRE2_ERROR_BADLENGTH (-30)
|
||||
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||
#define PCRE2_ERROR_BADMODE (-32)
|
||||
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||
#define PCRE2_ERROR_BADOPTION (-34)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-35)
|
||||
#define PCRE2_ERROR_CALLOUT (-36) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-37)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-38)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-39)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-40)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-41)
|
||||
#define PCRE2_ERROR_INTERNAL (-42)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-43)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-44)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-45)
|
||||
#define PCRE2_ERROR_NOMEMORY (-46)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-47)
|
||||
#define PCRE2_ERROR_NULL (-48)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-49)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-50)
|
||||
#define PCRE2_ERROR_UNSET (-51)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -257,8 +257,8 @@ must all be greater than zero. */
|
|||
#define PCRE2_CONFIG_PARENSLIMIT 7
|
||||
#define PCRE2_CONFIG_RECURSIONLIMIT 5
|
||||
#define PCRE2_CONFIG_STACKRECURSE 8
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 9
|
||||
#define PCRE2_CONFIG_UTF 10
|
||||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
@ -338,7 +338,7 @@ expanded for each width below. Start with functions that give general
|
|||
information. */
|
||||
|
||||
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_config(int, void *, PCRE2_SIZE);
|
||||
PCRE2_EXP_DECL int pcre2_config(uint32_t, void *, PCRE2_SIZE);
|
||||
|
||||
|
||||
/* Functions for manipulating contexts. */
|
||||
|
@ -437,16 +437,16 @@ PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
|
|||
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
||||
int, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
unsigned int, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
||||
int, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
unsigned int, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
||||
int, PCRE2_SIZE *); \
|
||||
unsigned int, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
||||
|
@ -622,24 +622,27 @@ PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
|||
#undef PCRE2_OTHER_FUNCTIONS
|
||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
|
||||
/* Re-define PCRE2_SUFFIX to use the external width value, if defined.
|
||||
Otherwise, undefine the other macros and make PCRE2_SUFFIX a no-op, to reduce
|
||||
confusion. */
|
||||
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||
|
||||
#undef PCRE2_SUFFIX
|
||||
#ifdef PCRE2_CODE_UNIT_WIDTH
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8 && \
|
||||
PCRE2_CODE_UNIT_WIDTH != 16 && \
|
||||
PCRE2_CODE_UNIT_WIDTH != 32
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be 8, 16, or 32
|
||||
#endif
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||
#else
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||
#undef PCRE2_JOIN
|
||||
#undef PCRE2_GLUE
|
||||
#define PCRE2_SUFFIX(a) a
|
||||
#else
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
|
|
@ -231,7 +231,7 @@ static const uint8_t opcode_possessify[] = {
|
|||
|
||||
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/*************************************************
|
||||
* Check a character and a property *
|
||||
*************************************************/
|
||||
|
@ -311,7 +311,7 @@ switch(ptype)
|
|||
|
||||
return FALSE;
|
||||
}
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
|
||||
|
@ -368,7 +368,7 @@ PCRE2_UCHAR base;
|
|||
PCRE2_SPTR end;
|
||||
uint32_t chr;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
uint32_t *clist_dest;
|
||||
const uint32_t *clist_src;
|
||||
#else
|
||||
|
@ -451,7 +451,7 @@ switch(c)
|
|||
GETCHARINCTEST(chr, code);
|
||||
list[2] = chr;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (chr < 128 || (chr < 256 && !utf))
|
||||
list[3] = fcc[chr];
|
||||
else
|
||||
|
@ -470,7 +470,7 @@ switch(c)
|
|||
list[4] = NOTACHAR;
|
||||
return code;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
if (code[0] != PT_CLIST)
|
||||
|
@ -812,7 +812,7 @@ for(;;)
|
|||
leftop = base_list[0];
|
||||
rightop = list[0];
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
accepted = FALSE; /* Always set in non-unicode case. */
|
||||
if (leftop == OP_PROP || leftop == OP_NOTPROP)
|
||||
{
|
||||
|
@ -915,7 +915,7 @@ for(;;)
|
|||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
|
||||
rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
|
||||
|
@ -1039,7 +1039,7 @@ for(;;)
|
|||
case OP_EOD: /* Can always possessify before \z */
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
|
||||
|
|
|
@ -433,7 +433,7 @@ static const int posix_class_maps[] = {
|
|||
/* Table of substitutes for \d etc when PCRE2_UCP is set. They are replaced by
|
||||
Unicode property escapes. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
static const PCRE2_UCHAR string_PNd[] = {
|
||||
CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
|
||||
CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
|
||||
|
@ -541,7 +541,7 @@ static PCRE2_SPTR posix_substitutes[] = {
|
|||
NULL /* ^xdigit */
|
||||
};
|
||||
#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(PCRE2_UCHAR *))
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Masks for checking option settings. */
|
||||
|
||||
|
@ -887,7 +887,7 @@ for (;;)
|
|||
case OP_NOTI:
|
||||
branchlength++;
|
||||
cc += 2;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
@ -901,7 +901,7 @@ for (;;)
|
|||
case OP_NOTEXACTI:
|
||||
branchlength += (int)GET2(cc,1);
|
||||
cc += 2 + IMM2_SIZE;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
@ -1315,7 +1315,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
|
|||
actual length is stored in the compiled code, so we must update "code"
|
||||
here. */
|
||||
|
||||
#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
case OP_XCLASS:
|
||||
ccode = code += GET(code, 1);
|
||||
goto CHECK_CLASS_REPEAT;
|
||||
|
@ -1325,7 +1325,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
|
|||
case OP_NCLASS:
|
||||
ccode = code + PRIV(OP_lengths)[OP_CLASS];
|
||||
|
||||
#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
CHECK_CLASS_REPEAT:
|
||||
#endif
|
||||
|
||||
|
@ -2062,7 +2062,7 @@ return escape;
|
|||
|
||||
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/*************************************************
|
||||
* Handle \P and \p *
|
||||
*************************************************/
|
||||
|
@ -2678,7 +2678,7 @@ return -1;
|
|||
|
||||
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/*************************************************
|
||||
* Get othercase range *
|
||||
*************************************************/
|
||||
|
@ -2740,7 +2740,7 @@ for (++c; c <= d; c++)
|
|||
*cptr = c; /* Rest of input range */
|
||||
return 0;
|
||||
}
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
|
||||
|
@ -2780,7 +2780,7 @@ range. */
|
|||
|
||||
if ((options & PCRE2_CASELESS) != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((options & PCRE2_UTF) != 0)
|
||||
{
|
||||
int rc;
|
||||
|
@ -2810,7 +2810,7 @@ if ((options & PCRE2_CASELESS) != 0)
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF mode */
|
||||
|
||||
|
@ -2844,7 +2844,7 @@ if (end >= start)
|
|||
{
|
||||
PCRE2_UCHAR *uchardata = *uchardptr;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((options & PCRE2_UTF) != 0)
|
||||
{
|
||||
if (start < end)
|
||||
|
@ -2860,7 +2860,7 @@ if (end >= start)
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Without UTF support, character values are constrained by the bit length,
|
||||
and can only be > 256 for 16-bit and 32-bit libraries. */
|
||||
|
@ -3042,7 +3042,7 @@ uint8_t classbits[32];
|
|||
not do this for other options (e.g. PCRE2_EXTENDED) because they may change
|
||||
dynamically as we process the pattern. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
PCRE2_UCHAR utf_units[6]; /* For setting up multi-cu chars */
|
||||
|
@ -3235,7 +3235,7 @@ for (;; ptr++)
|
|||
break;
|
||||
}
|
||||
ptr++;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) FORWARDCHAR(ptr);
|
||||
#endif
|
||||
}
|
||||
|
@ -3474,7 +3474,7 @@ for (;; ptr++)
|
|||
goto FAILED;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(c))
|
||||
{ /* Braces are required because the */
|
||||
GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
|
||||
|
@ -3556,7 +3556,7 @@ for (;; ptr++)
|
|||
that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
|
||||
directly. UCP support is not available unless Unicode support is. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((options & PCRE2_UCP) != 0)
|
||||
{
|
||||
unsigned int ptype = 0;
|
||||
|
@ -3599,7 +3599,7 @@ for (;; ptr++)
|
|||
break;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* In the non-UCP case, or when UCP makes no difference, we build the
|
||||
bit map for the POSIX class in a chunk of local store because we may be
|
||||
|
@ -3689,7 +3689,7 @@ for (;; ptr++)
|
|||
|
||||
switch (escape)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case ESC_du: /* These are the values given for \d etc */
|
||||
case ESC_DU: /* when PCRE2_UCP is set. We replace the */
|
||||
case ESC_wu: /* escape sequence with an appropriate \p */
|
||||
|
@ -3757,7 +3757,7 @@ for (;; ptr++)
|
|||
cb, PRIV(vspace_list));
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case ESC_p:
|
||||
case ESC_P:
|
||||
{
|
||||
|
@ -3840,7 +3840,7 @@ for (;; ptr++)
|
|||
|
||||
/* Otherwise, we have a potential range; pick up the next character */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{ /* Braces are required because the */
|
||||
GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */
|
||||
|
@ -3940,7 +3940,7 @@ for (;; ptr++)
|
|||
|
||||
if (negate_class)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
int d;
|
||||
#endif
|
||||
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
|
||||
|
@ -3951,7 +3951,7 @@ for (;; ptr++)
|
|||
one other case. If so, generate a special OP_NOTPROP item instead of
|
||||
OP_NOTI. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_CASELESS) != 0 &&
|
||||
(d = UCD_CASESET(c)) != 0)
|
||||
{
|
||||
|
@ -4032,7 +4032,7 @@ for (;; ptr++)
|
|||
be listed) there are no characters < 256, we can omit the bitmap in the
|
||||
actual compiled code. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0))
|
||||
#elif PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (xclass && !should_flip_negation)
|
||||
|
@ -4157,7 +4157,7 @@ for (;; ptr++)
|
|||
break;
|
||||
}
|
||||
p++;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) FORWARDCHAR(p);
|
||||
#endif
|
||||
} /* Loop for comment characters */
|
||||
|
@ -4265,7 +4265,7 @@ for (;; ptr++)
|
|||
/* If previous was a character type match (\d or similar), abolish it and
|
||||
create a suitable repeat item. The code is shared with single-character
|
||||
repeats by setting op_type to add a suitable offset into repeat_type. Note
|
||||
the the Unicode property types will be present only when SUPPORT_UTF is
|
||||
that the Unicode property types will be present only when SUPPORT_UNICODE is
|
||||
defined, but we don't wrap the little bits of code here because it just
|
||||
makes it horribly messy. */
|
||||
|
||||
|
@ -4880,7 +4880,7 @@ for (;; ptr++)
|
|||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
tempcode += PRIV(OP_lengths)[*tempcode];
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(tempcode[-1]))
|
||||
tempcode += GET_EXTRALEN(tempcode[-1]);
|
||||
#endif
|
||||
|
@ -6407,7 +6407,7 @@ for (;; ptr++)
|
|||
|
||||
/* So are Unicode property matches, if supported. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
else if (escape == ESC_P || escape == ESC_p)
|
||||
{
|
||||
BOOL negated;
|
||||
|
@ -6442,7 +6442,7 @@ for (;; ptr++)
|
|||
if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
|
||||
cb->max_lookbehind == 0)
|
||||
cb->max_lookbehind = 1;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (escape >= ESC_DU && escape <= ESC_wu)
|
||||
{
|
||||
nestptr = ptr + 1; /* Where to resume */
|
||||
|
@ -6479,7 +6479,7 @@ for (;; ptr++)
|
|||
mclength = 1;
|
||||
mcbuffer[0] = c;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(c))
|
||||
ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
|
||||
#endif
|
||||
|
@ -6493,7 +6493,7 @@ for (;; ptr++)
|
|||
/* For caseless UTF mode, check whether this character has more than one
|
||||
other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_CASELESS) != 0)
|
||||
{
|
||||
GETCHAR(c, mcbuffer);
|
||||
|
@ -7527,7 +7527,7 @@ ptr += skipatstart;
|
|||
|
||||
/* Can't support UTF or UCP unless PCRE2 has been compiled with Unicode support. */
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
#ifndef SUPPORT_UNICODE
|
||||
if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0)
|
||||
{
|
||||
errorcode = ERR32;
|
||||
|
@ -7911,7 +7911,7 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
|
|||
points and cannot have another case. In 16-bit and 32-bit modes, we can
|
||||
check wide characters when UTF (and therefore UCP) is supported. */
|
||||
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
else if (firstcu <= MAX_UTF_CODE_POINT &&
|
||||
UCD_OTHERCASE(firstcu) != firstcu)
|
||||
re->flags |= PCRE2_FIRSTCASELESS;
|
||||
|
@ -7945,7 +7945,7 @@ if (reqcuflags >= 0 &&
|
|||
{
|
||||
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
|
||||
}
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
|
||||
re->flags |= PCRE2_LASTCASELESS;
|
||||
#endif
|
||||
|
|
|
@ -75,7 +75,7 @@ Returns: 0 if data returned
|
|||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_config(int what, void *where, size_t length)
|
||||
pcre2_config(uint32_t what, void *where, size_t length)
|
||||
{
|
||||
if (length < sizeof(int)) return PCRE2_ERROR_BADLENGTH;
|
||||
|
||||
|
@ -145,7 +145,7 @@ switch (what)
|
|||
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
{
|
||||
#if defined SUPPORT_UTF
|
||||
#if defined SUPPORT_UNICODE
|
||||
const char *v = PRIV(unicode_version);
|
||||
#else
|
||||
const char *v = "Unicode not supported";
|
||||
|
@ -158,8 +158,8 @@ switch (what)
|
|||
}
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UTF:
|
||||
#if defined SUPPORT_UTF
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
#if defined SUPPORT_UNICODE
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
|
|
|
@ -263,8 +263,9 @@ if (mcontext != NULL)
|
|||
* Set values in contexts *
|
||||
*************************************************/
|
||||
|
||||
/* All these functions return 1 for success or 0 if invalid data is given. Only
|
||||
some of the functions are able to test the validity of the data. */
|
||||
/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
|
||||
data is given. Only some of the functions are able to test the validity of the
|
||||
data. */
|
||||
|
||||
|
||||
/* ------------ Compile contexts ------------ */
|
||||
|
@ -274,7 +275,7 @@ pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
|||
const unsigned char *tables)
|
||||
{
|
||||
ccontext->tables = tables;
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
|
@ -285,10 +286,10 @@ switch(value)
|
|||
case PCRE2_BSR_ANYCRLF:
|
||||
case PCRE2_BSR_UNICODE:
|
||||
ccontext->bsr_convention = value;
|
||||
return 1;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -303,10 +304,10 @@ switch(newline)
|
|||
case PCRE2_NEWLINE_ANY:
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
ccontext->newline_convention = newline;
|
||||
return 1;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -314,7 +315,7 @@ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
|||
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
|
||||
{
|
||||
ccontext->parens_nest_limit = limit;
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
|
@ -322,7 +323,7 @@ pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
|||
int (*guard)(uint32_t))
|
||||
{
|
||||
ccontext->stack_guard = guard;
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -336,10 +337,10 @@ switch(value)
|
|||
case PCRE2_BSR_ANYCRLF:
|
||||
case PCRE2_BSR_UNICODE:
|
||||
mcontext->bsr_convention = value;
|
||||
return 1;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -354,10 +355,10 @@ switch(newline)
|
|||
case PCRE2_NEWLINE_ANY:
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
mcontext->newline_convention = newline;
|
||||
return 1;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -367,21 +368,21 @@ pcre2_set_callout(pcre2_match_context *mcontext,
|
|||
{
|
||||
mcontext->callout = callout;
|
||||
mcontext->callout_data = callout_data;
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->match_limit = limit;
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->recursion_limit = limit;
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
|
@ -399,7 +400,7 @@ mcontext->stack_memctl.memory_data = mydata;
|
|||
(void)myfree;
|
||||
(void)mydata;
|
||||
#endif
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_context.c */
|
||||
|
|
|
@ -391,7 +391,7 @@ PCRE2_SPTR start_subject = mb->start_subject;
|
|||
PCRE2_SPTR end_subject = mb->end_subject;
|
||||
PCRE2_SPTR start_code = mb->start_code;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||
#else
|
||||
BOOL utf = FALSE;
|
||||
|
@ -447,7 +447,7 @@ if (*first_op == OP_REVERSE)
|
|||
/* If we can't go back the amount required for the longest lookbehind
|
||||
pattern, go back as far as we can; some alternatives may still be viable. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* In character mode we have to step back character by character */
|
||||
|
||||
if (utf)
|
||||
|
@ -570,11 +570,11 @@ for (;;)
|
|||
if (ptr < end_subject)
|
||||
{
|
||||
clen = 1; /* Number of data items in the character */
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
GETCHARLENTEST(c, ptr, clen);
|
||||
#else
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -652,9 +652,9 @@ for (;;)
|
|||
if (coptable[codevalue] > 0)
|
||||
{
|
||||
dlen = 1;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
d = code[coptable[codevalue]];
|
||||
if (codevalue >= OP_TYPESTAR)
|
||||
{
|
||||
|
@ -948,11 +948,11 @@ for (;;)
|
|||
{
|
||||
PCRE2_SPTR temp = ptr - 1;
|
||||
if (temp < mb->start_used_ptr) mb->start_used_ptr = temp;
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf) { BACKCHAR(temp); }
|
||||
#endif
|
||||
GETCHARTEST(d, temp);
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||
{
|
||||
if (d == '_') left_word = TRUE; else
|
||||
|
@ -972,12 +972,12 @@ for (;;)
|
|||
if (ptr >= mb->last_used_ptr)
|
||||
{
|
||||
PCRE2_SPTR temp = ptr + 1;
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf) { FORWARDCHAR(temp); }
|
||||
#endif
|
||||
mb->last_used_ptr = temp;
|
||||
}
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||
{
|
||||
if (c == '_') right_word = TRUE; else
|
||||
|
@ -1003,7 +1003,7 @@ for (;;)
|
|||
if the support is in the binary; otherwise a compile-time error occurs.
|
||||
*/
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
if (clen > 0)
|
||||
|
@ -1258,7 +1258,7 @@ for (;;)
|
|||
argument. It keeps the code above fast for the other cases. The argument
|
||||
is in the d variable. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
|
||||
|
@ -1501,7 +1501,7 @@ for (;;)
|
|||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
|
||||
|
@ -1785,7 +1785,7 @@ for (;;)
|
|||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP_EXTRA + OP_TYPEEXACT:
|
||||
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
||||
|
@ -2063,7 +2063,7 @@ for (;;)
|
|||
case OP_CHARI:
|
||||
if (clen == 0) break;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
|
||||
|
@ -2077,7 +2077,7 @@ for (;;)
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
/* Not UTF mode */
|
||||
{
|
||||
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
|
||||
|
@ -2086,7 +2086,7 @@ for (;;)
|
|||
break;
|
||||
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* This is a tricky one because it can match more than one character.
|
||||
Find out how many characters to skip, and then set up a negative state
|
||||
|
@ -2222,11 +2222,11 @@ for (;;)
|
|||
if (clen > 0)
|
||||
{
|
||||
unsigned int otherd;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
if (c != d && c != otherd)
|
||||
{ ADD_NEW(state_offset + dlen + 1, 0); }
|
||||
|
@ -2257,11 +2257,11 @@ for (;;)
|
|||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
|
@ -2300,11 +2300,11 @@ for (;;)
|
|||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
|
@ -2341,11 +2341,11 @@ for (;;)
|
|||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
|
@ -2374,11 +2374,11 @@ for (;;)
|
|||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
|
@ -2414,11 +2414,11 @@ for (;;)
|
|||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
|
@ -2747,7 +2747,7 @@ for (;;)
|
|||
for (rc = rc*2 - 2; rc >= 0; rc -= 2)
|
||||
{
|
||||
int charcount = local_offsets[rc+1] - local_offsets[rc];
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_SPTR p = start_subject + local_offsets[rc];
|
||||
|
@ -2851,7 +2851,7 @@ for (;;)
|
|||
PCRE2_SPTR p = ptr;
|
||||
PCRE2_SPTR pp = local_ptr;
|
||||
charcount = (int)(pp - p);
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
||||
#endif
|
||||
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
|
||||
|
@ -2933,7 +2933,7 @@ for (;;)
|
|||
}
|
||||
else
|
||||
{
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_SPTR p = start_subject + local_offsets[0];
|
||||
|
@ -3106,14 +3106,24 @@ if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
|
|||
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
|
||||
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
|
||||
REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which
|
||||
means that the pattern is likely compiled with different endianness. */
|
||||
/* FIXME: Remove BADENDIANNESS if saving/restoring is not to be implemented. */
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
#ifdef FIXME
|
||||
If saving/restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
|
||||
this comment and code:
|
||||
|
||||
/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
|
||||
with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
|
||||
with different endianness. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
||||
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
|
||||
#endif
|
||||
|
||||
/* Check the code unit width. */
|
||||
|
||||
|
@ -3238,7 +3248,7 @@ switch(newline)
|
|||
we must also check that a starting offset does not point into the middle of a
|
||||
multiunit character. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar));
|
||||
|
@ -3253,7 +3263,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
|||
return PCRE2_ERROR_BADUTFOFFSET;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
}
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Set up the first code unit to match, if available. The first_codeunit value
|
||||
is never set for an anchored regular expression, but the anchoring may be
|
||||
|
@ -3270,7 +3280,7 @@ if (!anchored)
|
|||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||
{
|
||||
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
|
||||
#endif
|
||||
}
|
||||
|
@ -3290,7 +3300,7 @@ if ((re->flags & PCRE2_LASTSET) != 0)
|
|||
if ((re->flags & PCRE2_LASTCASELESS) != 0)
|
||||
{
|
||||
req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
|
||||
#endif
|
||||
}
|
||||
|
@ -3327,7 +3337,7 @@ for (;;)
|
|||
if (firstline)
|
||||
{
|
||||
PCRE2_SPTR t = start_match;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (t < mb->end_subject && !IS_NEWLINE(t))
|
||||
|
@ -3362,7 +3372,7 @@ for (;;)
|
|||
{
|
||||
if (start_match > mb->start_subject + start_offset)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
|
@ -3516,7 +3526,7 @@ for (;;)
|
|||
|
||||
if (firstline && IS_NEWLINE(start_match)) break;
|
||||
start_match++;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
ACROSSCHAR(start_match < end_subject, *start_match,
|
||||
|
|
|
@ -198,35 +198,34 @@ static const char match_error_texts[] =
|
|||
"UTF-16 error: isolated low surrogate\0"
|
||||
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
||||
"bad count value\0"
|
||||
"bad data value\0"
|
||||
/* 30 */
|
||||
"pattern compiled with other endianness\0"
|
||||
"bad length\0"
|
||||
"magic number missing\0"
|
||||
"pattern compiled in wrong mode: 8/16/32-bit error\0"
|
||||
"bad offset value\0"
|
||||
/* 35 */
|
||||
"bad option value\0"
|
||||
/* 35 */
|
||||
"bad offset into UTF string\0"
|
||||
"callout error code\0" /* Never returned by PCRE2 itself */
|
||||
"invalid data in workspace for DFA restart\0"
|
||||
"too much recursion for DFA matching\0"
|
||||
/* 40 */
|
||||
"backreference condition or recursion test not supported for DFA matching\0"
|
||||
/* 40 */
|
||||
"item unsupported for DFA matching\0"
|
||||
"match limit not supported for DFA matching\0"
|
||||
"workspace size exceeded in DFA matching\0"
|
||||
"internal error - pattern overwritten?\0"
|
||||
/* 45 */
|
||||
"bad JIT option\0"
|
||||
"JIT stack limit reached\0"
|
||||
/* 45 */
|
||||
"match limit exceeded\0"
|
||||
"no more memory\0"
|
||||
"unknown or unset substring\0"
|
||||
/* 50 */
|
||||
"NULL argument passed\0"
|
||||
"nested recursion at the same subject position\0"
|
||||
/* 50 */
|
||||
"recursion limit exceeded\0"
|
||||
"requested value is not set\0"
|
||||
;
|
||||
|
||||
|
||||
|
|
|
@ -38,11 +38,11 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* We do not support both EBCDIC and UTF at the same time. The "configure"
|
||||
/* We do not support both EBCDIC and Unicode at the same time. The "configure"
|
||||
script prevents both being selected, but not everybody uses "configure". */
|
||||
|
||||
#if defined EBCDIC && defined SUPPORT_UTF
|
||||
#error The use of both EBCDIC and SUPPORT_UTF is not supported.
|
||||
#if defined EBCDIC && defined SUPPORT_UNICODE
|
||||
#error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
|
||||
#endif
|
||||
|
||||
/* Standard C headers */
|
||||
|
@ -597,14 +597,14 @@ there are some longer strings as well.
|
|||
|
||||
This means that, on EBCDIC platforms, the PCRE library can handle either
|
||||
EBCDIC, or UTF-8, but not both. To support both in the same compiled library
|
||||
would need different lookups depending on whether PCRE_UTF8 was set or not.
|
||||
would need different lookups depending on whether PCRE2_UTF was set or not.
|
||||
This would make it impossible to use characters in switch/case statements,
|
||||
which would reduce performance. For a theoretical use (which nobody has asked
|
||||
for) in a minority area (EBCDIC platforms), this is not sensible. Any
|
||||
application that did need both could compile two versions of the library, using
|
||||
macros to give the functions distinct names. */
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
#ifndef SUPPORT_UNICODE
|
||||
|
||||
/* UTF-8 support is not enabled; use the platform-dependent character literals
|
||||
so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
|
||||
|
@ -920,7 +920,7 @@ a positive value. */
|
|||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||
|
||||
#else /* SUPPORT_UTF */
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
|
||||
works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
|
||||
|
@ -1189,7 +1189,7 @@ only. */
|
|||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* -------------------- End of character and string names -------------------*/
|
||||
|
||||
|
@ -1775,10 +1775,10 @@ typedef struct {
|
|||
|
||||
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
|
||||
|
||||
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not
|
||||
defined, so the following items are omitted. */
|
||||
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is defined as
|
||||
0, so the following items are omitted. */
|
||||
|
||||
#ifdef PCRE2_CODE_UNIT_WIDTH
|
||||
#if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0
|
||||
|
||||
/* This is the largest non-UTF code point. */
|
||||
|
||||
|
|
|
@ -208,9 +208,9 @@ tables. */
|
|||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_255(c) TRUE
|
||||
#define MAX_MARK ((1u << 8) - 1)
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
#define TABLE_GET(c, table, default) ((table)[c])
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
|
@ -246,7 +246,7 @@ complicated ones for UTF characters. GETCHARLENTEST and other macros are not
|
|||
used when UTF is not supported. To make sure they can never even appear when
|
||||
UTF support is omitted, we don't even define them. */
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
#ifndef SUPPORT_UNICODE
|
||||
|
||||
/* #define MAX_UTF_SINGLE_CU */
|
||||
/* #define HAS_EXTRALEN(c) */
|
||||
|
@ -263,7 +263,7 @@ UTF support is omitted, we don't even define them. */
|
|||
/* #define FORWARDCHAR(eptr) */
|
||||
/* #define ACROSSCHAR(condition, eptr, action) */
|
||||
|
||||
#else /* SUPPORT_UTF */
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
||||
|
@ -527,7 +527,7 @@ These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
|
|||
#define PUTCHAR(c, p) (*p = c, 1)
|
||||
|
||||
#endif /* UTF-32 character handling */
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
/* Mode-dependent macros that have the same definition in all modes. */
|
||||
|
|
|
@ -145,7 +145,7 @@ static int
|
|||
match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
|
||||
match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
#if defined SUPPORT_UTF
|
||||
#if defined SUPPORT_UNICODE
|
||||
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||
#endif
|
||||
|
||||
|
@ -173,7 +173,7 @@ length = mb->ovector[offset+1] - mb->ovector[offset];
|
|||
|
||||
if (caseless)
|
||||
{
|
||||
#if defined SUPPORT_UTF
|
||||
#if defined SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
/* Match characters up to the end of the reference. NOTE: the number of
|
||||
|
@ -352,7 +352,7 @@ typedef struct heapframe {
|
|||
struct heapframe *Xprevframe;
|
||||
struct heapframe *Xnextframe;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
PCRE2_SPTR Xcharptr;
|
||||
#endif
|
||||
PCRE2_SPTR Xeptr;
|
||||
|
@ -378,7 +378,7 @@ typedef struct heapframe {
|
|||
uint32_t Xop;
|
||||
uint32_t Xsave_capture_last;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
uint32_t Xprop_value;
|
||||
int Xprop_type;
|
||||
int Xprop_fail_result;
|
||||
|
@ -399,7 +399,7 @@ typedef struct heapframe {
|
|||
eptrblock Xnewptrb;
|
||||
recursion_info Xnew_recursive;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
PCRE2_UCHAR Xocchars[6];
|
||||
#endif
|
||||
} heapframe;
|
||||
|
@ -610,7 +610,7 @@ HEAP_RECURSE:
|
|||
|
||||
/* Ditto for the local variables */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#define charptr frame->Xcharptr
|
||||
#define prop_value frame->Xprop_value
|
||||
#define prop_type frame->Xprop_type
|
||||
|
@ -666,7 +666,7 @@ declarations can be cut out in a block. The only declarations within blocks
|
|||
below are for variables that do not have to be preserved over a recursive call
|
||||
to RMATCH(). */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
PCRE2_SPTR charptr;
|
||||
#endif
|
||||
PCRE2_SPTR callpat;
|
||||
|
@ -684,7 +684,7 @@ uint32_t number;
|
|||
uint32_t op;
|
||||
uint32_t save_capture_last;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
uint32_t prop_value;
|
||||
int prop_type;
|
||||
int prop_fail_result;
|
||||
|
@ -721,7 +721,7 @@ the alternative names that are used. */
|
|||
/* These statements are here to stop the compiler complaining about uninitialized
|
||||
variables. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
prop_value = 0;
|
||||
prop_fail_result = 0;
|
||||
#endif
|
||||
|
@ -742,7 +742,7 @@ call because it's quite a complicated macro. It has to be used in one
|
|||
particular way. This shouldn't, however, impact performance when true recursion
|
||||
is being used. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||
#else
|
||||
utf = FALSE;
|
||||
|
@ -1662,7 +1662,7 @@ for (;;)
|
|||
back a number of characters, not bytes. */
|
||||
|
||||
case OP_REVERSE:
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
i = GET(ecode, 1);
|
||||
|
@ -2197,7 +2197,7 @@ for (;;)
|
|||
be "non-word" characters. Remember the earliest consulted character for
|
||||
partial matching. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
/* Get status of previous character */
|
||||
|
@ -2257,7 +2257,7 @@ for (;;)
|
|||
if (eptr == mb->start_subject) prev_is_word = FALSE; else
|
||||
{
|
||||
if (eptr <= mb->start_used_ptr) mb->start_used_ptr = eptr - 1;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||
{
|
||||
c = eptr[-1];
|
||||
|
@ -2283,7 +2283,7 @@ for (;;)
|
|||
else
|
||||
{
|
||||
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||
{
|
||||
c = *eptr;
|
||||
|
@ -2334,7 +2334,7 @@ for (;;)
|
|||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
eptr++;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
|
||||
#endif
|
||||
ecode++;
|
||||
|
@ -2550,7 +2550,7 @@ for (;;)
|
|||
ecode++;
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* Check the next character by Unicode property. We will get here only
|
||||
if the support is in the binary; otherwise a compile-time error occurs. */
|
||||
|
||||
|
@ -2684,7 +2684,7 @@ for (;;)
|
|||
CHECK_PARTIAL();
|
||||
ecode++;
|
||||
break;
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
/* Match a back reference, possibly repeatedly. Look past the end of the
|
||||
|
@ -2955,7 +2955,7 @@ for (;;)
|
|||
|
||||
/* First, ensure the minimum number of matches are present. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
for (i = 1; i <= min; i++)
|
||||
|
@ -3007,7 +3007,7 @@ for (;;)
|
|||
|
||||
if (minimize)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
for (fi = min;; fi++)
|
||||
|
@ -3063,7 +3063,7 @@ for (;;)
|
|||
{
|
||||
pp = eptr;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
|
@ -3232,7 +3232,7 @@ for (;;)
|
|||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
GETCHARLENTEST(c, eptr, len);
|
||||
#else
|
||||
c = *eptr;
|
||||
|
@ -3248,7 +3248,7 @@ for (;;)
|
|||
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM21);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (eptr-- == pp) break; /* Stop if tried at original pos */
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(eptr);
|
||||
#endif
|
||||
}
|
||||
|
@ -3262,7 +3262,7 @@ for (;;)
|
|||
/* Match a single character, casefully */
|
||||
|
||||
case OP_CHAR:
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
length = 1;
|
||||
|
@ -3299,7 +3299,7 @@ for (;;)
|
|||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
length = 1;
|
||||
|
@ -3334,7 +3334,7 @@ for (;;)
|
|||
|
||||
if (fc != dc)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (dc != UCD_OTHERCASE(fc))
|
||||
#endif
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
@ -3342,7 +3342,7 @@ for (;;)
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF mode */
|
||||
{
|
||||
|
@ -3436,7 +3436,7 @@ for (;;)
|
|||
for speed. */
|
||||
|
||||
REPEATCHAR:
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
length = 1;
|
||||
|
@ -3527,7 +3527,7 @@ for (;;)
|
|||
value of fc will always be < 128. */
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* When not in UTF-8 mode, load a single-byte character. */
|
||||
fc = *ecode++;
|
||||
|
@ -3547,11 +3547,11 @@ for (;;)
|
|||
/* fc must be < 128 if UTF is enabled. */
|
||||
foc = mb->fcc[fc];
|
||||
#else
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && fc > 127)
|
||||
foc = UCD_OTHERCASE(fc);
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
foc = TABLE_GET(fc, mb->fcc, fc);
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
|
@ -3682,7 +3682,7 @@ for (;;)
|
|||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
register uint32_t ch, och;
|
||||
|
@ -3705,7 +3705,7 @@ for (;;)
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
{
|
||||
register uint32_t ch = ecode[1];
|
||||
c = *eptr++;
|
||||
|
@ -3803,14 +3803,14 @@ for (;;)
|
|||
|
||||
if (op >= OP_NOTSTARI) /* Caseless */
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && fc > 127)
|
||||
foc = UCD_OTHERCASE(fc);
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
foc = TABLE_GET(fc, mb->fcc, fc);
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
register uint32_t d;
|
||||
|
@ -3826,7 +3826,7 @@ for (;;)
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
/* Not UTF mode */
|
||||
{
|
||||
for (i = 1; i <= min; i++)
|
||||
|
@ -3845,7 +3845,7 @@ for (;;)
|
|||
|
||||
if (minimize)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
register uint32_t d;
|
||||
|
@ -3864,7 +3864,7 @@ for (;;)
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif /*SUPPORT_UTF */
|
||||
#endif /*SUPPORT_UNICODE */
|
||||
/* Not UTF mode */
|
||||
{
|
||||
for (fi = min;; fi++)
|
||||
|
@ -3890,7 +3890,7 @@ for (;;)
|
|||
{
|
||||
pp = eptr;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
register uint32_t d;
|
||||
|
@ -3917,7 +3917,7 @@ for (;;)
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
/* Not UTF mode */
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
|
@ -3947,7 +3947,7 @@ for (;;)
|
|||
|
||||
else
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
register uint32_t d;
|
||||
|
@ -3981,7 +3981,7 @@ for (;;)
|
|||
|
||||
if (minimize)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
register uint32_t d;
|
||||
|
@ -4025,7 +4025,7 @@ for (;;)
|
|||
{
|
||||
pp = eptr;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
register uint32_t d;
|
||||
|
@ -4144,7 +4144,7 @@ for (;;)
|
|||
REPEATTYPE:
|
||||
ctype = *ecode++; /* Code for the character type */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (ctype == OP_PROP || ctype == OP_NOTPROP)
|
||||
{
|
||||
prop_fail_result = ctype == OP_NOTPROP;
|
||||
|
@ -4162,7 +4162,7 @@ for (;;)
|
|||
|
||||
if (min > 0)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (prop_type >= 0)
|
||||
{
|
||||
switch(prop_type)
|
||||
|
@ -4378,11 +4378,11 @@ for (;;)
|
|||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Handle all other cases when the coding is UTF-8 */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) switch(ctype)
|
||||
{
|
||||
case OP_ANY:
|
||||
|
@ -4631,7 +4631,7 @@ for (;;)
|
|||
} /* End switch(ctype) */
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Code for the non-UTF-8 case for minimum matching of operators other
|
||||
than OP_PROP and OP_NOTPROP. */
|
||||
|
@ -4889,7 +4889,7 @@ for (;;)
|
|||
|
||||
if (minimize)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (prop_type >= 0)
|
||||
{
|
||||
switch(prop_type)
|
||||
|
@ -5138,9 +5138,9 @@ for (;;)
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
for (fi = min;; fi++)
|
||||
|
@ -5410,7 +5410,7 @@ for (;;)
|
|||
{
|
||||
pp = eptr; /* Remember where we started */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (prop_type >= 0)
|
||||
{
|
||||
switch(prop_type)
|
||||
|
@ -5696,9 +5696,9 @@ for (;;)
|
|||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
switch(ctype)
|
||||
|
@ -5940,7 +5940,7 @@ for (;;)
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
/* Not UTF mode */
|
||||
{
|
||||
switch(ctype)
|
||||
|
@ -6219,13 +6219,13 @@ switch (frame->Xwhere)
|
|||
#ifdef SUPPORT_WIDE_CHARS
|
||||
LBL(20) LBL(21)
|
||||
#endif
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
LBL(16) LBL(18)
|
||||
LBL(22) LBL(23) LBL(28) LBL(30)
|
||||
LBL(32) LBL(34) LBL(42) LBL(46)
|
||||
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
|
||||
LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
default:
|
||||
return PCRE2_ERROR_INTERNAL;
|
||||
}
|
||||
|
@ -6398,14 +6398,21 @@ if (code == NULL || subject == NULL || match_data == NULL)
|
|||
return PCRE2_ERROR_NULL;
|
||||
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
|
||||
REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which
|
||||
means that the pattern is likely compiled with different endianness. */
|
||||
/* Check that the first field in the block is the magic number. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
#ifdef FIXME
|
||||
If saving/restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
|
||||
this comment and code:
|
||||
|
||||
/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
|
||||
with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
|
||||
with different endianness. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
||||
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
|
||||
#endif
|
||||
|
||||
/* Check the code unit width. */
|
||||
|
||||
|
@ -6451,7 +6458,7 @@ mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
|
|||
we must also check that a starting offset does not point into the middle of a
|
||||
multiunit character. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar));
|
||||
|
@ -6466,7 +6473,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
|||
return PCRE2_ERROR_BADUTFOFFSET;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
}
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* If the pattern was successfully studied with JIT support, run the JIT
|
||||
executable instead of the rest of this function. Most options must be set at
|
||||
|
@ -6640,7 +6647,7 @@ if (!anchored)
|
|||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||
{
|
||||
first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
|
||||
#endif
|
||||
}
|
||||
|
@ -6660,7 +6667,7 @@ if ((re->flags & PCRE2_LASTSET) != 0)
|
|||
if ((re->flags & PCRE2_LASTCASELESS) != 0)
|
||||
{
|
||||
req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
|
||||
#endif
|
||||
}
|
||||
|
@ -6696,7 +6703,7 @@ for(;;)
|
|||
if (firstline)
|
||||
{
|
||||
PCRE2_SPTR t = start_match;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (t < mb->end_subject && !IS_NEWLINE(t))
|
||||
|
@ -6731,7 +6738,7 @@ for(;;)
|
|||
{
|
||||
if (start_match > mb->start_subject + start_offset)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
|
@ -6905,7 +6912,7 @@ for(;;)
|
|||
case MATCH_THEN:
|
||||
mb->ignore_skip_arg = 0;
|
||||
new_start_match = start_match + 1;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
ACROSSCHAR(new_start_match < end_subject, *new_start_match,
|
||||
new_start_match++);
|
||||
|
|
|
@ -81,12 +81,12 @@ PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
|||
{
|
||||
uint32_t c;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
|
@ -172,7 +172,7 @@ PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
|||
uint32_t c;
|
||||
ptr--;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
|
@ -182,7 +182,7 @@ else c = *ptr;
|
|||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
|
|
|
@ -50,10 +50,11 @@ into a UTF string. The behaviour is different for each code unit width. */
|
|||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/* If SUPPORT_UTF is not defined, this function will never be called. Supply a
|
||||
dummy function because some compilers do not like empty source modules. */
|
||||
/* If SUPPORT_UNICODE is not defined, this function will never be called.
|
||||
Supply a dummy function because some compilers do not like empty source
|
||||
modules. */
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
#ifndef SUPPORT_UNICODE
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
|
@ -61,7 +62,7 @@ PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
|||
(void)(buffer);
|
||||
return 0;
|
||||
}
|
||||
#else /* SUPPORT_UTF */
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
/*************************************************
|
||||
|
@ -114,6 +115,6 @@ return 2;
|
|||
return 1;
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_ord2utf.c */
|
||||
|
|
|
@ -56,11 +56,9 @@ Arguments:
|
|||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
Returns: 0 if data returned, negative on error or unset value
|
||||
*/
|
||||
|
||||
/* FIXME: Remove BADENDIANNESS if saving/restoring is not to be implemented. */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
|
||||
{
|
||||
|
@ -69,13 +67,21 @@ const pcre2_real_code *re = (pcre2_real_code *)code;
|
|||
if (re == NULL || where == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
|
||||
REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which
|
||||
means that the pattern is likely compiled with different endianness. */
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
#ifdef FIXME
|
||||
If saving/restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
|
||||
this comment and code:
|
||||
|
||||
/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
|
||||
with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
|
||||
with different endianness. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
||||
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
|
||||
#endif
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
|
@ -151,6 +157,7 @@ switch(what)
|
|||
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
*((uint32_t *)where) = re->limit_match;
|
||||
if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||
|
@ -179,6 +186,7 @@ switch(what)
|
|||
|
||||
case PCRE2_INFO_RECURSIONLIMIT:
|
||||
*((uint32_t *)where) = re->limit_recursion;
|
||||
if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_SIZE:
|
||||
|
|
|
@ -94,7 +94,7 @@ BOOL one_code_unit = !utf;
|
|||
|
||||
/* If UTF is supported and requested, check for a valid single code unit. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
@ -105,7 +105,7 @@ if (utf)
|
|||
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Handle a valid one-code-unit character at any width. */
|
||||
|
||||
|
@ -121,7 +121,7 @@ if (one_code_unit)
|
|||
for each width. If UTF is not supported, control should never get here, but we
|
||||
need a return statement to keep the compiler happy. */
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
#ifndef SUPPORT_UNICODE
|
||||
return 0;
|
||||
#else
|
||||
|
||||
|
@ -178,7 +178,7 @@ as an indication. */
|
|||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
|
||||
|
@ -221,7 +221,7 @@ into the main code, however, we just put one into this function. */
|
|||
static const char *
|
||||
get_ucpname(unsigned int ptype, unsigned int pvalue)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
int i;
|
||||
for (i = utt_size - 1; i >= 0; i--)
|
||||
{
|
||||
|
@ -233,7 +233,7 @@ return (i >= 0)? utt_names + utt[i].name_offset : "??";
|
|||
(void)ptype;
|
||||
(void)pvalue;
|
||||
return "??";
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -228,7 +228,7 @@ for (;;)
|
|||
case OP_NOTPOSPLUSI:
|
||||
branchlength++;
|
||||
cc += 2;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
@ -249,7 +249,7 @@ for (;;)
|
|||
case OP_NOTEXACTI:
|
||||
branchlength += GET2(cc,1);
|
||||
cc += 2 + IMM2_SIZE;
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
@ -297,7 +297,7 @@ for (;;)
|
|||
appear, but leave the code, just in case.) */
|
||||
|
||||
case OP_ANYBYTE:
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) return -1;
|
||||
#endif
|
||||
branchlength++;
|
||||
|
@ -536,7 +536,7 @@ for (;;)
|
|||
case OP_NOTPOSQUERYI:
|
||||
|
||||
cc += PRIV(OP_lengths)[op];
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
@ -608,7 +608,7 @@ SET_BIT(c);
|
|||
/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
|
||||
the end of the character, even when caseless. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
@ -617,7 +617,7 @@ if (utf)
|
|||
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* If caseless, handle the other case of the character. */
|
||||
|
||||
|
@ -671,7 +671,7 @@ set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
|||
register uint32_t c;
|
||||
for (c = 0; c < table_limit; c++)
|
||||
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (table_limit == 32) return;
|
||||
for (c = 128; c < 256; c++)
|
||||
{
|
||||
|
@ -712,7 +712,7 @@ set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
|||
register uint32_t c;
|
||||
for (c = 0; c < table_limit; c++)
|
||||
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
||||
#endif
|
||||
}
|
||||
|
@ -752,7 +752,7 @@ set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
|
|||
register uint32_t c;
|
||||
int yield = SSB_DONE;
|
||||
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
int table_limit = utf? 16:32;
|
||||
#else
|
||||
int table_limit = 32;
|
||||
|
@ -866,7 +866,7 @@ do
|
|||
const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
|
||||
while ((c = *p++) < NOTACHAR)
|
||||
{
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_UCHAR buff[6];
|
||||
|
@ -1042,7 +1042,7 @@ do
|
|||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of horizontal space characters. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+00A0 */
|
||||
|
@ -1081,7 +1081,7 @@ do
|
|||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of vertical space characters. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+0085 (NEL) */
|
||||
|
@ -1181,7 +1181,7 @@ do
|
|||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of horizontal space characters. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+00A0 */
|
||||
|
@ -1218,7 +1218,7 @@ do
|
|||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of vertical space characters. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+0085 (NEL) */
|
||||
|
@ -1287,7 +1287,7 @@ do
|
|||
character modes, set the 0xFF bit to indicate code units >= 255. */
|
||||
|
||||
case OP_NCLASS:
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
re->start_bitmap[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||
|
@ -1318,7 +1318,7 @@ do
|
|||
|
||||
if (classmap != NULL)
|
||||
{
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c];
|
||||
|
|
|
@ -108,8 +108,8 @@ Returns: if successful: 0
|
|||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *match_data, int stringnumber,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
|
||||
unsigned int stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SIZE left, right;
|
||||
PCRE2_SIZE p = 0;
|
||||
|
@ -189,8 +189,8 @@ Returns: if successful: zero
|
|||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *match_data, int stringnumber,
|
||||
PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
|
||||
unsigned int stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SIZE left, right;
|
||||
PCRE2_SIZE p = 0;
|
||||
|
@ -288,7 +288,7 @@ Returns: 0 if successful, else a negative error number
|
|||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
|
||||
int stringnumber, PCRE2_SIZE *sizeptr)
|
||||
unsigned int stringnumber, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
if (stringnumber >= match_data->oveccount ||
|
||||
stringnumber > match_data->code->top_bracket ||
|
||||
|
|
|
@ -76,7 +76,7 @@ as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
|
|||
handling wide characters. */
|
||||
|
||||
#if defined PCRE2_PCRE2TEST || \
|
||||
(defined SUPPORT_UTF && \
|
||||
(defined SUPPORT_UNICODE && \
|
||||
defined PCRE2_CODE_UNIT_WIDTH && \
|
||||
PCRE2_CODE_UNIT_WIDTH == 8)
|
||||
|
||||
|
@ -106,7 +106,7 @@ const uint8_t PRIV(utf8_table4)[] = {
|
|||
#endif /* UTF-8 support needed */
|
||||
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
|
@ -728,6 +728,6 @@ const ucp_type_table PRIV(utt)[] = {
|
|||
|
||||
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_tables.c */
|
||||
|
|
|
@ -32,7 +32,7 @@ condition to cut out the tables when not needed. But don't leave
|
|||
a totally empty module because some compilers barf at that.
|
||||
Instead, just supply small dummy tables. */
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
#ifndef SUPPORT_UNICODE
|
||||
const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};
|
||||
const uint8_t PRIV(ucd_stage1)[] = {0};
|
||||
const uint16_t PRIV(ucd_stage2)[] = {0};
|
||||
|
@ -3628,6 +3628,6 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */
|
|||
#if UCD_BLOCK_SIZE != 128
|
||||
#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h
|
||||
#endif
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
|
|
@ -50,12 +50,12 @@ strings. */
|
|||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
#ifndef SUPPORT_UNICODE
|
||||
/*************************************************
|
||||
* Dummy function when UTF not supported *
|
||||
* Dummy function when Unicode is not supported *
|
||||
*************************************************/
|
||||
|
||||
/* This function should never be called when UTF is not supported. */
|
||||
/* This function should never be called when Unicode is not supported. */
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||
|
@ -388,6 +388,6 @@ for (p = string; length-- > 0; p++)
|
|||
return 0;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_valid_utf.c */
|
||||
|
|
|
@ -103,7 +103,7 @@ while ((t = *data++) != XCL_END)
|
|||
uint32_t x, y;
|
||||
if (t == XCL_SINGLE)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
|
@ -115,7 +115,7 @@ while ((t = *data++) != XCL_END)
|
|||
}
|
||||
else if (t == XCL_RANGE)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
|
@ -130,7 +130,7 @@ while ((t = *data++) != XCL_END)
|
|||
if (c >= x && c <= y) return !negated;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
const ucd_record *prop = GET_UCD(c);
|
||||
|
@ -262,7 +262,7 @@ while ((t = *data++) != XCL_END)
|
|||
}
|
||||
#else
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
return negated; /* char did not match */
|
||||
|
|
117
src/pcre2test.c
117
src/pcre2test.c
|
@ -196,6 +196,7 @@ so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
|
|||
for building the library. */
|
||||
|
||||
#define PRIV(name) name
|
||||
#define PCRE2_CODE_UNIT_WIDTH 0
|
||||
#include "pcre2.h"
|
||||
#include "pcre2posix.h"
|
||||
#include "pcre2_internal.h"
|
||||
|
@ -208,16 +209,17 @@ of PRIV avoids name clashes. */
|
|||
#include "pcre2_tables.c"
|
||||
#include "pcre2_ucd.c"
|
||||
|
||||
/* When PCRE2_CODE_UNIT_WIDTH is unset, pcre2_internal.h does not include
|
||||
/* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
|
||||
pcre2_intmodedep.h, which is where mode-dependent macros and structures are
|
||||
defined. We can now include it for each supported code unit width. Because
|
||||
PCRE2_CODE_UNIT_WIDTH was not defined before including pcre2.h, it will have
|
||||
left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately while
|
||||
including these files, and then restore it to a no-op. Because LINK_SIZE may be
|
||||
changed in 16-bit mode and forced to 1 in 32-bit mode, the order of these
|
||||
inclusions should not be changed. */
|
||||
PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
|
||||
have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
|
||||
while including these files, and then restore it to a no-op. Because LINK_SIZE
|
||||
may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
|
||||
these inclusions should not be changed. */
|
||||
|
||||
#undef PCRE2_SUFFIX
|
||||
#undef PCRE2_CODE_UNIT_WIDTH
|
||||
|
||||
#ifdef SUPPORT_PCRE8
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
|
@ -576,7 +578,7 @@ static coptstruct coptlist[] = {
|
|||
{ "pcre16", CONF_FIX, SUPPORT_16 },
|
||||
{ "pcre32", CONF_FIX, SUPPORT_32 },
|
||||
{ "pcre8", CONF_FIX, SUPPORT_8 },
|
||||
{ "utf", CONF_INT, PCRE2_CONFIG_UTF }
|
||||
{ "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
|
||||
};
|
||||
|
||||
#define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct)
|
||||
|
@ -2815,22 +2817,26 @@ pattern.
|
|||
Arguments:
|
||||
what code for the required information
|
||||
where where to put the answer
|
||||
unsetok PCRE2_ERROR_UNSET is an "expected" result
|
||||
|
||||
Returns: the return from pcre2_pattern_info()
|
||||
*/
|
||||
|
||||
static int
|
||||
pattern_info(int what, void *where)
|
||||
pattern_info(int what, void *where, BOOL unsetok)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
|
||||
if (rc >= 0) return 0;
|
||||
fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
|
||||
if (rc != PCRE2_ERROR_UNSET || !unsetok)
|
||||
{
|
||||
fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
|
||||
what);
|
||||
if (rc == PCRE2_ERROR_BADMODE)
|
||||
if (rc == PCRE2_ERROR_BADMODE)
|
||||
fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
|
||||
"%d-bit mode\n", test_mode,
|
||||
8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -3026,32 +3032,61 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
{
|
||||
const void *nametable;
|
||||
const uint8_t *start_bits;
|
||||
BOOL match_limit_set, recursion_limit_set;
|
||||
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
|
||||
hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit,
|
||||
maxlookbehind, minlength, nameentrysize, namecount, newline_convention,
|
||||
recursion_limit;
|
||||
|
||||
/* These info requests may return PCRE2_ERROR_UNSET. */
|
||||
|
||||
switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
|
||||
{
|
||||
case 0:
|
||||
match_limit_set = TRUE;
|
||||
break;
|
||||
|
||||
case PCRE2_ERROR_UNSET:
|
||||
match_limit_set = FALSE;
|
||||
break;
|
||||
|
||||
default:
|
||||
return PR_ABEND;
|
||||
}
|
||||
|
||||
switch(pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit, TRUE))
|
||||
{
|
||||
case 0:
|
||||
recursion_limit_set = TRUE;
|
||||
break;
|
||||
|
||||
case PCRE2_ERROR_UNSET:
|
||||
recursion_limit_set = FALSE;
|
||||
break;
|
||||
|
||||
default:
|
||||
return PR_ABEND;
|
||||
}
|
||||
|
||||
/* These info requests should always succeed. */
|
||||
|
||||
if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax) +
|
||||
pattern_info(PCRE2_INFO_BSR, &bsr_convention) +
|
||||
pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count) +
|
||||
pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits) +
|
||||
pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit) +
|
||||
pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype) +
|
||||
pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf) +
|
||||
pattern_info(PCRE2_INFO_JCHANGED, &jchanged) +
|
||||
pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit) +
|
||||
pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype) +
|
||||
pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty) +
|
||||
pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit) +
|
||||
pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind) +
|
||||
pattern_info(PCRE2_INFO_MINLENGTH, &minlength) +
|
||||
pattern_info(PCRE2_INFO_NAMECOUNT, &namecount) +
|
||||
pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize) +
|
||||
pattern_info(PCRE2_INFO_NAMETABLE, &nametable) +
|
||||
pattern_info(PCRE2_INFO_NEWLINE, &newline_convention) +
|
||||
pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit)
|
||||
if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
|
||||
pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
|
||||
pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
|
||||
pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
|
||||
pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
|
||||
pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
|
||||
pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
|
||||
pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
|
||||
pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
|
||||
pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
|
||||
pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
|
||||
pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) +
|
||||
pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
|
||||
pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
|
||||
pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
|
||||
pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
|
||||
pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
|
||||
!= 0)
|
||||
return PR_ABEND;
|
||||
|
||||
|
@ -3063,10 +3098,10 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
if (maxlookbehind > 0)
|
||||
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
|
||||
|
||||
if (match_limit != UINT32_MAX)
|
||||
if (match_limit_set)
|
||||
fprintf(outfile, "Match limit = %u\n", match_limit);
|
||||
|
||||
if (recursion_limit != UINT32_MAX)
|
||||
if (recursion_limit_set)
|
||||
fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
|
||||
|
||||
if (namecount > 0)
|
||||
|
@ -3099,8 +3134,8 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
|
||||
if (match_empty) fprintf(outfile, "May match empty string\n");
|
||||
|
||||
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options);
|
||||
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options);
|
||||
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
|
||||
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
|
||||
|
||||
/* Remove UTF/UCP if they were there only because of forbid_utf. This saves
|
||||
cluttering up the verification output of non-UTF test files. */
|
||||
|
@ -3234,7 +3269,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
|
||||
{
|
||||
size_t jitsize;
|
||||
if (pattern_info(PCRE2_INFO_JITSIZE, &jitsize) == 0)
|
||||
if (pattern_info(PCRE2_INFO_JITSIZE, &jitsize, FALSE) == 0)
|
||||
{
|
||||
if (jitsize > 0)
|
||||
fprintf(outfile, "JIT compilation was successful\n");
|
||||
|
@ -3625,14 +3660,14 @@ if ((pat_patctl.control & CTL_MEMORY) != 0)
|
|||
if (test_mode == 32) cblock_size = sizeof(pcre2_real_code_32);
|
||||
#endif
|
||||
|
||||
(void)pattern_info(PCRE2_INFO_SIZE, &size);
|
||||
(void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count);
|
||||
(void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
|
||||
(void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
|
||||
(void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
|
||||
(void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
|
||||
fprintf(outfile, "Memory allocation (code space): %d\n",
|
||||
(int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
|
||||
if (pat_patctl.jit != 0)
|
||||
{
|
||||
(void)pattern_info(PCRE2_INFO_JITSIZE, &size);
|
||||
(void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
|
||||
fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
|
||||
}
|
||||
}
|
||||
|
@ -4452,7 +4487,7 @@ for (gmatched = 0;; gmatched++)
|
|||
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
|
||||
{
|
||||
uint32_t maxcapcount;
|
||||
if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount) < 0)
|
||||
if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
|
||||
return PR_SKIP;
|
||||
capcount = maxcapcount + 1; /* Allow for full match */
|
||||
if (capcount > (int)dat_datctl.oveccount) capcount = dat_datctl.oveccount;
|
||||
|
@ -4943,7 +4978,7 @@ printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
|
|||
printf(" pcre8 8 bit library support enabled [0, 1]\n");
|
||||
printf(" pcre16 16 bit library support enabled [0, 1]\n");
|
||||
printf(" pcre32 32 bit library support enabled [0, 1]\n");
|
||||
printf(" utf Unicode Transformation Format supported [0, 1]\n");
|
||||
printf(" unicode Unicode and UTF support enabled [0, 1]\n");
|
||||
printf(" -d set default pattern control 'debug'\n");
|
||||
printf(" -dfa set default subject control 'dfa'\n");
|
||||
printf(" -help show usage information\n");
|
||||
|
@ -5057,7 +5092,7 @@ printf(" 16-bit support\n");
|
|||
printf(" 32-bit support\n");
|
||||
#endif
|
||||
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_UTF, &rc, sizeof(rc));
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &rc, sizeof(rc));
|
||||
if (rc != 0)
|
||||
printf(" UTF support (Unicode version %s)\n", uversion);
|
||||
else
|
||||
|
|
|
@ -384,15 +384,15 @@ aaaaa2
|
|||
010203040506
|
||||
RC=0
|
||||
======== STDERR ========
|
||||
pcre2grep: pcre2_match() gave error -47 while matching this text:
|
||||
pcre2grep: pcre2_match() gave error -45 while matching this text:
|
||||
|
||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
|
||||
pcre2grep: pcre2_match() gave error -47 while matching this text:
|
||||
pcre2grep: pcre2_match() gave error -45 while matching this text:
|
||||
|
||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
|
||||
pcre2grep: Error -46, -47 or -52 means that a resource limit was exceeded.
|
||||
pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded.
|
||||
pcre2grep: Check your regex for nested unlimited loops.
|
||||
---------------------------- Test 38 ------------------------------
|
||||
This line contains a binary zero here > |