API documentation and a lot of little related changes to the code.

This commit is contained in:
Philip.Hazel 2014-09-19 07:43:39 +00:00
parent de4f203346
commit eee8530add
40 changed files with 3484 additions and 459 deletions

View File

@ -149,8 +149,8 @@ SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL
"Enable use of Just-in-time compiling in pcre2grep.") "Enable use of Just-in-time compiling in pcre2grep.")
SET(PCRE2_SUPPORT_UTF OFF CACHE BOOL SET(PCRE2_SUPPORT_UNICODE OFF CACHE BOOL
"Enable support for Unicode Transformation Format (UTF-8/UTF-16/UTF-32) encoding.") "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks") "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
@ -245,9 +245,9 @@ IF(PCRE2_SUPPORT_BSR_ANYCRLF)
SET(BSR_ANYCRLF 1) SET(BSR_ANYCRLF 1)
ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF) ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)
IF(PCRE2_SUPPORT_UTF) IF(PCRE2_SUPPORT_UNICODE)
SET(SUPPORT_UTF 1) SET(SUPPORT_UNICODE 1)
ENDIF(PCRE2_SUPPORT_UTF) ENDIF(PCRE2_SUPPORT_UNICODE)
IF(PCRE2_SUPPORT_JIT) IF(PCRE2_SUPPORT_JIT)
SET(SUPPORT_JIT 1) SET(SUPPORT_JIT 1)
@ -709,7 +709,7 @@ IF(PCRE2_SHOW_REPORT)
MESSAGE(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE16}") MESSAGE(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE16}")
MESSAGE(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE32}") MESSAGE(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE32}")
MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}") MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}")
MESSAGE(STATUS " Enable UTF support .............. : ${PCRE2_SUPPORT_UTF}") MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}") MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}")
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}") MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}") MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}")

View File

@ -76,7 +76,10 @@ AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
# doc/html/pcreunicode.html # doc/html/pcreunicode.html
# FIXME # FIXME
#dist_man_MANS = \ dist_man_MANS = \
doc/pcre2api.3
# doc/pcre2-config.1 \ # doc/pcre2-config.1 \
# doc/pcre2.3 \ # doc/pcre2.3 \
# doc/pcre2-16.3 \ # doc/pcre2-16.3 \
@ -108,7 +111,6 @@ AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
# doc/pcre2_utf16_to_host_byte_order.3 \ # doc/pcre2_utf16_to_host_byte_order.3 \
# doc/pcre2_utf32_to_host_byte_order.3 \ # doc/pcre2_utf32_to_host_byte_order.3 \
# doc/pcre2_version.3 \ # doc/pcre2_version.3 \
# doc/pcre2api.3 \
# doc/pcre2build.3 \ # doc/pcre2build.3 \
# doc/pcre2callout.3 \ # doc/pcre2callout.3 \
# doc/pcre2compat.3 \ # doc/pcre2compat.3 \

View File

@ -314,10 +314,11 @@ else
fi fi
fi fi
# UTF support always applies to all bit sizes if both are supported; we can't # UTF support is implied by Unicode support, and it always applies to all bit
# have UTF-8 support without UTF-16 or UTF-32 support. # sizes if both are supported; we can't have UTF-8 support without UTF-16 or
# UTF-32 support.
$sim ./pcre2test -C utf >/dev/null $sim ./pcre2test -C unicode >/dev/null
utf=$? utf=$?
jitopt= jitopt=

View File

@ -25,7 +25,7 @@
#cmakedefine SUPPORT_JIT 1 #cmakedefine SUPPORT_JIT 1
#cmakedefine SUPPORT_PCRE2GREP_JIT 1 #cmakedefine SUPPORT_PCRE2GREP_JIT 1
#cmakedefine SUPPORT_UTF 1 #cmakedefine SUPPORT_UNICODE 1
#cmakedefine SUPPORT_VALGRIND 1 #cmakedefine SUPPORT_VALGRIND 1
#cmakedefine BSR_ANYCRLF 1 #cmakedefine BSR_ANYCRLF 1

View File

@ -137,11 +137,11 @@ AC_ARG_ENABLE(rebuild-chartables,
[rebuild character tables in current locale]), [rebuild character tables in current locale]),
, enable_rebuild_chartables=no) , enable_rebuild_chartables=no)
# Handle --enable-utf (disabled by default) # Handle --enable-unicode (disabled by default)
AC_ARG_ENABLE(utf, AC_ARG_ENABLE(unicode,
AS_HELP_STRING([--enable-utf], AS_HELP_STRING([--enable-unicode],
[enable UTF-8/16/32 support (incompatible with --enable-ebcdic)]), [enable Unicode support (incompatible with --enable-ebcdic)]),
, enable_utf=unset) , enable_unicode=unset)
# Handle newline options # Handle newline options
ac_pcre2_newline=lf ac_pcre2_newline=lf
@ -288,10 +288,10 @@ then
AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled]) AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled])
fi fi
# enable_utf is disabled by default. # enable_unicode is disabled by default.
if test "x$enable_utf" = "xunset" if test "x$enable_unicode" = "xunset"
then then
enable_utf=no enable_unicode=no
fi fi
# Convert the newline identifier into the appropriate integer value. These must # Convert the newline identifier into the appropriate integer value. These must
@ -320,8 +320,8 @@ fi
# #
if test "x$enable_ebcdic" = "xyes"; then if test "x$enable_ebcdic" = "xyes"; then
enable_rebuild_chartables=yes enable_rebuild_chartables=yes
if test "x$enable_utf" = "xyes"; then if test "x$enable_unicode" = "xyes"; then
AC_MSG_ERROR([support for EBCDIC and UTF-8/16/32 cannot be enabled at the same time]) AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time])
fi fi
fi fi
@ -372,7 +372,7 @@ AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes")
AM_CONDITIONAL(WITH_PCRE32, test "x$enable_pcre32" = "xyes") AM_CONDITIONAL(WITH_PCRE32, test "x$enable_pcre32" = "xyes")
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes") AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes") AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes") AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes") AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
# Checks for typedefs, structures, and compiler characteristics. # Checks for typedefs, structures, and compiler characteristics.
@ -513,12 +513,12 @@ if test "$enable_pcre2grep_jit" = "yes"; then
Define to any value to enable JIT support in pcre2grep.]) Define to any value to enable JIT support in pcre2grep.])
fi fi
if test "$enable_utf" = "yes"; then if test "$enable_unicode" = "yes"; then
AC_DEFINE([SUPPORT_UTF], [], [ AC_DEFINE([SUPPORT_UNICODE], [], [
Define to any value to enable support for the UTF-8/16/32 Unicode encoding. Define to any value to enable support for Unicode and UTF encoding.
This will work even in an EBCDIC environment, but it is incompatible This will work even in an EBCDIC environment, but it is incompatible
with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
code *or* ASCII/UTF-8/16/32, but not both at once.]) code *or* ASCII/Unicode, but not both at once.])
fi fi
if test "$enable_stack_for_recursion" = "no"; then if test "$enable_stack_for_recursion" = "no"; then
@ -854,7 +854,7 @@ $PACKAGE-$VERSION configuration summary:
Build 16-bit pcre2 library ...... : ${enable_pcre16} Build 16-bit pcre2 library ...... : ${enable_pcre16}
Build 32-bit pcre2 library ...... : ${enable_pcre32} Build 32-bit pcre2 library ...... : ${enable_pcre32}
Enable JIT compiling support .... : ${enable_jit} Enable JIT compiling support .... : ${enable_jit}
Enable UTF-8/16/32 support ...... : ${enable_utf} Enable Unicode support .......... : ${enable_unicode}
Newline char/sequence ........... : ${enable_newline} Newline char/sequence ........... : ${enable_newline}
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf} \R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
EBCDIC coding ................... : ${enable_ebcdic} EBCDIC coding ................... : ${enable_ebcdic}

2704
doc/pcre2api.3 Normal file

File diff suppressed because it is too large Load Diff

View File

@ -120,7 +120,7 @@ to the same value:
pcre16 the 16-bit library was built pcre16 the 16-bit library was built
pcre32 the 32-bit library was built pcre32 the 32-bit library was built
pcre8 the 8-bit library was built pcre8 the 8-bit library was built
utf UTF and Unicode property support is available unicode Unicode support is available
.sp .sp
If an unknown option is given, an error message is output; the exit code is 0. If an unknown option is given, an error message is output; the exit code is 0.
.TP 10 .TP 10

254
doc/pcre2unicode.3 Normal file
View File

@ -0,0 +1,254 @@
.TH PCRE2UNICODE 3 "16 September 2014" "PCRE2 10.00"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "UNICODE AND UTF SUPPORT"
.rs
.sp
When PCRE2 is built with Unicode support, it acquires knowledge of Unicode
character properties and can process text strings in UTF-8, UTF-16, or UTF-32
format (depending on the code unit width). By default, PCRE2 assumes that one
code unit is one character. To process a pattern as a UTF string, where a
character may require more than one code unit, you must call
.\" HREF
\fBpcre2_compile()\fP
.\"
with the PCRE2_UTF option flag, or the pattern must start with the sequence
(*UTF). When either of these is the case, both the pattern and any subject
strings that are matched against it are treated as UTF strings instead of
strings of individual one-code-unit characters.
.P
If you build PCRE2 with Unicode support, the library will be bigger, but the
additional run time overhead is limited to testing the PCRE2_UTF flag
occasionally, so should not be very much.
.
.
.SH "UNICODE PROPERTY SUPPORT"
.rs
.sp
When PCRE2 is built with Unicode support, the escape sequences \ep{..},
\eP{..}, and \eX can be used. The Unicode properties that can be tested are
limited to the general category properties such as Lu for an upper case letter
or Nd for a decimal number, the Unicode script names such as Arabic or Han, and
the derived properties Any and L&. Full lists are given in the
.\" HREF
\fBpcre2pattern\fP
.\"
and
.\" HREF
\fBpcre2syntax\fP
.\"
documentation. Only the short names for properties are supported. For example,
\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
compatibility with Perl 5.6. PCRE2 does not support this.
.
.
.SH "WIDE CHARACTERS AND UTF MODES"
.rs
.sp
Codepoints less than 256 can be specified in patterns by either braced or
unbraced hexadecimal escape sequences (for example, \ex{b3} or \exb3). Larger
values have to use braced sequences. Unbraced octal code points up to \e777 are
also recognized; larger ones can be coded using \eo{...}.
.P
In UTF modes, repeat quantifiers apply to complete UTF characters, not to
individual code units.
.P
In UTF modes, the dot metacharacter matches one UTF character instead of a
single code unit.
.P
The escape sequence \eC can be used to match a single code unit, in a UTF mode,
but its use can lead to some strange effects because it breaks up multi-unit
characters (see the description of \eC in the
.\" HREF
\fBpcre2pattern\fP
.\"
documentation). The use of \eC is not supported in the alternative matching
function \fBpcre2_dfa_match()\fP, nor is it supported in UTF mode by the JIT
optimization. If JIT optimization is requested for a UTF pattern that contains
\eC, it will not succeed, and so the matching will be carried out by the normal
interpretive function.
.P
The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly test
characters of any code value, but, by default, the characters that PCRE2
recognizes as digits, spaces, or word characters remain the same set as in
non-UTF mode, all with code points less than 256. This remains true even when
PCRE2 is built to include Unicode support, because to do otherwise would slow
down matching in many common cases. Note that this also applies to \eb
and \eB, because they are defined in terms of \ew and \eW. If you want
to test for a wider sense of, say, "digit", you can use explicit Unicode
property tests such as \ep{Nd}. Alternatively, if you set the PCRE2_UCP option,
the way that the character escapes work is changed so that Unicode properties
are used to determine which characters match. There are more details in the
section on
.\" HTML <a href="pcre2pattern.html#genericchartypes">
.\" </a>
generic character types
.\"
in the
.\" HREF
\fBpcre2pattern\fP
.\"
documentation.
.P
Similarly, characters that match the POSIX named character classes are all
low-valued characters, unless the PCRE2_UCP option is set.
.P
However, the special horizontal and vertical white space matching escapes (\eh,
\eH, \ev, and \eV) do match all the appropriate Unicode characters, whether or
not PCRE2_UCP is set.
.P
Case-insensitive matching in UTF mode makes use of Unicode properties. A few
Unicode characters such as Greek sigma have more than two codepoints that are
case-equivalent, and these are treated as such.
.
.
.SH "VALIDITY OF UTF STRINGS"
.rs
.sp
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
are (by default) checked for validity on entry to the relevant functions.
If an invalid UTF string is passed, an error return is given.
.P
UTF-16 and UTF-32 strings can indicate their endianness by a special code
known as a byte-order mark (BOM). The PCRE2 functions do not handle this,
expecting strings to be in host byte order.
.P
The entire string is checked before any other processing takes place. In
addition to checking the format of the string, there is a check to ensure that
all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area.
The so-called "non-character" code points are not excluded because Unicode
corrigendum #9 makes it clear that they should not be.
.P
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
where they are used in pairs to encode code points with values greater than
0xFFFF. The code points that are encoded by UTF-16 pairs are available
independently in the UTF-8 and UTF-32 encodings. (In other words, the whole
surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
UTF-32.)
.P
In some situations, you may already know that your strings are valid, and
therefore want to skip these checks in order to improve performance, for
example in the case of a long subject string that is being scanned repeatedly.
If you set the PCRE2_NO_UTF_CHECK flag at compile time or at run time, PCRE2
assumes that the pattern or subject it is given (respectively) contains only
valid UTF code unit sequences.
.P
Passing PCRE2_NO_UTF_CHECK to \fBpcre2_compile()\fP just disables the check for
the pattern; it does not also apply to subject strings. If you want to disable
the check for a subject string you must pass this option to \fBpcre2_match()\fP
or \fBpcre2_dfa_match()\fP.
.P
If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
is undefined and your program may crash or loop indefinitely.
.
.
.\" HTML <a name="utf8strings"></a>
.SS "Errors in UTF-8 strings"
.rs
.sp
The following negative error codes are given for invalid UTF-8 strings:
.sp
PCRE2_ERROR_UTF8_ERR1
PCRE2_ERROR_UTF8_ERR2
PCRE2_ERROR_UTF8_ERR3
PCRE2_ERROR_UTF8_ERR4
PCRE2_ERROR_UTF8_ERR5
.sp
The string ends with a truncated UTF-8 character; the code specifies how many
bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be
no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279)
allows for up to 6 bytes, and this is checked first; hence the possibility of
4 or 5 missing bytes.
.sp
PCRE2_ERROR_UTF8_ERR6
PCRE2_ERROR_UTF8_ERR7
PCRE2_ERROR_UTF8_ERR8
PCRE2_ERROR_UTF8_ERR9
PCRE2_ERROR_UTF8_ERR10
.sp
The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the
character do not have the binary value 0b10 (that is, either the most
significant bit is 0, or the next bit is 1).
.sp
PCRE2_ERROR_UTF8_ERR11
PCRE2_ERROR_UTF8_ERR12
.sp
A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long;
these code points are excluded by RFC 3629.
.sp
PCRE2_ERROR_UTF8_ERR13
.sp
A 4-byte character has a value greater than 0x10ffff; these code points are
excluded by RFC 3629.
.sp
PCRE2_ERROR_UTF8_ERR14
.sp
A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of
code points is reserved by RFC 3629 for use with UTF-16, and so is excluded
from UTF-8.
.sp
PCRE2_ERROR_UTF8_ERR15
PCRE2_ERROR_UTF8_ERR16
PCRE2_ERROR_UTF8_ERR17
PCRE2_ERROR_UTF8_ERR18
PCRE2_ERROR_UTF8_ERR19
.sp
A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a
value that can be represented by fewer bytes, which is invalid. For example,
the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just
one byte.
.sp
PCRE2_ERROR_UTF8_ERR20
.sp
The two most significant bits of the first byte of a character have the binary
value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a
byte can only validly occur as the second or subsequent byte of a multi-byte
character.
.sp
PCRE2_ERROR_UTF8_ERR21
.sp
The first byte of a character has the value 0xfe or 0xff. These values can
never occur in a valid UTF-8 string.
.
.
.\" HTML <a name="utf16strings"></a>
.SS "Errors in UTF-16 strings"
.rs
.sp
The following negative error codes are given for invalid UTF-16 strings:
.sp
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at end of string
PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate follows high surrogate
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
.sp
.
.
.\" HTML <a name="utf32strings"></a>
.SS "Errors in UTF-32 strings"
.rs
.sp
The following negative error codes are given for invalid UTF-32 strings:
.sp
PCRE2_ERROR_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff)
PCRE2_ERROR_UTF32_ERR2 Code point is greater than 0x10ffff
.sp
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 16 September 2014
Copyright (c) 1997-2014 University of Cambridge.
.fi

View File

@ -202,7 +202,7 @@ if [ $ISGCC -ne 0 -a $usemain -ne 0 ]; then
echo "---------- Maximally configured test with -O2 ----------" echo "---------- Maximally configured test with -O2 ----------"
SAVECLFAGS="$CFLAGS" SAVECLFAGS="$CFLAGS"
CFLAGS="$CFLAGS -O2" CFLAGS="$CFLAGS -O2"
opts="--disable-shared --enable-utf $enable_jit --enable-pcre16 --enable-pcre32" opts="--disable-shared --enable-unicode $enable_jit --enable-pcre16 --enable-pcre32"
runtest runtest
CFLAGS="$SAVECFLAGS" CFLAGS="$SAVECFLAGS"
fi fi
@ -211,23 +211,23 @@ if [ $usemain -ne 0 ]; then
echo "---------- Non-JIT tests in the current directory ----------" echo "---------- Non-JIT tests in the current directory ----------"
for opts in \ for opts in \
"" \ "" \
"--enable-utf --disable-static" \ "--enable-unicode --disable-static" \
"--disable-stack-for-recursion --disable-shared" \ "--disable-stack-for-recursion --disable-shared" \
"--enable-utf --disable-shared" \ "--enable-unicode --disable-shared" \
"--enable-utf --disable-stack-for-recursion --disable-shared" \ "--enable-unicode --disable-stack-for-recursion --disable-shared" \
"--enable-utf --with-link-size=3 --disable-shared" \ "--enable-unicode --with-link-size=3 --disable-shared" \
"--enable-rebuild-chartables --disable-shared" \ "--enable-rebuild-chartables --disable-shared" \
"--enable-newline-is-any --disable-shared" \ "--enable-newline-is-any --disable-shared" \
"--enable-newline-is-cr --disable-shared" \ "--enable-newline-is-cr --disable-shared" \
"--enable-newline-is-crlf --disable-shared" \ "--enable-newline-is-crlf --disable-shared" \
"--enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" \ "--enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" \
"--enable-utf --enable-newline-is-any --disable-stack-for-recursion --disable-static" \ "--enable-unicode --enable-newline-is-any --disable-stack-for-recursion --disable-static" \
"--enable-pcre16" \ "--enable-pcre16" \
"--enable-pcre16 --disable-stack-for-recursion --disable-shared" \ "--enable-pcre16 --disable-stack-for-recursion --disable-shared" \
"--enable-pcre16 --enable-utf --disable-stack-for-recursion --disable-shared" \ "--enable-pcre16 --enable-unicode --disable-stack-for-recursion --disable-shared" \
"--enable-pcre32" \ "--enable-pcre32" \
"--enable-pcre32 --disable-stack-for-recursion --disable-shared" \ "--enable-pcre32 --disable-stack-for-recursion --disable-shared" \
"--enable-pcre32 --enable-utf --disable-stack-for-recursion --disable-shared" \ "--enable-pcre32 --enable-unicode --disable-stack-for-recursion --disable-shared" \
"--enable-pcre32 --enable-pcre16 --disable-shared" \ "--enable-pcre32 --enable-pcre16 --disable-shared" \
"--enable-pcre32 --enable-pcre16 --disable-pcre8 --disable-shared" "--enable-pcre32 --enable-pcre16 --disable-pcre8 --disable-shared"
do do
@ -241,18 +241,18 @@ if [ $usejit -ne 0 ]; then
echo "---------- JIT tests in the current directory ----------" echo "---------- JIT tests in the current directory ----------"
for opts in \ for opts in \
"--enable-jit --disable-shared" \ "--enable-jit --disable-shared" \
"--enable-jit --enable-utf --disable-shared" \ "--enable-jit --enable-unicode --disable-shared" \
"--enable-jit --enable-utf --with-link-size=3 --disable-shared" \ "--enable-jit --enable-unicode --with-link-size=3 --disable-shared" \
"--enable-jit --enable-pcre16 --enable-utf --disable-shared" \ "--enable-jit --enable-pcre16 --enable-unicode --disable-shared" \
"--enable-jit --enable-pcre16 --disable-pcre8 --disable-shared" \ "--enable-jit --enable-pcre16 --disable-pcre8 --disable-shared" \
"--enable-jit --enable-pcre16 --disable-pcre8 --enable-utf --disable-shared" \ "--enable-jit --enable-pcre16 --disable-pcre8 --enable-unicode --disable-shared" \
"--enable-jit --enable-pcre16 --enable-utf --with-link-size=3 --disable-shared" \ "--enable-jit --enable-pcre16 --enable-unicode --with-link-size=3 --disable-shared" \
"--enable-jit --enable-pcre16 --enable-utf --with-link-size=4 --disable-shared" \ "--enable-jit --enable-pcre16 --enable-unicode --with-link-size=4 --disable-shared" \
"--enable-jit --enable-pcre32 --enable-utf --disable-shared" \ "--enable-jit --enable-pcre32 --enable-unicode --disable-shared" \
"--enable-jit --enable-pcre32 --disable-pcre8 --disable-shared" \ "--enable-jit --enable-pcre32 --disable-pcre8 --disable-shared" \
"--enable-jit --enable-pcre32 --disable-pcre8 --enable-utf --disable-shared" \ "--enable-jit --enable-pcre32 --disable-pcre8 --enable-unicode --disable-shared" \
"--enable-jit --enable-pcre32 --enable-utf --with-link-size=4 --disable-shared" \ "--enable-jit --enable-pcre32 --enable-unicode --with-link-size=4 --disable-shared" \
"--enable-jit --enable-pcre32 --enable-pcre16 --disable-pcre8 --enable-utf --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" "--enable-jit --enable-pcre32 --enable-pcre16 --disable-pcre8 --enable-unicode --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared"
do do
runtest runtest
done done
@ -267,8 +267,8 @@ if [ $usevalgrind -ne 0 ]; then
withvalgrind="with valgrind" withvalgrind="with valgrind"
for opts in \ for opts in \
"--enable-utf --disable-stack-for-recursion --disable-shared" \ "--enable-unicode --disable-stack-for-recursion --disable-shared" \
"--enable-utf --with-link-size=3 --disable-shared" \ "--enable-unicode --with-link-size=3 --disable-shared" \
"--disable-shared" "--disable-shared"
do do
opts="--enable-valgrind $opts" opts="--enable-valgrind $opts"
@ -277,8 +277,8 @@ if [ $usevalgrind -ne 0 ]; then
if [ $usejit -ne 0 ]; then if [ $usejit -ne 0 ]; then
for opts in \ for opts in \
"--enable-jit --enable-utf --disable-shared" \ "--enable-jit --enable-unicode --disable-shared" \
"--enable-jit --enable-pcre16 --enable-pcre32 --enable-utf" "--enable-jit --enable-pcre16 --enable-pcre32 --enable-unicode"
do do
opts="--enable-valgrind $opts" opts="--enable-valgrind $opts"
runtest runtest
@ -324,7 +324,7 @@ fi
if [ $usetmp -ne 0 ]; then if [ $usetmp -ne 0 ]; then
for opts in \ for opts in \
"--enable-utf --disable-shared" "--enable-unicode --disable-shared"
do do
runtest runtest
done done

View File

@ -472,7 +472,7 @@ print("condition to cut out the tables when not needed. But don't leave")
print("a totally empty module because some compilers barf at that.") print("a totally empty module because some compilers barf at that.")
print("Instead, just supply small dummy tables. */") print("Instead, just supply small dummy tables. */")
print() print()
print("#ifndef SUPPORT_UTF") print("#ifndef SUPPORT_UNICODE")
print("const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};") print("const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};")
print("const uint8_t PRIV(ucd_stage1)[] = {0};") print("const uint8_t PRIV(ucd_stage1)[] = {0};")
print("const uint16_t PRIV(ucd_stage2)[] = {0};") print("const uint16_t PRIV(ucd_stage2)[] = {0};")
@ -507,7 +507,7 @@ print_table(min_stage2, 'PRIV(ucd_stage2)', min_block_size)
print("#if UCD_BLOCK_SIZE != %d" % min_block_size) print("#if UCD_BLOCK_SIZE != %d" % min_block_size)
print("#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h") print("#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h")
print("#endif") print("#endif")
print("#endif /* SUPPORT_UTF */") print("#endif /* SUPPORT_UNICODE */")
print() print()
print("#endif /* PCRE2_PCRE2TEST */") print("#endif /* PCRE2_PCRE2TEST */")

View File

@ -19,8 +19,8 @@ one. */
#include "../src/config.h" #include "../src/config.h"
#endif #endif
#ifndef SUPPORT_UTF #ifndef SUPPORT_UNICODE
#define SUPPORT_UTF #define SUPPORT_UNICODE
#endif #endif
#include <ctype.h> #include <ctype.h>

View File

@ -278,11 +278,11 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to any value to enable the 8 bit PCRE2 library. */ /* Define to any value to enable the 8 bit PCRE2 library. */
/* #undef SUPPORT_PCRE8 */ /* #undef SUPPORT_PCRE8 */
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding. /* Define to any value to enable support for Unicode and UTF encoding. This
This will work even in an EBCDIC environment, but it is incompatible with will work even in an EBCDIC environment, but it is incompatible with the
the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or* EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
ASCII/UTF-8/16/32, but not both at once. */ ASCII/Unicode, but not both at once. */
/* #undef SUPPORT_UTF */ /* #undef SUPPORT_UNICODE */
/* Define to any value for valgrind support to find invalid memory reads. */ /* Define to any value for valgrind support to find invalid memory reads. */
/* #undef SUPPORT_VALGRIND */ /* #undef SUPPORT_VALGRIND */

View File

@ -193,32 +193,32 @@ must all be greater than zero. */
#define PCRE2_ERROR_UTF32_ERR1 (-27) #define PCRE2_ERROR_UTF32_ERR1 (-27)
#define PCRE2_ERROR_UTF32_ERR2 (-28) #define PCRE2_ERROR_UTF32_ERR2 (-28)
/* Error codes for pcre2[_dfa]_match() */ /* Error codes for pcre2[_dfa]_match(), substring extraction functions, and
context functions. */
#define PCRE2_ERROR_BADCOUNT (-29) #define PCRE2_ERROR_BADDATA (-29)
#define PCRE2_ERROR_BADENDIANNESS (-30) #define PCRE2_ERROR_BADLENGTH (-30)
#define PCRE2_ERROR_BADLENGTH (-31) #define PCRE2_ERROR_BADMAGIC (-31)
#define PCRE2_ERROR_BADMAGIC (-32) #define PCRE2_ERROR_BADMODE (-32)
#define PCRE2_ERROR_BADMODE (-33) #define PCRE2_ERROR_BADOFFSET (-33)
#define PCRE2_ERROR_BADOFFSET (-34) #define PCRE2_ERROR_BADOPTION (-34)
#define PCRE2_ERROR_BADOPTION (-35) #define PCRE2_ERROR_BADUTFOFFSET (-35)
#define PCRE2_ERROR_BADUTFOFFSET (-36) #define PCRE2_ERROR_CALLOUT (-36) /* Never used by PCRE2 itself */
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */ #define PCRE2_ERROR_DFA_BADRESTART (-37)
#define PCRE2_ERROR_DFA_BADRESTART (-38) #define PCRE2_ERROR_DFA_RECURSE (-38)
#define PCRE2_ERROR_DFA_RECURSE (-39) #define PCRE2_ERROR_DFA_UCOND (-39)
#define PCRE2_ERROR_DFA_UCOND (-40) #define PCRE2_ERROR_DFA_UITEM (-40)
#define PCRE2_ERROR_DFA_UITEM (-41) #define PCRE2_ERROR_DFA_WSSIZE (-41)
#define PCRE2_ERROR_DFA_UMLIMIT (-42) #define PCRE2_ERROR_INTERNAL (-42)
#define PCRE2_ERROR_DFA_WSSIZE (-43) #define PCRE2_ERROR_JIT_BADOPTION (-43)
#define PCRE2_ERROR_INTERNAL (-44) #define PCRE2_ERROR_JIT_STACKLIMIT (-44)
#define PCRE2_ERROR_JIT_BADOPTION (-45) #define PCRE2_ERROR_MATCHLIMIT (-45)
#define PCRE2_ERROR_JIT_STACKLIMIT (-46) #define PCRE2_ERROR_NOMEMORY (-46)
#define PCRE2_ERROR_MATCHLIMIT (-47) #define PCRE2_ERROR_NOSUBSTRING (-47)
#define PCRE2_ERROR_NOMEMORY (-48) #define PCRE2_ERROR_NULL (-48)
#define PCRE2_ERROR_NOSUBSTRING (-49) #define PCRE2_ERROR_RECURSELOOP (-49)
#define PCRE2_ERROR_NULL (-50) #define PCRE2_ERROR_RECURSIONLIMIT (-50)
#define PCRE2_ERROR_RECURSELOOP (-51) #define PCRE2_ERROR_UNSET (-51)
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
/* Request types for pcre2_pattern_info() */ /* Request types for pcre2_pattern_info() */
@ -257,8 +257,8 @@ must all be greater than zero. */
#define PCRE2_CONFIG_PARENSLIMIT 7 #define PCRE2_CONFIG_PARENSLIMIT 7
#define PCRE2_CONFIG_RECURSIONLIMIT 5 #define PCRE2_CONFIG_RECURSIONLIMIT 5
#define PCRE2_CONFIG_STACKRECURSE 8 #define PCRE2_CONFIG_STACKRECURSE 8
#define PCRE2_CONFIG_UNICODE_VERSION 9 #define PCRE2_CONFIG_UNICODE 9
#define PCRE2_CONFIG_UTF 10 #define PCRE2_CONFIG_UNICODE_VERSION 10
#define PCRE2_CONFIG_VERSION 11 #define PCRE2_CONFIG_VERSION 11
/* Types for code units in patterns and subject strings. */ /* Types for code units in patterns and subject strings. */
@ -338,7 +338,7 @@ expanded for each width below. Start with functions that give general
information. */ information. */
#define PCRE2_GENERAL_INFO_FUNCTIONS \ #define PCRE2_GENERAL_INFO_FUNCTIONS \
PCRE2_EXP_DECL int pcre2_config(int, void *, PCRE2_SIZE); PCRE2_EXP_DECL int pcre2_config(uint32_t, void *, PCRE2_SIZE);
/* Functions for manipulating contexts. */ /* Functions for manipulating contexts. */
@ -437,16 +437,16 @@ PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \ PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \ PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \ PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
int, PCRE2_UCHAR *, PCRE2_SIZE *); \ unsigned int, PCRE2_UCHAR *, PCRE2_SIZE *); \
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \ PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \ PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \ PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \ PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
int, PCRE2_UCHAR **, PCRE2_SIZE *); \ unsigned int, PCRE2_UCHAR **, PCRE2_SIZE *); \
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \ PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
PCRE2_SPTR, PCRE2_SIZE *); \ PCRE2_SPTR, PCRE2_SIZE *); \
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \ PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
int, PCRE2_SIZE *); \ unsigned int, PCRE2_SIZE *); \
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \ PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \ PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\ PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
@ -622,24 +622,27 @@ PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_OTHER_FUNCTIONS #undef PCRE2_OTHER_FUNCTIONS
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS #undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
/* Re-define PCRE2_SUFFIX to use the external width value, if defined. /* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
Otherwise, undefine the other macros and make PCRE2_SUFFIX a no-op, to reduce PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
confusion. */ PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
#undef PCRE2_SUFFIX #undef PCRE2_SUFFIX
#ifdef PCRE2_CODE_UNIT_WIDTH #ifndef PCRE2_CODE_UNIT_WIDTH
#if PCRE2_CODE_UNIT_WIDTH != 8 && \ #error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
PCRE2_CODE_UNIT_WIDTH != 16 && \ #error Use 8, 16, or 32; or 0 for a multi-width application.
PCRE2_CODE_UNIT_WIDTH != 32 #else /* PCRE2_CODE_UNIT_WIDTH is defined */
#error PCRE2_CODE_UNIT_WIDTH must be 8, 16, or 32 #if PCRE2_CODE_UNIT_WIDTH == 8 || \
#endif PCRE2_CODE_UNIT_WIDTH == 16 || \
PCRE2_CODE_UNIT_WIDTH == 32
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH) #define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
#else #elif PCRE2_CODE_UNIT_WIDTH == 0
#undef PCRE2_JOIN #undef PCRE2_JOIN
#undef PCRE2_GLUE #undef PCRE2_GLUE
#define PCRE2_SUFFIX(a) a #define PCRE2_SUFFIX(a) a
#else
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
#endif #endif
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */

View File

@ -231,7 +231,7 @@ static const uint8_t opcode_possessify[] = {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
/************************************************* /*************************************************
* Check a character and a property * * Check a character and a property *
*************************************************/ *************************************************/
@ -311,7 +311,7 @@ switch(ptype)
return FALSE; return FALSE;
} }
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
@ -368,7 +368,7 @@ PCRE2_UCHAR base;
PCRE2_SPTR end; PCRE2_SPTR end;
uint32_t chr; uint32_t chr;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
uint32_t *clist_dest; uint32_t *clist_dest;
const uint32_t *clist_src; const uint32_t *clist_src;
#else #else
@ -451,7 +451,7 @@ switch(c)
GETCHARINCTEST(chr, code); GETCHARINCTEST(chr, code);
list[2] = chr; list[2] = chr;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (chr < 128 || (chr < 256 && !utf)) if (chr < 128 || (chr < 256 && !utf))
list[3] = fcc[chr]; list[3] = fcc[chr];
else else
@ -470,7 +470,7 @@ switch(c)
list[4] = NOTACHAR; list[4] = NOTACHAR;
return code; return code;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
case OP_PROP: case OP_PROP:
case OP_NOTPROP: case OP_NOTPROP:
if (code[0] != PT_CLIST) if (code[0] != PT_CLIST)
@ -812,7 +812,7 @@ for(;;)
leftop = base_list[0]; leftop = base_list[0];
rightop = list[0]; rightop = list[0];
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
accepted = FALSE; /* Always set in non-unicode case. */ accepted = FALSE; /* Always set in non-unicode case. */
if (leftop == OP_PROP || leftop == OP_NOTPROP) if (leftop == OP_PROP || leftop == OP_NOTPROP)
{ {
@ -915,7 +915,7 @@ for(;;)
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP && accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP && rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
@ -1039,7 +1039,7 @@ for(;;)
case OP_EOD: /* Can always possessify before \z */ case OP_EOD: /* Can always possessify before \z */
break; break;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
case OP_PROP: case OP_PROP:
case OP_NOTPROP: case OP_NOTPROP:
if (!check_char_prop(chr, list_ptr[2], list_ptr[3], if (!check_char_prop(chr, list_ptr[2], list_ptr[3],

View File

@ -433,7 +433,7 @@ static const int posix_class_maps[] = {
/* Table of substitutes for \d etc when PCRE2_UCP is set. They are replaced by /* Table of substitutes for \d etc when PCRE2_UCP is set. They are replaced by
Unicode property escapes. */ Unicode property escapes. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
static const PCRE2_UCHAR string_PNd[] = { static const PCRE2_UCHAR string_PNd[] = {
CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
@ -541,7 +541,7 @@ static PCRE2_SPTR posix_substitutes[] = {
NULL /* ^xdigit */ NULL /* ^xdigit */
}; };
#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(PCRE2_UCHAR *)) #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(PCRE2_UCHAR *))
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Masks for checking option settings. */ /* Masks for checking option settings. */
@ -887,7 +887,7 @@ for (;;)
case OP_NOTI: case OP_NOTI:
branchlength++; branchlength++;
cc += 2; cc += 2;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif #endif
break; break;
@ -901,7 +901,7 @@ for (;;)
case OP_NOTEXACTI: case OP_NOTEXACTI:
branchlength += (int)GET2(cc,1); branchlength += (int)GET2(cc,1);
cc += 2 + IMM2_SIZE; cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif #endif
break; break;
@ -1315,7 +1315,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
actual length is stored in the compiled code, so we must update "code" actual length is stored in the compiled code, so we must update "code"
here. */ here. */
#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
case OP_XCLASS: case OP_XCLASS:
ccode = code += GET(code, 1); ccode = code += GET(code, 1);
goto CHECK_CLASS_REPEAT; goto CHECK_CLASS_REPEAT;
@ -1325,7 +1325,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
case OP_NCLASS: case OP_NCLASS:
ccode = code + PRIV(OP_lengths)[OP_CLASS]; ccode = code + PRIV(OP_lengths)[OP_CLASS];
#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
CHECK_CLASS_REPEAT: CHECK_CLASS_REPEAT:
#endif #endif
@ -2062,7 +2062,7 @@ return escape;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
/************************************************* /*************************************************
* Handle \P and \p * * Handle \P and \p *
*************************************************/ *************************************************/
@ -2678,7 +2678,7 @@ return -1;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
/************************************************* /*************************************************
* Get othercase range * * Get othercase range *
*************************************************/ *************************************************/
@ -2740,7 +2740,7 @@ for (++c; c <= d; c++)
*cptr = c; /* Rest of input range */ *cptr = c; /* Rest of input range */
return 0; return 0;
} }
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
@ -2780,7 +2780,7 @@ range. */
if ((options & PCRE2_CASELESS) != 0) if ((options & PCRE2_CASELESS) != 0)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if ((options & PCRE2_UTF) != 0) if ((options & PCRE2_UTF) != 0)
{ {
int rc; int rc;
@ -2810,7 +2810,7 @@ if ((options & PCRE2_CASELESS) != 0)
} }
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Not UTF mode */ /* Not UTF mode */
@ -2844,7 +2844,7 @@ if (end >= start)
{ {
PCRE2_UCHAR *uchardata = *uchardptr; PCRE2_UCHAR *uchardata = *uchardptr;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if ((options & PCRE2_UTF) != 0) if ((options & PCRE2_UTF) != 0)
{ {
if (start < end) if (start < end)
@ -2860,7 +2860,7 @@ if (end >= start)
} }
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Without UTF support, character values are constrained by the bit length, /* Without UTF support, character values are constrained by the bit length,
and can only be > 256 for 16-bit and 32-bit libraries. */ and can only be > 256 for 16-bit and 32-bit libraries. */
@ -3042,7 +3042,7 @@ uint8_t classbits[32];
not do this for other options (e.g. PCRE2_EXTENDED) because they may change not do this for other options (e.g. PCRE2_EXTENDED) because they may change
dynamically as we process the pattern. */ dynamically as we process the pattern. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
BOOL utf = (options & PCRE2_UTF) != 0; BOOL utf = (options & PCRE2_UTF) != 0;
#if PCRE2_CODE_UNIT_WIDTH != 32 #if PCRE2_CODE_UNIT_WIDTH != 32
PCRE2_UCHAR utf_units[6]; /* For setting up multi-cu chars */ PCRE2_UCHAR utf_units[6]; /* For setting up multi-cu chars */
@ -3235,7 +3235,7 @@ for (;; ptr++)
break; break;
} }
ptr++; ptr++;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) FORWARDCHAR(ptr); if (utf) FORWARDCHAR(ptr);
#endif #endif
} }
@ -3474,7 +3474,7 @@ for (;; ptr++)
goto FAILED; goto FAILED;
} }
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && HAS_EXTRALEN(c)) if (utf && HAS_EXTRALEN(c))
{ /* Braces are required because the */ { /* Braces are required because the */
GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
@ -3556,7 +3556,7 @@ for (;; ptr++)
that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
directly. UCP support is not available unless UTF support is.*/ directly. UCP support is not available unless UTF support is.*/
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if ((options & PCRE2_UCP) != 0) if ((options & PCRE2_UCP) != 0)
{ {
unsigned int ptype = 0; unsigned int ptype = 0;
@ -3599,7 +3599,7 @@ for (;; ptr++)
break; break;
} }
} }
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* In the non-UCP case, or when UCP makes no difference, we build the /* In the non-UCP case, or when UCP makes no difference, we build the
bit map for the POSIX class in a chunk of local store because we may be bit map for the POSIX class in a chunk of local store because we may be
@ -3689,7 +3689,7 @@ for (;; ptr++)
switch (escape) switch (escape)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
case ESC_du: /* These are the values given for \d etc */ case ESC_du: /* These are the values given for \d etc */
case ESC_DU: /* when PCRE2_UCP is set. We replace the */ case ESC_DU: /* when PCRE2_UCP is set. We replace the */
case ESC_wu: /* escape sequence with an appropriate \p */ case ESC_wu: /* escape sequence with an appropriate \p */
@ -3757,7 +3757,7 @@ for (;; ptr++)
cb, PRIV(vspace_list)); cb, PRIV(vspace_list));
break; break;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
case ESC_p: case ESC_p:
case ESC_P: case ESC_P:
{ {
@ -3840,7 +3840,7 @@ for (;; ptr++)
/* Otherwise, we have a potential range; pick up the next character */ /* Otherwise, we have a potential range; pick up the next character */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ /* Braces are required because the */ { /* Braces are required because the */
GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */ GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */
@ -3940,7 +3940,7 @@ for (;; ptr++)
if (negate_class) if (negate_class)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
int d; int d;
#endif #endif
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
@ -3951,7 +3951,7 @@ for (;; ptr++)
one other case. If so, generate a special OP_NOTPROP item instead of one other case. If so, generate a special OP_NOTPROP item instead of
OP_NOTI. */ OP_NOTI. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_CASELESS) != 0 && if (utf && (options & PCRE2_CASELESS) != 0 &&
(d = UCD_CASESET(c)) != 0) (d = UCD_CASESET(c)) != 0)
{ {
@ -4032,7 +4032,7 @@ for (;; ptr++)
be listed) there are no characters < 256, we can omit the bitmap in the be listed) there are no characters < 256, we can omit the bitmap in the
actual compiled code. */ actual compiled code. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0)) if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0))
#elif PCRE2_CODE_UNIT_WIDTH != 8 #elif PCRE2_CODE_UNIT_WIDTH != 8
if (xclass && !should_flip_negation) if (xclass && !should_flip_negation)
@ -4157,7 +4157,7 @@ for (;; ptr++)
break; break;
} }
p++; p++;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) FORWARDCHAR(p); if (utf) FORWARDCHAR(p);
#endif #endif
} /* Loop for comment characters */ } /* Loop for comment characters */
@ -4265,7 +4265,7 @@ for (;; ptr++)
/* If previous was a character type match (\d or similar), abolish it and /* If previous was a character type match (\d or similar), abolish it and
create a suitable repeat item. The code is shared with single-character create a suitable repeat item. The code is shared with single-character
repeats by setting op_type to add a suitable offset into repeat_type. Note repeats by setting op_type to add a suitable offset into repeat_type. Note
the the Unicode property types will be present only when SUPPORT_UTF is the the Unicode property types will be present only when SUPPORT_UNICODE is
defined, but we don't wrap the little bits of code here because it just defined, but we don't wrap the little bits of code here because it just
makes it horribly messy. */ makes it horribly messy. */
@ -4880,7 +4880,7 @@ for (;; ptr++)
case OP_NOTEXACT: case OP_NOTEXACT:
case OP_NOTEXACTI: case OP_NOTEXACTI:
tempcode += PRIV(OP_lengths)[*tempcode]; tempcode += PRIV(OP_lengths)[*tempcode];
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && HAS_EXTRALEN(tempcode[-1])) if (utf && HAS_EXTRALEN(tempcode[-1]))
tempcode += GET_EXTRALEN(tempcode[-1]); tempcode += GET_EXTRALEN(tempcode[-1]);
#endif #endif
@ -6407,7 +6407,7 @@ for (;; ptr++)
/* So are Unicode property matches, if supported. */ /* So are Unicode property matches, if supported. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
else if (escape == ESC_P || escape == ESC_p) else if (escape == ESC_P || escape == ESC_p)
{ {
BOOL negated; BOOL negated;
@ -6442,7 +6442,7 @@ for (;; ptr++)
if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
cb->max_lookbehind == 0) cb->max_lookbehind == 0)
cb->max_lookbehind = 1; cb->max_lookbehind = 1;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (escape >= ESC_DU && escape <= ESC_wu) if (escape >= ESC_DU && escape <= ESC_wu)
{ {
nestptr = ptr + 1; /* Where to resume */ nestptr = ptr + 1; /* Where to resume */
@ -6479,7 +6479,7 @@ for (;; ptr++)
mclength = 1; mclength = 1;
mcbuffer[0] = c; mcbuffer[0] = c;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && HAS_EXTRALEN(c)) if (utf && HAS_EXTRALEN(c))
ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr)); ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
#endif #endif
@ -6493,7 +6493,7 @@ for (;; ptr++)
/* For caseless UTF mode, check whether this character has more than one /* For caseless UTF mode, check whether this character has more than one
other case. If so, generate a special OP_PROP item instead of OP_CHARI. */ other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_CASELESS) != 0) if (utf && (options & PCRE2_CASELESS) != 0)
{ {
GETCHAR(c, mcbuffer); GETCHAR(c, mcbuffer);
@ -7527,7 +7527,7 @@ ptr += skipatstart;
/* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */ /* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */
#ifndef SUPPORT_UTF #ifndef SUPPORT_UNICODE
if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0) if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0)
{ {
errorcode = ERR32; errorcode = ERR32;
@ -7911,7 +7911,7 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
points and cannot have another case. In 16-bit and 32-bit modes, we can points and cannot have another case. In 16-bit and 32-bit modes, we can
check wide characters when UTF (and therefore UCP) is supported. */ check wide characters when UTF (and therefore UCP) is supported. */
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
else if (firstcu <= MAX_UTF_CODE_POINT && else if (firstcu <= MAX_UTF_CODE_POINT &&
UCD_OTHERCASE(firstcu) != firstcu) UCD_OTHERCASE(firstcu) != firstcu)
re->flags |= PCRE2_FIRSTCASELESS; re->flags |= PCRE2_FIRSTCASELESS;
@ -7945,7 +7945,7 @@ if (reqcuflags >= 0 &&
{ {
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS; if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
} }
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu) else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
re->flags |= PCRE2_LASTCASELESS; re->flags |= PCRE2_LASTCASELESS;
#endif #endif

View File

@ -75,7 +75,7 @@ Returns: 0 if data returned
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_config(int what, void *where, size_t length) pcre2_config(uint32_t what, void *where, size_t length)
{ {
if (length < sizeof(int)) return PCRE2_ERROR_BADLENGTH; if (length < sizeof(int)) return PCRE2_ERROR_BADLENGTH;
@ -145,7 +145,7 @@ switch (what)
case PCRE2_CONFIG_UNICODE_VERSION: case PCRE2_CONFIG_UNICODE_VERSION:
{ {
#if defined SUPPORT_UTF #if defined SUPPORT_UNICODE
const char *v = PRIV(unicode_version); const char *v = PRIV(unicode_version);
#else #else
const char *v = "Unicode not supported"; const char *v = "Unicode not supported";
@ -158,8 +158,8 @@ switch (what)
} }
break; break;
case PCRE2_CONFIG_UTF: case PCRE2_CONFIG_UNICODE:
#if defined SUPPORT_UTF #if defined SUPPORT_UNICODE
*((int *)where) = 1; *((int *)where) = 1;
#else #else
*((int *)where) = 0; *((int *)where) = 0;

View File

@ -263,8 +263,9 @@ if (mcontext != NULL)
* Set values in contexts * * Set values in contexts *
*************************************************/ *************************************************/
/* All these functions return 1 for success or 0 if invalid data is given. Only /* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
some of the functions are able to test the validity of the data. */ data is given. Only some of the functions are able to test the validity of the
data. */
/* ------------ Compile contexts ------------ */ /* ------------ Compile contexts ------------ */
@ -274,7 +275,7 @@ pcre2_set_character_tables(pcre2_compile_context *ccontext,
const unsigned char *tables) const unsigned char *tables)
{ {
ccontext->tables = tables; ccontext->tables = tables;
return 1; return 0;
} }
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@ -285,10 +286,10 @@ switch(value)
case PCRE2_BSR_ANYCRLF: case PCRE2_BSR_ANYCRLF:
case PCRE2_BSR_UNICODE: case PCRE2_BSR_UNICODE:
ccontext->bsr_convention = value; ccontext->bsr_convention = value;
return 1; return 0;
default: default:
return 0; return PCRE2_ERROR_BADDATA;
} }
} }
@ -303,10 +304,10 @@ switch(newline)
case PCRE2_NEWLINE_ANY: case PCRE2_NEWLINE_ANY:
case PCRE2_NEWLINE_ANYCRLF: case PCRE2_NEWLINE_ANYCRLF:
ccontext->newline_convention = newline; ccontext->newline_convention = newline;
return 1; return 0;
default: default:
return 0; return PCRE2_ERROR_BADDATA;
} }
} }
@ -314,7 +315,7 @@ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit) pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
{ {
ccontext->parens_nest_limit = limit; ccontext->parens_nest_limit = limit;
return 1; return 0;
} }
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@ -322,7 +323,7 @@ pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
int (*guard)(uint32_t)) int (*guard)(uint32_t))
{ {
ccontext->stack_guard = guard; ccontext->stack_guard = guard;
return 1; return 0;
} }
@ -336,10 +337,10 @@ switch(value)
case PCRE2_BSR_ANYCRLF: case PCRE2_BSR_ANYCRLF:
case PCRE2_BSR_UNICODE: case PCRE2_BSR_UNICODE:
mcontext->bsr_convention = value; mcontext->bsr_convention = value;
return 1; return 0;
default: default:
return 0; return PCRE2_ERROR_BADDATA;
} }
} }
@ -354,10 +355,10 @@ switch(newline)
case PCRE2_NEWLINE_ANY: case PCRE2_NEWLINE_ANY:
case PCRE2_NEWLINE_ANYCRLF: case PCRE2_NEWLINE_ANYCRLF:
mcontext->newline_convention = newline; mcontext->newline_convention = newline;
return 1; return 0;
default: default:
return 0; return PCRE2_ERROR_BADDATA;
} }
} }
@ -367,21 +368,21 @@ pcre2_set_callout(pcre2_match_context *mcontext,
{ {
mcontext->callout = callout; mcontext->callout = callout;
mcontext->callout_data = callout_data; mcontext->callout_data = callout_data;
return 1; return 0;
} }
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit) pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
{ {
mcontext->match_limit = limit; mcontext->match_limit = limit;
return 1; return 0;
} }
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit) pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
{ {
mcontext->recursion_limit = limit; mcontext->recursion_limit = limit;
return 1; return 0;
} }
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@ -399,7 +400,7 @@ mcontext->stack_memctl.memory_data = mydata;
(void)myfree; (void)myfree;
(void)mydata; (void)mydata;
#endif #endif
return 1; return 0;
} }
/* End of pcre2_context.c */ /* End of pcre2_context.c */

View File

@ -391,7 +391,7 @@ PCRE2_SPTR start_subject = mb->start_subject;
PCRE2_SPTR end_subject = mb->end_subject; PCRE2_SPTR end_subject = mb->end_subject;
PCRE2_SPTR start_code = mb->start_code; PCRE2_SPTR start_code = mb->start_code;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
BOOL utf = (mb->poptions & PCRE2_UTF) != 0; BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
#else #else
BOOL utf = FALSE; BOOL utf = FALSE;
@ -447,7 +447,7 @@ if (*first_op == OP_REVERSE)
/* If we can't go back the amount required for the longest lookbehind /* If we can't go back the amount required for the longest lookbehind
pattern, go back as far as we can; some alternatives may still be viable. */ pattern, go back as far as we can; some alternatives may still be viable. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
/* In character mode we have to step back character by character */ /* In character mode we have to step back character by character */
if (utf) if (utf)
@ -570,11 +570,11 @@ for (;;)
if (ptr < end_subject) if (ptr < end_subject)
{ {
clen = 1; /* Number of data items in the character */ clen = 1; /* Number of data items in the character */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
GETCHARLENTEST(c, ptr, clen); GETCHARLENTEST(c, ptr, clen);
#else #else
c = *ptr; c = *ptr;
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
} }
else else
{ {
@ -652,9 +652,9 @@ for (;;)
if (coptable[codevalue] > 0) if (coptable[codevalue] > 0)
{ {
dlen = 1; dlen = 1;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
d = code[coptable[codevalue]]; d = code[coptable[codevalue]];
if (codevalue >= OP_TYPESTAR) if (codevalue >= OP_TYPESTAR)
{ {
@ -948,11 +948,11 @@ for (;;)
{ {
PCRE2_SPTR temp = ptr - 1; PCRE2_SPTR temp = ptr - 1;
if (temp < mb->start_used_ptr) mb->start_used_ptr = temp; if (temp < mb->start_used_ptr) mb->start_used_ptr = temp;
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (utf) { BACKCHAR(temp); } if (utf) { BACKCHAR(temp); }
#endif #endif
GETCHARTEST(d, temp); GETCHARTEST(d, temp);
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if ((mb->poptions & PCRE2_UCP) != 0) if ((mb->poptions & PCRE2_UCP) != 0)
{ {
if (d == '_') left_word = TRUE; else if (d == '_') left_word = TRUE; else
@ -972,12 +972,12 @@ for (;;)
if (ptr >= mb->last_used_ptr) if (ptr >= mb->last_used_ptr)
{ {
PCRE2_SPTR temp = ptr + 1; PCRE2_SPTR temp = ptr + 1;
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (utf) { FORWARDCHAR(temp); } if (utf) { FORWARDCHAR(temp); }
#endif #endif
mb->last_used_ptr = temp; mb->last_used_ptr = temp;
} }
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if ((mb->poptions & PCRE2_UCP) != 0) if ((mb->poptions & PCRE2_UCP) != 0)
{ {
if (c == '_') right_word = TRUE; else if (c == '_') right_word = TRUE; else
@ -1003,7 +1003,7 @@ for (;;)
if the support is in the binary; otherwise a compile-time error occurs. if the support is in the binary; otherwise a compile-time error occurs.
*/ */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
case OP_PROP: case OP_PROP:
case OP_NOTPROP: case OP_NOTPROP:
if (clen > 0) if (clen > 0)
@ -1258,7 +1258,7 @@ for (;;)
argument. It keeps the code above fast for the other cases. The argument argument. It keeps the code above fast for the other cases. The argument
is in the d variable. */ is in the d variable. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
case OP_PROP_EXTRA + OP_TYPEPLUS: case OP_PROP_EXTRA + OP_TYPEPLUS:
case OP_PROP_EXTRA + OP_TYPEMINPLUS: case OP_PROP_EXTRA + OP_TYPEMINPLUS:
case OP_PROP_EXTRA + OP_TYPEPOSPLUS: case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
@ -1501,7 +1501,7 @@ for (;;)
break; break;
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
case OP_PROP_EXTRA + OP_TYPEQUERY: case OP_PROP_EXTRA + OP_TYPEQUERY:
case OP_PROP_EXTRA + OP_TYPEMINQUERY: case OP_PROP_EXTRA + OP_TYPEMINQUERY:
case OP_PROP_EXTRA + OP_TYPEPOSQUERY: case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
@ -1785,7 +1785,7 @@ for (;;)
break; break;
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
case OP_PROP_EXTRA + OP_TYPEEXACT: case OP_PROP_EXTRA + OP_TYPEEXACT:
case OP_PROP_EXTRA + OP_TYPEUPTO: case OP_PROP_EXTRA + OP_TYPEUPTO:
case OP_PROP_EXTRA + OP_TYPEMINUPTO: case OP_PROP_EXTRA + OP_TYPEMINUPTO:
@ -2063,7 +2063,7 @@ for (;;)
case OP_CHARI: case OP_CHARI:
if (clen == 0) break; if (clen == 0) break;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
@ -2077,7 +2077,7 @@ for (;;)
} }
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Not UTF mode */ /* Not UTF mode */
{ {
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d)) if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
@ -2086,7 +2086,7 @@ for (;;)
break; break;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
/* This is a tricky one because it can match more than one character. /* This is a tricky one because it can match more than one character.
Find out how many characters to skip, and then set up a negative state Find out how many characters to skip, and then set up a negative state
@ -2222,11 +2222,11 @@ for (;;)
if (clen > 0) if (clen > 0)
{ {
unsigned int otherd; unsigned int otherd;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && d >= 128) if (utf && d >= 128)
otherd = UCD_OTHERCASE(d); otherd = UCD_OTHERCASE(d);
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
otherd = TABLE_GET(d, fcc, d); otherd = TABLE_GET(d, fcc, d);
if (c != d && c != otherd) if (c != d && c != otherd)
{ ADD_NEW(state_offset + dlen + 1, 0); } { ADD_NEW(state_offset + dlen + 1, 0); }
@ -2257,11 +2257,11 @@ for (;;)
uint32_t otherd = NOTACHAR; uint32_t otherd = NOTACHAR;
if (caseless) if (caseless)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && d >= 128) if (utf && d >= 128)
otherd = UCD_OTHERCASE(d); otherd = UCD_OTHERCASE(d);
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
otherd = TABLE_GET(d, fcc, d); otherd = TABLE_GET(d, fcc, d);
} }
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@ -2300,11 +2300,11 @@ for (;;)
uint32_t otherd = NOTACHAR; uint32_t otherd = NOTACHAR;
if (caseless) if (caseless)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && d >= 128) if (utf && d >= 128)
otherd = UCD_OTHERCASE(d); otherd = UCD_OTHERCASE(d);
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
otherd = TABLE_GET(d, fcc, d); otherd = TABLE_GET(d, fcc, d);
} }
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@ -2341,11 +2341,11 @@ for (;;)
uint32_t otherd = NOTACHAR; uint32_t otherd = NOTACHAR;
if (caseless) if (caseless)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && d >= 128) if (utf && d >= 128)
otherd = UCD_OTHERCASE(d); otherd = UCD_OTHERCASE(d);
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
otherd = TABLE_GET(d, fcc, d); otherd = TABLE_GET(d, fcc, d);
} }
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@ -2374,11 +2374,11 @@ for (;;)
uint32_t otherd = NOTACHAR; uint32_t otherd = NOTACHAR;
if (caseless) if (caseless)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && d >= 128) if (utf && d >= 128)
otherd = UCD_OTHERCASE(d); otherd = UCD_OTHERCASE(d);
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
otherd = TABLE_GET(d, fcc, d); otherd = TABLE_GET(d, fcc, d);
} }
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@ -2414,11 +2414,11 @@ for (;;)
uint32_t otherd = NOTACHAR; uint32_t otherd = NOTACHAR;
if (caseless) if (caseless)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && d >= 128) if (utf && d >= 128)
otherd = UCD_OTHERCASE(d); otherd = UCD_OTHERCASE(d);
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
otherd = TABLE_GET(d, fcc, d); otherd = TABLE_GET(d, fcc, d);
} }
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@ -2747,7 +2747,7 @@ for (;;)
for (rc = rc*2 - 2; rc >= 0; rc -= 2) for (rc = rc*2 - 2; rc >= 0; rc -= 2)
{ {
int charcount = local_offsets[rc+1] - local_offsets[rc]; int charcount = local_offsets[rc+1] - local_offsets[rc];
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (utf) if (utf)
{ {
PCRE2_SPTR p = start_subject + local_offsets[rc]; PCRE2_SPTR p = start_subject + local_offsets[rc];
@ -2851,7 +2851,7 @@ for (;;)
PCRE2_SPTR p = ptr; PCRE2_SPTR p = ptr;
PCRE2_SPTR pp = local_ptr; PCRE2_SPTR pp = local_ptr;
charcount = (int)(pp - p); charcount = (int)(pp - p);
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
#endif #endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
@ -2933,7 +2933,7 @@ for (;;)
} }
else else
{ {
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (utf) if (utf)
{ {
PCRE2_SPTR p = start_subject + local_offsets[0]; PCRE2_SPTR p = start_subject + local_offsets[0];
@ -3106,14 +3106,24 @@ if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE; if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
if (start_offset > length) return PCRE2_ERROR_BADOFFSET; if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
/* Check that the first field in the block is the magic number. If it is not, /* FIXME: Remove BADENDIANNESS if saving/restoring is not to be implemented. */
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which /* Check that the first field in the block is the magic number. If it is not,
means that the pattern is likely compiled with different endianness. */ return with PCRE2_ERROR_BADMAGIC. */
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
#ifdef FIXME
If saving/restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
this comment and code:
/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
with different endianness. */
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER? return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC; PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
#endif
/* Check the code unit width. */ /* Check the code unit width. */
@ -3238,7 +3248,7 @@ switch(newline)
we must also check that a starting offset does not point into the middle of a we must also check that a starting offset does not point into the middle of a
multiunit character. */ multiunit character. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
{ {
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar)); match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar));
@ -3253,7 +3263,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
return PCRE2_ERROR_BADUTFOFFSET; return PCRE2_ERROR_BADUTFOFFSET;
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
} }
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Set up the first code unit to match, if available. The first_codeunit value /* Set up the first code unit to match, if available. The first_codeunit value
is never set for an anchored regular expression, but the anchoring may be is never set for an anchored regular expression, but the anchoring may be
@ -3270,7 +3280,7 @@ if (!anchored)
if ((re->flags & PCRE2_FIRSTCASELESS) != 0) if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
{ {
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu); first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu); if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
#endif #endif
} }
@ -3290,7 +3300,7 @@ if ((re->flags & PCRE2_LASTSET) != 0)
if ((re->flags & PCRE2_LASTCASELESS) != 0) if ((re->flags & PCRE2_LASTCASELESS) != 0)
{ {
req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu); req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu); if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
#endif #endif
} }
@ -3327,7 +3337,7 @@ for (;;)
if (firstline) if (firstline)
{ {
PCRE2_SPTR t = start_match; PCRE2_SPTR t = start_match;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
while (t < mb->end_subject && !IS_NEWLINE(t)) while (t < mb->end_subject && !IS_NEWLINE(t))
@ -3362,7 +3372,7 @@ for (;;)
{ {
if (start_match > mb->start_subject + start_offset) if (start_match > mb->start_subject + start_offset)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
while (start_match < end_subject && !WAS_NEWLINE(start_match)) while (start_match < end_subject && !WAS_NEWLINE(start_match))
@ -3516,7 +3526,7 @@ for (;;)
if (firstline && IS_NEWLINE(start_match)) break; if (firstline && IS_NEWLINE(start_match)) break;
start_match++; start_match++;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
ACROSSCHAR(start_match < end_subject, *start_match, ACROSSCHAR(start_match < end_subject, *start_match,

View File

@ -198,35 +198,34 @@ static const char match_error_texts[] =
"UTF-16 error: isolated low surrogate\0" "UTF-16 error: isolated low surrogate\0"
"UTF-32 error: code points 0xd800-0xdfff are not defined\0" "UTF-32 error: code points 0xd800-0xdfff are not defined\0"
"UTF-32 error: code points greater than 0x10ffff are not defined\0" "UTF-32 error: code points greater than 0x10ffff are not defined\0"
"bad count value\0" "bad data value\0"
/* 30 */ /* 30 */
"pattern compiled with other endianness\0"
"bad length\0" "bad length\0"
"magic number missing\0" "magic number missing\0"
"pattern compiled in wrong mode: 8/16/32-bit error\0" "pattern compiled in wrong mode: 8/16/32-bit error\0"
"bad offset value\0" "bad offset value\0"
/* 35 */
"bad option value\0" "bad option value\0"
/* 35 */
"bad offset into UTF string\0" "bad offset into UTF string\0"
"callout error code\0" /* Never returned by PCRE2 itself */ "callout error code\0" /* Never returned by PCRE2 itself */
"invalid data in workspace for DFA restart\0" "invalid data in workspace for DFA restart\0"
"too much recursion for DFA matching\0" "too much recursion for DFA matching\0"
/* 40 */
"backreference condition or recursion test not supported for DFA matching\0" "backreference condition or recursion test not supported for DFA matching\0"
/* 40 */
"item unsupported for DFA matching\0" "item unsupported for DFA matching\0"
"match limit not supported for DFA matching\0"
"workspace size exceeded in DFA matching\0" "workspace size exceeded in DFA matching\0"
"internal error - pattern overwritten?\0" "internal error - pattern overwritten?\0"
/* 45 */
"bad JIT option\0" "bad JIT option\0"
"JIT stack limit reached\0" "JIT stack limit reached\0"
/* 45 */
"match limit exceeded\0" "match limit exceeded\0"
"no more memory\0" "no more memory\0"
"unknown or unset substring\0" "unknown or unset substring\0"
/* 50 */
"NULL argument passed\0" "NULL argument passed\0"
"nested recursion at the same subject position\0" "nested recursion at the same subject position\0"
/* 50 */
"recursion limit exceeded\0" "recursion limit exceeded\0"
"requested value is not set\0"
; ;

View File

@ -38,11 +38,11 @@ POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
*/ */
/* We do not support both EBCDIC and UTF at the same time. The "configure" /* We do not support both EBCDIC and Unicode at the same time. The "configure"
script prevents both being selected, but not everybody uses "configure". */ script prevents both being selected, but not everybody uses "configure". */
#if defined EBCDIC && defined SUPPORT_UTF #if defined EBCDIC && defined SUPPORT_UNICODE
#error The use of both EBCDIC and SUPPORT_UTF is not supported. #error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
#endif #endif
/* Standard C headers */ /* Standard C headers */
@ -597,14 +597,14 @@ there are some longer strings as well.
This means that, on EBCDIC platforms, the PCRE library can handle either This means that, on EBCDIC platforms, the PCRE library can handle either
EBCDIC, or UTF-8, but not both. To support both in the same compiled library EBCDIC, or UTF-8, but not both. To support both in the same compiled library
would need different lookups depending on whether PCRE_UTF8 was set or not. would need different lookups depending on whether PCRE2_UTF was set or not.
This would make it impossible to use characters in switch/case statements, This would make it impossible to use characters in switch/case statements,
which would reduce performance. For a theoretical use (which nobody has asked which would reduce performance. For a theoretical use (which nobody has asked
for) in a minority area (EBCDIC platforms), this is not sensible. Any for) in a minority area (EBCDIC platforms), this is not sensible. Any
application that did need both could compile two versions of the library, using application that did need both could compile two versions of the library, using
macros to give the functions distinct names. */ macros to give the functions distinct names. */
#ifndef SUPPORT_UTF #ifndef SUPPORT_UNICODE
/* UTF-8 support is not enabled; use the platform-dependent character literals /* UTF-8 support is not enabled; use the platform-dependent character literals
so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
@ -920,7 +920,7 @@ a positive value. */
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" #define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" #define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
#else /* SUPPORT_UTF */ #else /* SUPPORT_UNICODE */
/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
@ -1189,7 +1189,7 @@ only. */
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN #define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN #define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* -------------------- End of character and string names -------------------*/ /* -------------------- End of character and string names -------------------*/
@ -1775,10 +1775,10 @@ typedef struct {
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */ /* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not /* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is defined as
defined, so the following items are omitted. */ 0, so the following items are omitted. */
#ifdef PCRE2_CODE_UNIT_WIDTH #if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0
/* This is the largest non-UTF code point. */ /* This is the largest non-UTF code point. */

View File

@ -208,9 +208,9 @@ tables. */
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
#define MAX_255(c) TRUE #define MAX_255(c) TRUE
#define MAX_MARK ((1u << 8) - 1) #define MAX_MARK ((1u << 8) - 1)
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
#define SUPPORT_WIDE_CHARS #define SUPPORT_WIDE_CHARS
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
#define TABLE_GET(c, table, default) ((table)[c]) #define TABLE_GET(c, table, default) ((table)[c])
#else /* Code units are 16 or 32 bits */ #else /* Code units are 16 or 32 bits */
@ -246,7 +246,7 @@ complicated ones for UTF characters. GETCHARLENTEST and other macros are not
used when UTF is not supported. To make sure they can never even appear when used when UTF is not supported. To make sure they can never even appear when
UTF support is omitted, we don't even define them. */ UTF support is omitted, we don't even define them. */
#ifndef SUPPORT_UTF #ifndef SUPPORT_UNICODE
/* #define MAX_UTF_SINGLE_CU */ /* #define MAX_UTF_SINGLE_CU */
/* #define HAS_EXTRALEN(c) */ /* #define HAS_EXTRALEN(c) */
@ -263,7 +263,7 @@ UTF support is omitted, we don't even define them. */
/* #define FORWARDCHAR(eptr) */ /* #define FORWARDCHAR(eptr) */
/* #define ACROSSCHAR(condition, eptr, action) */ /* #define ACROSSCHAR(condition, eptr, action) */
#else /* SUPPORT_UTF */ #else /* SUPPORT_UNICODE */
/* ------------------- 8-bit support ------------------ */ /* ------------------- 8-bit support ------------------ */
@ -527,7 +527,7 @@ These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
#define PUTCHAR(c, p) (*p = c, 1) #define PUTCHAR(c, p) (*p = c, 1)
#endif /* UTF-32 character handling */ #endif /* UTF-32 character handling */
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Mode-dependent macros that have the same definition in all modes. */ /* Mode-dependent macros that have the same definition in all modes. */

View File

@ -145,7 +145,7 @@ static int
match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr, match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr) match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
{ {
#if defined SUPPORT_UTF #if defined SUPPORT_UNICODE
BOOL utf = (mb->poptions & PCRE2_UTF) != 0; BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
#endif #endif
@ -173,7 +173,7 @@ length = mb->ovector[offset+1] - mb->ovector[offset];
if (caseless) if (caseless)
{ {
#if defined SUPPORT_UTF #if defined SUPPORT_UNICODE
if (utf) if (utf)
{ {
/* Match characters up to the end of the reference. NOTE: the number of /* Match characters up to the end of the reference. NOTE: the number of
@ -352,7 +352,7 @@ typedef struct heapframe {
struct heapframe *Xprevframe; struct heapframe *Xprevframe;
struct heapframe *Xnextframe; struct heapframe *Xnextframe;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
PCRE2_SPTR Xcharptr; PCRE2_SPTR Xcharptr;
#endif #endif
PCRE2_SPTR Xeptr; PCRE2_SPTR Xeptr;
@ -378,7 +378,7 @@ typedef struct heapframe {
uint32_t Xop; uint32_t Xop;
uint32_t Xsave_capture_last; uint32_t Xsave_capture_last;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
uint32_t Xprop_value; uint32_t Xprop_value;
int Xprop_type; int Xprop_type;
int Xprop_fail_result; int Xprop_fail_result;
@ -399,7 +399,7 @@ typedef struct heapframe {
eptrblock Xnewptrb; eptrblock Xnewptrb;
recursion_info Xnew_recursive; recursion_info Xnew_recursive;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
PCRE2_UCHAR Xocchars[6]; PCRE2_UCHAR Xocchars[6];
#endif #endif
} heapframe; } heapframe;
@ -610,7 +610,7 @@ HEAP_RECURSE:
/* Ditto for the local variables */ /* Ditto for the local variables */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
#define charptr frame->Xcharptr #define charptr frame->Xcharptr
#define prop_value frame->Xprop_value #define prop_value frame->Xprop_value
#define prop_type frame->Xprop_type #define prop_type frame->Xprop_type
@ -666,7 +666,7 @@ declarations can be cut out in a block. The only declarations within blocks
below are for variables that do not have to be preserved over a recursive call below are for variables that do not have to be preserved over a recursive call
to RMATCH(). */ to RMATCH(). */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
PCRE2_SPTR charptr; PCRE2_SPTR charptr;
#endif #endif
PCRE2_SPTR callpat; PCRE2_SPTR callpat;
@ -684,7 +684,7 @@ uint32_t number;
uint32_t op; uint32_t op;
uint32_t save_capture_last; uint32_t save_capture_last;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
uint32_t prop_value; uint32_t prop_value;
int prop_type; int prop_type;
int prop_fail_result; int prop_fail_result;
@ -721,7 +721,7 @@ the alternative names that are used. */
/* These statements are here to stop the compiler complaining about uninitialized /* These statements are here to stop the compiler complaining about uninitialized
variables. */ variables. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
prop_value = 0; prop_value = 0;
prop_fail_result = 0; prop_fail_result = 0;
#endif #endif
@ -742,7 +742,7 @@ call because it's quite a complicated macro. It has to be used in one
particular way. This shouldn't, however, impact performance when true recursion particular way. This shouldn't, however, impact performance when true recursion
is being used. */ is being used. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
utf = (mb->poptions & PCRE2_UTF) != 0; utf = (mb->poptions & PCRE2_UTF) != 0;
#else #else
utf = FALSE; utf = FALSE;
@ -1662,7 +1662,7 @@ for (;;)
back a number of characters, not bytes. */ back a number of characters, not bytes. */
case OP_REVERSE: case OP_REVERSE:
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
i = GET(ecode, 1); i = GET(ecode, 1);
@ -2197,7 +2197,7 @@ for (;;)
be "non-word" characters. Remember the earliest consulted character for be "non-word" characters. Remember the earliest consulted character for
partial matching. */ partial matching. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
/* Get status of previous character */ /* Get status of previous character */
@ -2257,7 +2257,7 @@ for (;;)
if (eptr == mb->start_subject) prev_is_word = FALSE; else if (eptr == mb->start_subject) prev_is_word = FALSE; else
{ {
if (eptr <= mb->start_used_ptr) mb->start_used_ptr = eptr - 1; if (eptr <= mb->start_used_ptr) mb->start_used_ptr = eptr - 1;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if ((mb->poptions & PCRE2_UCP) != 0) if ((mb->poptions & PCRE2_UCP) != 0)
{ {
c = eptr[-1]; c = eptr[-1];
@ -2283,7 +2283,7 @@ for (;;)
else else
{ {
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1; if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if ((mb->poptions & PCRE2_UCP) != 0) if ((mb->poptions & PCRE2_UCP) != 0)
{ {
c = *eptr; c = *eptr;
@ -2334,7 +2334,7 @@ for (;;)
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
eptr++; eptr++;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++); if (utf) ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
#endif #endif
ecode++; ecode++;
@ -2550,7 +2550,7 @@ for (;;)
ecode++; ecode++;
break; break;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
/* Check the next character by Unicode property. We will get here only /* Check the next character by Unicode property. We will get here only
if the support is in the binary; otherwise a compile-time error occurs. */ if the support is in the binary; otherwise a compile-time error occurs. */
@ -2684,7 +2684,7 @@ for (;;)
CHECK_PARTIAL(); CHECK_PARTIAL();
ecode++; ecode++;
break; break;
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Match a back reference, possibly repeatedly. Look past the end of the /* Match a back reference, possibly repeatedly. Look past the end of the
@ -2955,7 +2955,7 @@ for (;;)
/* First, ensure the minimum number of matches are present. */ /* First, ensure the minimum number of matches are present. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
for (i = 1; i <= min; i++) for (i = 1; i <= min; i++)
@ -3007,7 +3007,7 @@ for (;;)
if (minimize) if (minimize)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
for (fi = min;; fi++) for (fi = min;; fi++)
@ -3063,7 +3063,7 @@ for (;;)
{ {
pp = eptr; pp = eptr;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
for (i = min; i < max; i++) for (i = min; i < max; i++)
@ -3232,7 +3232,7 @@ for (;;)
SCHECK_PARTIAL(); SCHECK_PARTIAL();
break; break;
} }
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
GETCHARLENTEST(c, eptr, len); GETCHARLENTEST(c, eptr, len);
#else #else
c = *eptr; c = *eptr;
@ -3248,7 +3248,7 @@ for (;;)
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM21); RMATCH(eptr, ecode, offset_top, mb, eptrb, RM21);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (eptr-- == pp) break; /* Stop if tried at original pos */ if (eptr-- == pp) break; /* Stop if tried at original pos */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) BACKCHAR(eptr); if (utf) BACKCHAR(eptr);
#endif #endif
} }
@ -3262,7 +3262,7 @@ for (;;)
/* Match a single character, casefully */ /* Match a single character, casefully */
case OP_CHAR: case OP_CHAR:
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
length = 1; length = 1;
@ -3299,7 +3299,7 @@ for (;;)
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
length = 1; length = 1;
@ -3334,7 +3334,7 @@ for (;;)
if (fc != dc) if (fc != dc)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (dc != UCD_OTHERCASE(fc)) if (dc != UCD_OTHERCASE(fc))
#endif #endif
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
@ -3342,7 +3342,7 @@ for (;;)
} }
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Not UTF mode */ /* Not UTF mode */
{ {
@ -3436,7 +3436,7 @@ for (;;)
for speed. */ for speed. */
REPEATCHAR: REPEATCHAR:
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
length = 1; length = 1;
@ -3527,7 +3527,7 @@ for (;;)
value of fc will always be < 128. */ value of fc will always be < 128. */
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* When not in UTF-8 mode, load a single-byte character. */ /* When not in UTF-8 mode, load a single-byte character. */
fc = *ecode++; fc = *ecode++;
@ -3547,11 +3547,11 @@ for (;;)
/* fc must be < 128 if UTF is enabled. */ /* fc must be < 128 if UTF is enabled. */
foc = mb->fcc[fc]; foc = mb->fcc[fc];
#else #else
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && fc > 127) if (utf && fc > 127)
foc = UCD_OTHERCASE(fc); foc = UCD_OTHERCASE(fc);
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
foc = TABLE_GET(fc, mb->fcc, fc); foc = TABLE_GET(fc, mb->fcc, fc);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
@ -3682,7 +3682,7 @@ for (;;)
SCHECK_PARTIAL(); SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
register uint32_t ch, och; register uint32_t ch, och;
@ -3705,7 +3705,7 @@ for (;;)
} }
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
{ {
register uint32_t ch = ecode[1]; register uint32_t ch = ecode[1];
c = *eptr++; c = *eptr++;
@ -3803,14 +3803,14 @@ for (;;)
if (op >= OP_NOTSTARI) /* Caseless */ if (op >= OP_NOTSTARI) /* Caseless */
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && fc > 127) if (utf && fc > 127)
foc = UCD_OTHERCASE(fc); foc = UCD_OTHERCASE(fc);
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
foc = TABLE_GET(fc, mb->fcc, fc); foc = TABLE_GET(fc, mb->fcc, fc);
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
register uint32_t d; register uint32_t d;
@ -3826,7 +3826,7 @@ for (;;)
} }
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Not UTF mode */ /* Not UTF mode */
{ {
for (i = 1; i <= min; i++) for (i = 1; i <= min; i++)
@ -3845,7 +3845,7 @@ for (;;)
if (minimize) if (minimize)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
register uint32_t d; register uint32_t d;
@ -3864,7 +3864,7 @@ for (;;)
} }
} }
else else
#endif /*SUPPORT_UTF */ #endif /*SUPPORT_UNICODE */
/* Not UTF mode */ /* Not UTF mode */
{ {
for (fi = min;; fi++) for (fi = min;; fi++)
@ -3890,7 +3890,7 @@ for (;;)
{ {
pp = eptr; pp = eptr;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
register uint32_t d; register uint32_t d;
@ -3917,7 +3917,7 @@ for (;;)
} }
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Not UTF mode */ /* Not UTF mode */
{ {
for (i = min; i < max; i++) for (i = min; i < max; i++)
@ -3947,7 +3947,7 @@ for (;;)
else else
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
register uint32_t d; register uint32_t d;
@ -3981,7 +3981,7 @@ for (;;)
if (minimize) if (minimize)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
register uint32_t d; register uint32_t d;
@ -4025,7 +4025,7 @@ for (;;)
{ {
pp = eptr; pp = eptr;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
register uint32_t d; register uint32_t d;
@ -4144,7 +4144,7 @@ for (;;)
REPEATTYPE: REPEATTYPE:
ctype = *ecode++; /* Code for the character type */ ctype = *ecode++; /* Code for the character type */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (ctype == OP_PROP || ctype == OP_NOTPROP) if (ctype == OP_PROP || ctype == OP_NOTPROP)
{ {
prop_fail_result = ctype == OP_NOTPROP; prop_fail_result = ctype == OP_NOTPROP;
@ -4162,7 +4162,7 @@ for (;;)
if (min > 0) if (min > 0)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (prop_type >= 0) if (prop_type >= 0)
{ {
switch(prop_type) switch(prop_type)
@ -4378,11 +4378,11 @@ for (;;)
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Handle all other cases when the coding is UTF-8 */ /* Handle all other cases when the coding is UTF-8 */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) switch(ctype) if (utf) switch(ctype)
{ {
case OP_ANY: case OP_ANY:
@ -4631,7 +4631,7 @@ for (;;)
} /* End switch(ctype) */ } /* End switch(ctype) */
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Code for the non-UTF-8 case for minimum matching of operators other /* Code for the non-UTF-8 case for minimum matching of operators other
than OP_PROP and OP_NOTPROP. */ than OP_PROP and OP_NOTPROP. */
@ -4889,7 +4889,7 @@ for (;;)
if (minimize) if (minimize)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (prop_type >= 0) if (prop_type >= 0)
{ {
switch(prop_type) switch(prop_type)
@ -5138,9 +5138,9 @@ for (;;)
} }
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
for (fi = min;; fi++) for (fi = min;; fi++)
@ -5410,7 +5410,7 @@ for (;;)
{ {
pp = eptr; /* Remember where we started */ pp = eptr; /* Remember where we started */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (prop_type >= 0) if (prop_type >= 0)
{ {
switch(prop_type) switch(prop_type)
@ -5696,9 +5696,9 @@ for (;;)
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
switch(ctype) switch(ctype)
@ -5940,7 +5940,7 @@ for (;;)
} }
} }
else else
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Not UTF mode */ /* Not UTF mode */
{ {
switch(ctype) switch(ctype)
@ -6219,13 +6219,13 @@ switch (frame->Xwhere)
#ifdef SUPPORT_WIDE_CHARS #ifdef SUPPORT_WIDE_CHARS
LBL(20) LBL(21) LBL(20) LBL(21)
#endif #endif
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
LBL(16) LBL(18) LBL(16) LBL(18)
LBL(22) LBL(23) LBL(28) LBL(30) LBL(22) LBL(23) LBL(28) LBL(30)
LBL(32) LBL(34) LBL(42) LBL(46) LBL(32) LBL(34) LBL(42) LBL(46)
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
default: default:
return PCRE2_ERROR_INTERNAL; return PCRE2_ERROR_INTERNAL;
} }
@ -6398,14 +6398,21 @@ if (code == NULL || subject == NULL || match_data == NULL)
return PCRE2_ERROR_NULL; return PCRE2_ERROR_NULL;
if (start_offset > length) return PCRE2_ERROR_BADOFFSET; if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
/* Check that the first field in the block is the magic number. If it is not, /* Check that the first field in the block is the magic number. */
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
means that the pattern is likely compiled with different endianness. */
#ifdef FIXME
If saving/restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
this comment and code:
/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
with different endianness. */
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER? return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC; PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
#endif
/* Check the code unit width. */ /* Check the code unit width. */
@ -6451,7 +6458,7 @@ mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
we must also check that a starting offset does not point into the middle of a we must also check that a starting offset does not point into the middle of a
multiunit character. */ multiunit character. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
{ {
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar)); match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar));
@ -6466,7 +6473,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
return PCRE2_ERROR_BADUTFOFFSET; return PCRE2_ERROR_BADUTFOFFSET;
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
} }
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* If the pattern was successfully studied with JIT support, run the JIT /* If the pattern was successfully studied with JIT support, run the JIT
executable instead of the rest of this function. Most options must be set at executable instead of the rest of this function. Most options must be set at
@ -6640,7 +6647,7 @@ if (!anchored)
if ((re->flags & PCRE2_FIRSTCASELESS) != 0) if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
{ {
first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu); first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu); if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
#endif #endif
} }
@ -6660,7 +6667,7 @@ if ((re->flags & PCRE2_LASTSET) != 0)
if ((re->flags & PCRE2_LASTCASELESS) != 0) if ((re->flags & PCRE2_LASTCASELESS) != 0)
{ {
req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu); req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu); if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
#endif #endif
} }
@ -6696,7 +6703,7 @@ for(;;)
if (firstline) if (firstline)
{ {
PCRE2_SPTR t = start_match; PCRE2_SPTR t = start_match;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
while (t < mb->end_subject && !IS_NEWLINE(t)) while (t < mb->end_subject && !IS_NEWLINE(t))
@ -6731,7 +6738,7 @@ for(;;)
{ {
if (start_match > mb->start_subject + start_offset) if (start_match > mb->start_subject + start_offset)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
while (start_match < end_subject && !WAS_NEWLINE(start_match)) while (start_match < end_subject && !WAS_NEWLINE(start_match))
@ -6905,7 +6912,7 @@ for(;;)
case MATCH_THEN: case MATCH_THEN:
mb->ignore_skip_arg = 0; mb->ignore_skip_arg = 0;
new_start_match = start_match + 1; new_start_match = start_match + 1;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
ACROSSCHAR(new_start_match < end_subject, *new_start_match, ACROSSCHAR(new_start_match < end_subject, *new_start_match,
new_start_match++); new_start_match++);

View File

@ -81,12 +81,12 @@ PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
{ {
uint32_t c; uint32_t c;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) { GETCHAR(c, ptr); } else c = *ptr; if (utf) { GETCHAR(c, ptr); } else c = *ptr;
#else #else
(void)utf; (void)utf;
c = *ptr; c = *ptr;
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
if (type == NLTYPE_ANYCRLF) switch(c) if (type == NLTYPE_ANYCRLF) switch(c)
{ {
@ -172,7 +172,7 @@ PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
uint32_t c; uint32_t c;
ptr--; ptr--;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
BACKCHAR(ptr); BACKCHAR(ptr);
@ -182,7 +182,7 @@ else c = *ptr;
#else #else
(void)utf; (void)utf;
c = *ptr; c = *ptr;
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
if (type == NLTYPE_ANYCRLF) switch(c) if (type == NLTYPE_ANYCRLF) switch(c)
{ {

View File

@ -50,10 +50,11 @@ into a UTF string. The behaviour is different for each code unit width. */
#include "pcre2_internal.h" #include "pcre2_internal.h"
/* If SUPPORT_UTF is not defined, this function will never be called. Supply a /* If SUPPORT_UNICODE is not defined, this function will never be called.
dummy function because some compilers do not like empty source modules. */ Supply a dummy function because some compilers do not like empty source
modules. */
#ifndef SUPPORT_UTF #ifndef SUPPORT_UNICODE
unsigned int unsigned int
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer) PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
{ {
@ -61,7 +62,7 @@ PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
(void)(buffer); (void)(buffer);
return 0; return 0;
} }
#else /* SUPPORT_UTF */ #else /* SUPPORT_UNICODE */
/************************************************* /*************************************************
@ -114,6 +115,6 @@ return 2;
return 1; return 1;
#endif #endif
} }
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* End of pcre_ord2utf.c */ /* End of pcre_ord2utf.c */

View File

@ -56,11 +56,9 @@ Arguments:
what what information is required what what information is required
where where to put the information where where to put the information
Returns: 0 if data returned, negative on error Returns: 0 if data returned, negative on error or unset value
*/ */
/* FIXME: Remove BADENDIANNESS if saving/restoring is not to be implemented. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where) pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
{ {
@ -69,13 +67,21 @@ const pcre2_real_code *re = (pcre2_real_code *)code;
if (re == NULL || where == NULL) return PCRE2_ERROR_NULL; if (re == NULL || where == NULL) return PCRE2_ERROR_NULL;
/* Check that the first field in the block is the magic number. If it is not, /* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to return with PCRE2_ERROR_BADMAGIC. */
REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which
means that the pattern is likely compiled with different endianness. */ if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
#ifdef FIXME
If saving restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
this comment and code:
/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
with different endianness. */
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER? return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC; PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
#endif
/* Check that this pattern was compiled in the correct bit mode */ /* Check that this pattern was compiled in the correct bit mode */
@ -151,6 +157,7 @@ switch(what)
case PCRE2_INFO_MATCHLIMIT: case PCRE2_INFO_MATCHLIMIT:
*((uint32_t *)where) = re->limit_match; *((uint32_t *)where) = re->limit_match;
if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
break; break;
case PCRE2_INFO_MAXLOOKBEHIND: case PCRE2_INFO_MAXLOOKBEHIND:
@ -179,6 +186,7 @@ switch(what)
case PCRE2_INFO_RECURSIONLIMIT: case PCRE2_INFO_RECURSIONLIMIT:
*((uint32_t *)where) = re->limit_recursion; *((uint32_t *)where) = re->limit_recursion;
if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET;
break; break;
case PCRE2_INFO_SIZE: case PCRE2_INFO_SIZE:

View File

@ -94,7 +94,7 @@ BOOL one_code_unit = !utf;
/* If UTF is supported and requested, check for a valid single code unit. */ /* If UTF is supported and requested, check for a valid single code unit. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
@ -105,7 +105,7 @@ if (utf)
one_code_unit = (c & 0xfffff800u) != 0xd800u; one_code_unit = (c & 0xfffff800u) != 0xd800u;
#endif /* CODE_UNIT_WIDTH */ #endif /* CODE_UNIT_WIDTH */
} }
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* Handle a valid one-code-unit character at any width. */ /* Handle a valid one-code-unit character at any width. */
@ -121,7 +121,7 @@ if (one_code_unit)
for each width. If UTF is not supported, control should never get here, but we for each width. If UTF is not supported, control should never get here, but we
need a return statement to keep the compiler happy. */ need a return statement to keep the compiler happy. */
#ifndef SUPPORT_UTF #ifndef SUPPORT_UNICODE
return 0; return 0;
#else #else
@ -178,7 +178,7 @@ as an indication. */
fprintf(f, "\\X{%x}", c); fprintf(f, "\\X{%x}", c);
return 0; return 0;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
} }
@ -221,7 +221,7 @@ into the main code, however, we just put one into this function. */
static const char * static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue) get_ucpname(unsigned int ptype, unsigned int pvalue)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
int i; int i;
for (i = utt_size - 1; i >= 0; i--) for (i = utt_size - 1; i >= 0; i--)
{ {
@ -233,7 +233,7 @@ return (i >= 0)? utt_names + utt[i].name_offset : "??";
(void)ptype; (void)ptype;
(void)pvalue; (void)pvalue;
return "??"; return "??";
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
} }

View File

@ -228,7 +228,7 @@ for (;;)
case OP_NOTPOSPLUSI: case OP_NOTPOSPLUSI:
branchlength++; branchlength++;
cc += 2; cc += 2;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif #endif
break; break;
@ -249,7 +249,7 @@ for (;;)
case OP_NOTEXACTI: case OP_NOTEXACTI:
branchlength += GET2(cc,1); branchlength += GET2(cc,1);
cc += 2 + IMM2_SIZE; cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif #endif
break; break;
@ -297,7 +297,7 @@ for (;;)
appear, but leave the code, just in case.) */ appear, but leave the code, just in case.) */
case OP_ANYBYTE: case OP_ANYBYTE:
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) return -1; if (utf) return -1;
#endif #endif
branchlength++; branchlength++;
@ -536,7 +536,7 @@ for (;;)
case OP_NOTPOSQUERYI: case OP_NOTPOSQUERYI:
cc += PRIV(OP_lengths)[op]; cc += PRIV(OP_lengths)[op];
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif #endif
break; break;
@ -608,7 +608,7 @@ SET_BIT(c);
/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find /* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
the end of the character, even when caseless. */ the end of the character, even when caseless. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
@ -617,7 +617,7 @@ if (utf)
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p); if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
#endif #endif
} }
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* If caseless, handle the other case of the character. */ /* If caseless, handle the other case of the character. */
@ -671,7 +671,7 @@ set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
register uint32_t c; register uint32_t c;
for (c = 0; c < table_limit; c++) for (c = 0; c < table_limit; c++)
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type]; re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (table_limit == 32) return; if (table_limit == 32) return;
for (c = 128; c < 256; c++) for (c = 128; c < 256; c++)
{ {
@ -712,7 +712,7 @@ set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
register uint32_t c; register uint32_t c;
for (c = 0; c < table_limit; c++) for (c = 0; c < table_limit; c++)
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]); re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff; if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
#endif #endif
} }
@ -752,7 +752,7 @@ set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
register uint32_t c; register uint32_t c;
int yield = SSB_DONE; int yield = SSB_DONE;
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int table_limit = utf? 16:32; int table_limit = utf? 16:32;
#else #else
int table_limit = 32; int table_limit = 32;
@ -866,7 +866,7 @@ do
const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2]; const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
while ((c = *p++) < NOTACHAR) while ((c = *p++) < NOTACHAR)
{ {
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (utf) if (utf)
{ {
PCRE2_UCHAR buff[6]; PCRE2_UCHAR buff[6];
@ -1042,7 +1042,7 @@ do
/* For the 8-bit library in UTF-8 mode, set the bits for the first code /* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of horizontal space characters. */ units of horizontal space characters. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
SET_BIT(0xC2); /* For U+00A0 */ SET_BIT(0xC2); /* For U+00A0 */
@ -1081,7 +1081,7 @@ do
/* For the 8-bit library in UTF-8 mode, set the bits for the first code /* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of vertical space characters. */ units of vertical space characters. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
SET_BIT(0xC2); /* For U+0085 (NEL) */ SET_BIT(0xC2); /* For U+0085 (NEL) */
@ -1181,7 +1181,7 @@ do
/* For the 8-bit library in UTF-8 mode, set the bits for the first code /* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of horizontal space characters. */ units of horizontal space characters. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
SET_BIT(0xC2); /* For U+00A0 */ SET_BIT(0xC2); /* For U+00A0 */
@ -1218,7 +1218,7 @@ do
/* For the 8-bit library in UTF-8 mode, set the bits for the first code /* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of vertical space characters. */ units of vertical space characters. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
SET_BIT(0xC2); /* For U+0085 (NEL) */ SET_BIT(0xC2); /* For U+0085 (NEL) */
@ -1287,7 +1287,7 @@ do
character modes, set the 0xFF bit to indicate code units >= 255. */ character modes, set the 0xFF bit to indicate code units >= 255. */
case OP_NCLASS: case OP_NCLASS:
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (utf) if (utf)
{ {
re->start_bitmap[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ re->start_bitmap[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
@ -1318,7 +1318,7 @@ do
if (classmap != NULL) if (classmap != NULL)
{ {
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (utf) if (utf)
{ {
for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c]; for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c];

View File

@ -108,8 +108,8 @@ Returns: if successful: 0
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_bynumber(pcre2_match_data *match_data, int stringnumber, pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr) unsigned int stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
{ {
PCRE2_SIZE left, right; PCRE2_SIZE left, right;
PCRE2_SIZE p = 0; PCRE2_SIZE p = 0;
@ -189,8 +189,8 @@ Returns: if successful: zero
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_bynumber(pcre2_match_data *match_data, int stringnumber, pcre2_substring_get_bynumber(pcre2_match_data *match_data,
PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr) unsigned int stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
{ {
PCRE2_SIZE left, right; PCRE2_SIZE left, right;
PCRE2_SIZE p = 0; PCRE2_SIZE p = 0;
@ -288,7 +288,7 @@ Returns: 0 if successful, else a negative error number
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_length_bynumber(pcre2_match_data *match_data, pcre2_substring_length_bynumber(pcre2_match_data *match_data,
int stringnumber, PCRE2_SIZE *sizeptr) unsigned int stringnumber, PCRE2_SIZE *sizeptr)
{ {
if (stringnumber >= match_data->oveccount || if (stringnumber >= match_data->oveccount ||
stringnumber > match_data->code->top_bracket || stringnumber > match_data->code->top_bracket ||

View File

@ -76,7 +76,7 @@ as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
handling wide characters. */ handling wide characters. */
#if defined PCRE2_PCRE2TEST || \ #if defined PCRE2_PCRE2TEST || \
(defined SUPPORT_UTF && \ (defined SUPPORT_UNICODE && \
defined PCRE2_CODE_UNIT_WIDTH && \ defined PCRE2_CODE_UNIT_WIDTH && \
PCRE2_CODE_UNIT_WIDTH == 8) PCRE2_CODE_UNIT_WIDTH == 8)
@ -106,7 +106,7 @@ const uint8_t PRIV(utf8_table4)[] = {
#endif /* UTF-8 support needed */ #endif /* UTF-8 support needed */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
/* Table to translate from particular type value to the general value. */ /* Table to translate from particular type value to the general value. */
@ -728,6 +728,6 @@ const ucp_type_table PRIV(utt)[] = {
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* End of pcre2_tables.c */ /* End of pcre2_tables.c */

View File

@ -32,7 +32,7 @@ condition to cut out the tables when not needed. But don't leave
a totally empty module because some compilers barf at that. a totally empty module because some compilers barf at that.
Instead, just supply small dummy tables. */ Instead, just supply small dummy tables. */
#ifndef SUPPORT_UTF #ifndef SUPPORT_UNICODE
const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }}; const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};
const uint8_t PRIV(ucd_stage1)[] = {0}; const uint8_t PRIV(ucd_stage1)[] = {0};
const uint16_t PRIV(ucd_stage2)[] = {0}; const uint16_t PRIV(ucd_stage2)[] = {0};
@ -3628,6 +3628,6 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */
#if UCD_BLOCK_SIZE != 128 #if UCD_BLOCK_SIZE != 128
#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h #error Please correct UCD_BLOCK_SIZE in pcre2_internal.h
#endif #endif
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
#endif /* PCRE2_PCRE2TEST */ #endif /* PCRE2_PCRE2TEST */

View File

@ -50,12 +50,12 @@ strings. */
#include "pcre2_internal.h" #include "pcre2_internal.h"
#ifndef SUPPORT_UTF #ifndef SUPPORT_UNICODE
/************************************************* /*************************************************
* Dummy function when UTF not supported * * Dummy function when Unicode is not supported *
*************************************************/ *************************************************/
/* This function should never be called when UTF is not supported. */ /* This function should never be called when Unicode is not supported. */
int int
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset) PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
@ -388,6 +388,6 @@ for (p = string; length-- > 0; p++)
return 0; return 0;
#endif /* CODE_UNIT_WIDTH */ #endif /* CODE_UNIT_WIDTH */
} }
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
/* End of pcre2_valid_utf.c */ /* End of pcre2_valid_utf.c */

View File

@ -103,7 +103,7 @@ while ((t = *data++) != XCL_END)
uint32_t x, y; uint32_t x, y;
if (t == XCL_SINGLE) if (t == XCL_SINGLE)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
GETCHARINC(x, data); /* macro generates multiple statements */ GETCHARINC(x, data); /* macro generates multiple statements */
@ -115,7 +115,7 @@ while ((t = *data++) != XCL_END)
} }
else if (t == XCL_RANGE) else if (t == XCL_RANGE)
{ {
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
GETCHARINC(x, data); /* macro generates multiple statements */ GETCHARINC(x, data); /* macro generates multiple statements */
@ -130,7 +130,7 @@ while ((t = *data++) != XCL_END)
if (c >= x && c <= y) return !negated; if (c >= x && c <= y) return !negated;
} }
#ifdef SUPPORT_UTF #ifdef SUPPORT_UNICODE
else /* XCL_PROP & XCL_NOTPROP */ else /* XCL_PROP & XCL_NOTPROP */
{ {
const ucd_record *prop = GET_UCD(c); const ucd_record *prop = GET_UCD(c);
@ -262,7 +262,7 @@ while ((t = *data++) != XCL_END)
} }
#else #else
(void)utf; /* Avoid compiler warning */ (void)utf; /* Avoid compiler warning */
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UNICODE */
} }
return negated; /* char did not match */ return negated; /* char did not match */

View File

@ -196,6 +196,7 @@ so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
for building the library. */ for building the library. */
#define PRIV(name) name #define PRIV(name) name
#define PCRE2_CODE_UNIT_WIDTH 0
#include "pcre2.h" #include "pcre2.h"
#include "pcre2posix.h" #include "pcre2posix.h"
#include "pcre2_internal.h" #include "pcre2_internal.h"
@ -208,16 +209,17 @@ of PRIV avoids name clashes. */
#include "pcre2_tables.c" #include "pcre2_tables.c"
#include "pcre2_ucd.c" #include "pcre2_ucd.c"
/* When PCRE2_CODE_UNIT_WIDTH is unset, pcre2_internal.h does not include /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
pcre2_intmodedep.h, which is where mode-dependent macros and structures are pcre2_intmodedep.h, which is where mode-dependent macros and structures are
defined. We can now include it for each supported code unit width. Because defined. We can now include it for each supported code unit width. Because
PCRE2_CODE_UNIT_WIDTH was not defined before including pcre2.h, it will have PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately while have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
including these files, and then restore it to a no-op. Because LINK_SIZE may be while including these files, and then restore it to a no-op. Because LINK_SIZE
changed in 16-bit mode and forced to 1 in 32-bit mode, the order of these may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
inclusions should not be changed. */ these inclusions should not be changed. */
#undef PCRE2_SUFFIX #undef PCRE2_SUFFIX
#undef PCRE2_CODE_UNIT_WIDTH
#ifdef SUPPORT_PCRE8 #ifdef SUPPORT_PCRE8
#define PCRE2_CODE_UNIT_WIDTH 8 #define PCRE2_CODE_UNIT_WIDTH 8
@ -576,7 +578,7 @@ static coptstruct coptlist[] = {
{ "pcre16", CONF_FIX, SUPPORT_16 }, { "pcre16", CONF_FIX, SUPPORT_16 },
{ "pcre32", CONF_FIX, SUPPORT_32 }, { "pcre32", CONF_FIX, SUPPORT_32 },
{ "pcre8", CONF_FIX, SUPPORT_8 }, { "pcre8", CONF_FIX, SUPPORT_8 },
{ "utf", CONF_INT, PCRE2_CONFIG_UTF } { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
}; };
#define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct) #define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct)
@ -2815,22 +2817,26 @@ pattern.
Arguments: Arguments:
what code for the required information what code for the required information
where where to put the answer where where to put the answer
unsetok PCRE2_ERROR_UNSET is an "expected" result
Returns: the return from pcre2_pattern_info() Returns: the return from pcre2_pattern_info()
*/ */
static int static int
pattern_info(int what, void *where) pattern_info(int what, void *where, BOOL unsetok)
{ {
int rc; int rc;
PCRE2_PATTERN_INFO(rc, compiled_code, what, where); PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
if (rc >= 0) return 0; if (rc >= 0) return 0;
fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode, if (rc != PCRE2_ERROR_UNSET || !unsetok)
what); {
if (rc == PCRE2_ERROR_BADMODE) fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
fprintf(outfile, "Running in %d-bit mode but pattern was compiled in " what);
"%d-bit mode\n", test_mode, if (rc == PCRE2_ERROR_BADMODE)
8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK)); fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
"%d-bit mode\n", test_mode,
8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
}
return rc; return rc;
} }
@ -3026,32 +3032,61 @@ if ((pat_patctl.control & CTL_INFO) != 0)
{ {
const void *nametable; const void *nametable;
const uint8_t *start_bits; const uint8_t *start_bits;
BOOL match_limit_set, recursion_limit_set;
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit, uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit, hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit,
maxlookbehind, minlength, nameentrysize, namecount, newline_convention, maxlookbehind, minlength, nameentrysize, namecount, newline_convention,
recursion_limit; recursion_limit;
/* These info requests may return PCRE2_ERROR_UNSET. */
switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
{
case 0:
match_limit_set = TRUE;
break;
case PCRE2_ERROR_UNSET:
match_limit_set = FALSE;
break;
default:
return PR_ABEND;
}
switch(pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit, TRUE))
{
case 0:
recursion_limit_set = TRUE;
break;
case PCRE2_ERROR_UNSET:
recursion_limit_set = FALSE;
break;
default:
return PR_ABEND;
}
/* These info requests should always succeed. */ /* These info requests should always succeed. */
if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax) + if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
pattern_info(PCRE2_INFO_BSR, &bsr_convention) + pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count) + pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits) + pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit) + pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype) + pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf) + pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
pattern_info(PCRE2_INFO_JCHANGED, &jchanged) + pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit) + pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype) + pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty) + pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit) + pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) +
pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind) + pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
pattern_info(PCRE2_INFO_MINLENGTH, &minlength) + pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
pattern_info(PCRE2_INFO_NAMECOUNT, &namecount) + pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize) + pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
pattern_info(PCRE2_INFO_NAMETABLE, &nametable) + pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
pattern_info(PCRE2_INFO_NEWLINE, &newline_convention) +
pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit)
!= 0) != 0)
return PR_ABEND; return PR_ABEND;
@ -3063,10 +3098,10 @@ if ((pat_patctl.control & CTL_INFO) != 0)
if (maxlookbehind > 0) if (maxlookbehind > 0)
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind); fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
if (match_limit != UINT32_MAX) if (match_limit_set)
fprintf(outfile, "Match limit = %u\n", match_limit); fprintf(outfile, "Match limit = %u\n", match_limit);
if (recursion_limit != UINT32_MAX) if (recursion_limit_set)
fprintf(outfile, "Recursion limit = %u\n", recursion_limit); fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
if (namecount > 0) if (namecount > 0)
@ -3099,8 +3134,8 @@ if ((pat_patctl.control & CTL_INFO) != 0)
if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
if (match_empty) fprintf(outfile, "May match empty string\n"); if (match_empty) fprintf(outfile, "May match empty string\n");
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options); pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options); pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
/* Remove UTF/UCP if they were there only because of forbid_utf. This saves /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
cluttering up the verification output of non-UTF test files. */ cluttering up the verification output of non-UTF test files. */
@ -3234,7 +3269,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0) if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
{ {
size_t jitsize; size_t jitsize;
if (pattern_info(PCRE2_INFO_JITSIZE, &jitsize) == 0) if (pattern_info(PCRE2_INFO_JITSIZE, &jitsize, FALSE) == 0)
{ {
if (jitsize > 0) if (jitsize > 0)
fprintf(outfile, "JIT compilation was successful\n"); fprintf(outfile, "JIT compilation was successful\n");
@ -3625,14 +3660,14 @@ if ((pat_patctl.control & CTL_MEMORY) != 0)
if (test_mode == 32) cblock_size = sizeof(pcre2_real_code_32); if (test_mode == 32) cblock_size = sizeof(pcre2_real_code_32);
#endif #endif
(void)pattern_info(PCRE2_INFO_SIZE, &size); (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
(void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count); (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
(void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size); (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
fprintf(outfile, "Memory allocation (code space): %d\n", fprintf(outfile, "Memory allocation (code space): %d\n",
(int)(size - name_count*name_entry_size*code_unit_size - cblock_size)); (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
if (pat_patctl.jit != 0) if (pat_patctl.jit != 0)
{ {
(void)pattern_info(PCRE2_INFO_JITSIZE, &size); (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size); fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
} }
} }
@ -4452,7 +4487,7 @@ for (gmatched = 0;; gmatched++)
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0) if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
{ {
uint32_t maxcapcount; uint32_t maxcapcount;
if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount) < 0) if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
return PR_SKIP; return PR_SKIP;
capcount = maxcapcount + 1; /* Allow for full match */ capcount = maxcapcount + 1; /* Allow for full match */
if (capcount > (int)dat_datctl.oveccount) capcount = dat_datctl.oveccount; if (capcount > (int)dat_datctl.oveccount) capcount = dat_datctl.oveccount;
@ -4943,7 +4978,7 @@ printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n"); printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n"); printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n"); printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n"); printf(" unicode Unicode and UTF support enabled [0, 1]\n");
printf(" -d set default pattern control 'debug'\n"); printf(" -d set default pattern control 'debug'\n");
printf(" -dfa set default subject control 'dfa'\n"); printf(" -dfa set default subject control 'dfa'\n");
printf(" -help show usage information\n"); printf(" -help show usage information\n");
@ -5057,7 +5092,7 @@ printf(" 16-bit support\n");
printf(" 32-bit support\n"); printf(" 32-bit support\n");
#endif #endif
(void)PCRE2_CONFIG(PCRE2_CONFIG_UTF, &rc, sizeof(rc)); (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &rc, sizeof(rc));
if (rc != 0) if (rc != 0)
printf(" UTF support (Unicode version %s)\n", uversion); printf(" UTF support (Unicode version %s)\n", uversion);
else else

14
testdata/grepoutput vendored
View File

@ -384,15 +384,15 @@ aaaaa2
010203040506 010203040506
RC=0 RC=0
======== STDERR ======== ======== STDERR ========
pcre2grep: pcre2_match() gave error -47 while matching this text: pcre2grep: pcre2_match() gave error -45 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
pcre2grep: pcre2_match() gave error -47 while matching this text: pcre2grep: pcre2_match() gave error -45 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
pcre2grep: Error -46, -47 or -52 means that a resource limit was exceeded. pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded.
pcre2grep: Check your regex for nested unlimited loops. pcre2grep: Check your regex for nested unlimited loops.
---------------------------- Test 38 ------------------------------ ---------------------------- Test 38 ------------------------------
This line contains a binary zero here >< for testing. This line contains a binary zero here >< for testing.
@ -510,23 +510,23 @@ In the middle of a line, PATTERN appears.
Check up on PATTERN near the end. Check up on PATTERN near the end.
RC=0 RC=0
---------------------------- Test 62 ----------------------------- ---------------------------- Test 62 -----------------------------
pcre2grep: pcre2_match() gave error -47 while matching text that starts: pcre2grep: pcre2_match() gave error -45 while matching text that starts:
This is a file of miscellaneous text that is used as test data for checking This is a file of miscellaneous text that is used as test data for checking
that the pcregrep command is working correctly. The file must be more than 24K that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read long so that it needs more than a single read
pcre2grep: Error -46, -47 or -52 means that a resource limit was exceeded. pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded.
pcre2grep: Check your regex for nested unlimited loops. pcre2grep: Check your regex for nested unlimited loops.
RC=1 RC=1
---------------------------- Test 63 ----------------------------- ---------------------------- Test 63 -----------------------------
pcre2grep: pcre2_match() gave error -52 while matching text that starts: pcre2grep: pcre2_match() gave error -50 while matching text that starts:
This is a file of miscellaneous text that is used as test data for checking This is a file of miscellaneous text that is used as test data for checking
that the pcregrep command is working correctly. The file must be more than 24K that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read long so that it needs more than a single read
pcre2grep: Error -46, -47 or -52 means that a resource limit was exceeded. pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded.
pcre2grep: Check your regex for nested unlimited loops. pcre2grep: Check your regex for nested unlimited loops.
RC=1 RC=1
---------------------------- Test 64 ------------------------------ ---------------------------- Test 64 ------------------------------

View File

@ -888,7 +888,7 @@ Subject length lower bound = 3
a\x{123}aa\=offset=1 a\x{123}aa\=offset=1
0: aa 0: aa
a\x{123}aa\=offset=2 a\x{123}aa\=offset=2
Error -36 (bad UTF-8 offset) Error -35 (bad UTF-8 offset)
a\x{123}aa\=offset=3 a\x{123}aa\=offset=3
0: aa 0: aa
a\x{123}aa\=offset=4 a\x{123}aa\=offset=4
@ -896,7 +896,7 @@ Error -36 (bad UTF-8 offset)
a\x{123}aa\=offset=5 a\x{123}aa\=offset=5
No match No match
a\x{123}aa\=offset=6 a\x{123}aa\=offset=6
Failed: error -34: bad offset value Failed: error -33: bad offset value
/\x{1234}+/Ii,utf /\x{1234}+/Ii,utf
Capturing subpattern count = 0 Capturing subpattern count = 0

View File

@ -787,9 +787,9 @@ Subject length lower bound = 3
a\x{123}aa\=offset=4 a\x{123}aa\=offset=4
No match No match
a\x{123}aa\=offset=5 a\x{123}aa\=offset=5
Failed: error -34: bad offset value Failed: error -33: bad offset value
a\x{123}aa\=offset=6 a\x{123}aa\=offset=6
Failed: error -34: bad offset value Failed: error -33: bad offset value
/\x{1234}+/Ii,utf /\x{1234}+/Ii,utf
Capturing subpattern count = 0 Capturing subpattern count = 0
@ -851,9 +851,9 @@ Subject length lower bound = 1
/a/utf /a/utf
\x{10000}\=offset=1 \x{10000}\=offset=1
Error -36 (bad UTF-16 offset) Error -35 (bad UTF-16 offset)
\x{10000}ab\=offset=1 \x{10000}ab\=offset=1
Error -36 (bad UTF-16 offset) Error -35 (bad UTF-16 offset)
\x{10000}ab\=offset=2 \x{10000}ab\=offset=2
0: a 0: a
\x{10000}ab\=offset=3 \x{10000}ab\=offset=3
@ -861,7 +861,7 @@ No match
\x{10000}ab\=offset=4 \x{10000}ab\=offset=4
No match No match
\x{10000}ab\=offset=5 \x{10000}ab\=offset=5
Failed: error -34: bad offset value Failed: error -33: bad offset value
/<2F><><EFBFBD>/utf /<2F><><EFBFBD>/utf
Failed: error -26 at offset 0: UTF-16 error: isolated low surrogate Failed: error -26 at offset 0: UTF-16 error: isolated low surrogate

View File

@ -779,9 +779,9 @@ Subject length lower bound = 3
a\x{123}aa\=offset=4 a\x{123}aa\=offset=4
No match No match
a\x{123}aa\=offset=5 a\x{123}aa\=offset=5
Failed: error -34: bad offset value Failed: error -33: bad offset value
a\x{123}aa\=offset=6 a\x{123}aa\=offset=6
Failed: error -34: bad offset value Failed: error -33: bad offset value
/\x{1234}+/Ii,utf /\x{1234}+/Ii,utf
Capturing subpattern count = 0 Capturing subpattern count = 0
@ -851,9 +851,9 @@ No match
\x{10000}ab\=offset=3 \x{10000}ab\=offset=3
No match No match
\x{10000}ab\=offset=4 \x{10000}ab\=offset=4
Failed: error -34: bad offset value Failed: error -33: bad offset value
\x{10000}ab\=offset=5 \x{10000}ab\=offset=5
Failed: error -34: bad offset value Failed: error -33: bad offset value
/<2F><><EFBFBD>/utf /<2F><><EFBFBD>/utf
Failed: error -27 at offset 0: UTF-32 error: code points 0xd800-0xdfff are not defined Failed: error -27 at offset 0: UTF-32 error: code points 0xd800-0xdfff are not defined

44
testdata/testoutput2 vendored
View File

@ -986,7 +986,7 @@ Subject length lower bound = 4
0: abcd 0: abcd
1: a 1: a
2: d 2: d
copy substring 5 failed (-49): unknown or unset substring copy substring 5 failed (-47): unknown or unset substring
/(.{20})/I /(.{20})/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -1040,9 +1040,9 @@ Subject length lower bound = 4
2: <unset> 2: <unset>
3: f 3: f
1G a (1) 1G a (1)
get substring 2 failed (-49): unknown or unset substring get substring 2 failed (-47): unknown or unset substring
3G f (1) 3G f (1)
get substring 4 failed (-49): unknown or unset substring get substring 4 failed (-47): unknown or unset substring
0L adef 0L adef
1L a 1L a
2L 2L
@ -1055,7 +1055,7 @@ get substring 4 failed (-49): unknown or unset substring
1G bc (2) 1G bc (2)
2G bc (2) 2G bc (2)
3G f (1) 3G f (1)
get substring 4 failed (-49): unknown or unset substring get substring 4 failed (-47): unknown or unset substring
0L bcdef 0L bcdef
1L bc 1L bc
2L bc 2L bc
@ -4370,7 +4370,7 @@ Subject length lower bound = 8
0: abcdefgh 0: abcdefgh
1: cd 1: cd
2: gh 2: gh
copy substring 'three' failed (-49): unknown or unset substring copy substring 'three' failed (-47): unknown or unset substring
/(?P<Tes>)(?P<Test>)/IB /(?P<Tes>)(?P<Test>)/IB
------------------------------------------------------------------ ------------------------------------------------------------------
@ -5737,7 +5737,7 @@ No match
0: a1 0: a1
1: a1 1: a1
2: a1 2: a1
copy substring 'Z' failed (-49): unknown or unset substring copy substring 'Z' failed (-47): unknown or unset substring
C a1 (2) A C a1 (2) A
/(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames /(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames
@ -5778,7 +5778,7 @@ Subject length lower bound = 2
C a (1) A C a (1) A
cd\=copy=A cd\=copy=A
0: cd 0: cd
copy substring 'A' failed (-49): unknown or unset substring copy substring 'A' failed (-47): unknown or unset substring
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames /^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
Capturing subpattern count = 4 Capturing subpattern count = 4
@ -5822,7 +5822,7 @@ No match
0: a1 0: a1
1: a1 1: a1
2: a1 2: a1
get substring 'Z' failed (-49): unknown or unset substring get substring 'Z' failed (-47): unknown or unset substring
G a1 (2) A G a1 (2) A
/^(?P<A>a)(?P<A>b)/I,dupnames /^(?P<A>a)(?P<A>b)/I,dupnames
@ -5853,7 +5853,7 @@ Subject length lower bound = 2
G a (1) A G a (1) A
cd\=get=A cd\=get=A
0: cd 0: cd
get substring 'A' failed (-49): unknown or unset substring get substring 'A' failed (-47): unknown or unset substring
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames /^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
Capturing subpattern count = 4 Capturing subpattern count = 4
@ -10446,7 +10446,7 @@ Partial match: abc
abc\=offset=3 abc\=offset=3
No match No match
abc\=offset=4 abc\=offset=4
Failed: error -34: bad offset value Failed: error -33: bad offset value
abc\=offset=-4 abc\=offset=-4
** Invalid value in 'offset=-4' ** Invalid value in 'offset=-4'
@ -11129,15 +11129,15 @@ Matched, but too many substrings
/((?2))((?1))/ /((?2))((?1))/
abc abc
Failed: error -51: nested recursion at the same subject position Failed: error -49: nested recursion at the same subject position
/((?(R2)a+|(?1)b))/ /((?(R2)a+|(?1)b))/
aaaabcde aaaabcde
Failed: error -51: nested recursion at the same subject position Failed: error -49: nested recursion at the same subject position
/(?(R)a*(?1)|((?R))b)/ /(?(R)a*(?1)|((?R))b)/
aaaabcde aaaabcde
Failed: error -51: nested recursion at the same subject position Failed: error -49: nested recursion at the same subject position
/(a+|(?R)b)/ /(a+|(?R)b)/
Failed: error 140 at offset 7: recursion could loop indefinitely Failed: error 140 at offset 7: recursion could loop indefinitely
@ -12129,11 +12129,11 @@ Subject length lower bound = 3
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
No match No match
aaaaaaaaaaaaaz\=match_limit=3000 aaaaaaaaaaaaaz\=match_limit=3000
Failed: error -47: match limit exceeded Failed: error -45: match limit exceeded
/(a+)*zz/ /(a+)*zz/
aaaaaaaaaaaaaz\=recursion_limit=10 aaaaaaaaaaaaaz\=recursion_limit=10
Failed: error -52: recursion limit exceeded Failed: error -50: recursion limit exceeded
/(*LIMIT_MATCH=3000)(a+)*zz/I /(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -12142,9 +12142,9 @@ Starting code units: a z
Last code unit = 'z' Last code unit = 'z'
Subject length lower bound = 2 Subject length lower bound = 2
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
Failed: error -47: match limit exceeded Failed: error -45: match limit exceeded
aaaaaaaaaaaaaz\=match_limit=60000 aaaaaaaaaaaaaz\=match_limit=60000
Failed: error -47: match limit exceeded Failed: error -45: match limit exceeded
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -12153,7 +12153,7 @@ Starting code units: a z
Last code unit = 'z' Last code unit = 'z'
Subject length lower bound = 2 Subject length lower bound = 2
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
Failed: error -47: match limit exceeded Failed: error -45: match limit exceeded
/(*LIMIT_MATCH=60000)(a+)*zz/I /(*LIMIT_MATCH=60000)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -12164,7 +12164,7 @@ Subject length lower bound = 2
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
No match No match
aaaaaaaaaaaaaz\=match_limit=3000 aaaaaaaaaaaaaz\=match_limit=3000
Failed: error -47: match limit exceeded Failed: error -45: match limit exceeded
/(*LIMIT_RECURSION=10)(a+)*zz/I /(*LIMIT_RECURSION=10)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -12173,9 +12173,9 @@ Starting code units: a z
Last code unit = 'z' Last code unit = 'z'
Subject length lower bound = 2 Subject length lower bound = 2
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
Failed: error -52: recursion limit exceeded Failed: error -50: recursion limit exceeded
aaaaaaaaaaaaaz\=recursion_limit=1000 aaaaaaaaaaaaaz\=recursion_limit=1000
Failed: error -52: recursion limit exceeded Failed: error -50: recursion limit exceeded
/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I /(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -12195,7 +12195,7 @@ Subject length lower bound = 2
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
No match No match
aaaaaaaaaaaaaz\=recursion_limit=10 aaaaaaaaaaaaaz\=recursion_limit=10
Failed: error -52: recursion limit exceeded Failed: error -50: recursion limit exceeded
# This test causes a segfault with Perl 5.18.0 # This test causes a segfault with Perl 5.18.0

26
testdata/testoutput6 vendored
View File

@ -6132,7 +6132,7 @@ No match
/^(?(2)a|(1)(2))+$/ /^(?(2)a|(1)(2))+$/
123a 123a
Failed: error -40: backreference condition or recursion test not supported for DFA matching Failed: error -39: backreference condition or recursion test not supported for DFA matching
/(?<=a|bbbb)c/ /(?<=a|bbbb)c/
ac ac
@ -7059,7 +7059,7 @@ Partial match: dogs
/abc\K123/ /abc\K123/
xyzabc123pqr xyzabc123pqr
Failed: error -41: item unsupported for DFA matching Failed: error -40: item unsupported for DFA matching
/(?<=abc)123/ /(?<=abc)123/
xyzabc123pqr xyzabc123pqr
@ -7185,29 +7185,29 @@ No match
/^(?!a(*SKIP)b)/ /^(?!a(*SKIP)b)/
ac ac
Failed: error -41: item unsupported for DFA matching Failed: error -40: item unsupported for DFA matching
/^(?=a(*SKIP)b|ac)/ /^(?=a(*SKIP)b|ac)/
** Failers ** Failers
No match No match
ac ac
Failed: error -41: item unsupported for DFA matching Failed: error -40: item unsupported for DFA matching
/^(?=a(*THEN)b|ac)/ /^(?=a(*THEN)b|ac)/
ac ac
Failed: error -41: item unsupported for DFA matching Failed: error -40: item unsupported for DFA matching
/^(?=a(*PRUNE)b)/ /^(?=a(*PRUNE)b)/
ab ab
Failed: error -41: item unsupported for DFA matching Failed: error -40: item unsupported for DFA matching
** Failers ** Failers
No match No match
ac ac
Failed: error -41: item unsupported for DFA matching Failed: error -40: item unsupported for DFA matching
/^(?(?!a(*SKIP)b))/ /^(?(?!a(*SKIP)b))/
ac ac
Failed: error -41: item unsupported for DFA matching Failed: error -40: item unsupported for DFA matching
/(?<=abc)def/ /(?<=abc)def/
abc\=ph abc\=ph
@ -7277,7 +7277,7 @@ Partial match: abc
abc\=offset=3 abc\=offset=3
No match No match
abc\=offset=4 abc\=offset=4
Failed: error -34: bad offset value Failed: error -33: bad offset value
abc\=offset=-4 abc\=offset=-4
** Invalid value in 'offset=-4' ** Invalid value in 'offset=-4'
@ -7403,7 +7403,7 @@ No match
/((?2))((?1))/ /((?2))((?1))/
abc abc
Failed: error -51: nested recursion at the same subject position Failed: error -49: nested recursion at the same subject position
/(?(R)a+|(?R)b)/ /(?(R)a+|(?R)b)/
aaaabcde aaaabcde
@ -7419,11 +7419,11 @@ Failed: error -51: nested recursion at the same subject position
/((?(R2)a+|(?1)b))/ /((?(R2)a+|(?1)b))/
aaaabcde aaaabcde
Failed: error -40: backreference condition or recursion test not supported for DFA matching Failed: error -39: backreference condition or recursion test not supported for DFA matching
/(?(R)a*(?1)|((?R))b)/ /(?(R)a*(?1)|((?R))b)/
aaaabcde aaaabcde
Failed: error -51: nested recursion at the same subject position Failed: error -49: nested recursion at the same subject position
/(a+)/no_auto_possess /(a+)/no_auto_possess
aaaa\=ovector=3 aaaa\=ovector=3
@ -7572,7 +7572,7 @@ Partial match: \x0d\x0d\x0d
/abcdef/ /abcdef/
abc\=dfa_restart abc\=dfa_restart
Failed: error -38: invalid data in workspace for DFA restart Failed: error -37: invalid data in workspace for DFA restart
/<H((?(?!<H|F>)(.)|(?R))++)*F>/ /<H((?(?!<H|F>)(.)|(?R))++)*F>/
text <H more text <H texting more hexA0-"\xA0" hex above 7F-"\xBC" F> text xxxxx <H text F> text F> text2 <H text sample F> more text. text <H more text <H texting more hexA0-"\xA0" hex above 7F-"\xBC" F> text xxxxx <H text F> text F> text2 <H text sample F> more text.

View File

@ -1230,7 +1230,7 @@ Partial match: the cat
/ab\Cde/utf /ab\Cde/utf
abXde abXde
Failed: error -41: item unsupported for DFA matching Failed: error -40: item unsupported for DFA matching
/(?<=ab\Cde)X/utf /(?<=ab\Cde)X/utf
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion