API documentation and a lot of little related changes to the code.
This commit is contained in:
parent
de4f203346
commit
eee8530add
CMakeLists.txtMakefile.amRunTestconfig-cmake.h.inconfigure.ac
doc
maint
src
config.h.genericpcre2.h.inpcre2_auto_possess.cpcre2_compile.cpcre2_config.cpcre2_context.cpcre2_dfa_match.cpcre2_error.cpcre2_internal.hpcre2_intmodedep.hpcre2_match.cpcre2_newline.cpcre2_ord2utf.cpcre2_pattern_info.cpcre2_printint.cpcre2_study.cpcre2_substring.cpcre2_tables.cpcre2_ucd.cpcre2_valid_utf.cpcre2_xclass.cpcre2test.c
testdata
|
@ -149,8 +149,8 @@ SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
|
||||||
SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL
|
SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL
|
||||||
"Enable use of Just-in-time compiling in pcre2grep.")
|
"Enable use of Just-in-time compiling in pcre2grep.")
|
||||||
|
|
||||||
SET(PCRE2_SUPPORT_UTF OFF CACHE BOOL
|
SET(PCRE2_SUPPORT_UNICODE OFF CACHE BOOL
|
||||||
"Enable support for Unicode Transformation Format (UTF-8/UTF-16/UTF-32) encoding.")
|
"Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
|
||||||
|
|
||||||
SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
||||||
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
|
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
|
||||||
|
@ -245,9 +245,9 @@ IF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||||
SET(BSR_ANYCRLF 1)
|
SET(BSR_ANYCRLF 1)
|
||||||
ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||||
|
|
||||||
IF(PCRE2_SUPPORT_UTF)
|
IF(PCRE2_SUPPORT_UNICODE)
|
||||||
SET(SUPPORT_UTF 1)
|
SET(SUPPORT_UNICODE 1)
|
||||||
ENDIF(PCRE2_SUPPORT_UTF)
|
ENDIF(PCRE2_SUPPORT_UNICODE)
|
||||||
|
|
||||||
IF(PCRE2_SUPPORT_JIT)
|
IF(PCRE2_SUPPORT_JIT)
|
||||||
SET(SUPPORT_JIT 1)
|
SET(SUPPORT_JIT 1)
|
||||||
|
@ -709,7 +709,7 @@ IF(PCRE2_SHOW_REPORT)
|
||||||
MESSAGE(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE16}")
|
MESSAGE(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE16}")
|
||||||
MESSAGE(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE32}")
|
MESSAGE(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE32}")
|
||||||
MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}")
|
MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}")
|
||||||
MESSAGE(STATUS " Enable UTF support .............. : ${PCRE2_SUPPORT_UTF}")
|
MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
|
||||||
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}")
|
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}")
|
||||||
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
|
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
|
||||||
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}")
|
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}")
|
||||||
|
|
|
@ -76,7 +76,10 @@ AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
|
||||||
# doc/html/pcreunicode.html
|
# doc/html/pcreunicode.html
|
||||||
|
|
||||||
# FIXME
|
# FIXME
|
||||||
#dist_man_MANS = \
|
dist_man_MANS = \
|
||||||
|
doc/pcre2api.3
|
||||||
|
|
||||||
|
|
||||||
# doc/pcre2-config.1 \
|
# doc/pcre2-config.1 \
|
||||||
# doc/pcre2.3 \
|
# doc/pcre2.3 \
|
||||||
# doc/pcre2-16.3 \
|
# doc/pcre2-16.3 \
|
||||||
|
@ -108,7 +111,6 @@ AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
|
||||||
# doc/pcre2_utf16_to_host_byte_order.3 \
|
# doc/pcre2_utf16_to_host_byte_order.3 \
|
||||||
# doc/pcre2_utf32_to_host_byte_order.3 \
|
# doc/pcre2_utf32_to_host_byte_order.3 \
|
||||||
# doc/pcre2_version.3 \
|
# doc/pcre2_version.3 \
|
||||||
# doc/pcre2api.3 \
|
|
||||||
# doc/pcre2build.3 \
|
# doc/pcre2build.3 \
|
||||||
# doc/pcre2callout.3 \
|
# doc/pcre2callout.3 \
|
||||||
# doc/pcre2compat.3 \
|
# doc/pcre2compat.3 \
|
||||||
|
|
7
RunTest
7
RunTest
|
@ -314,10 +314,11 @@ else
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# UTF support always applies to all bit sizes if both are supported; we can't
|
# UTF support is implied by Unicode support, and it always applies to all bit
|
||||||
# have UTF-8 support without UTF-16 or UTF-32 support.
|
# sizes if both are supported; we can't have UTF-8 support without UTF-16 or
|
||||||
|
# UTF-32 support.
|
||||||
|
|
||||||
$sim ./pcre2test -C utf >/dev/null
|
$sim ./pcre2test -C unicode >/dev/null
|
||||||
utf=$?
|
utf=$?
|
||||||
|
|
||||||
jitopt=
|
jitopt=
|
||||||
|
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
#cmakedefine SUPPORT_JIT 1
|
#cmakedefine SUPPORT_JIT 1
|
||||||
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
|
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
|
||||||
#cmakedefine SUPPORT_UTF 1
|
#cmakedefine SUPPORT_UNICODE 1
|
||||||
#cmakedefine SUPPORT_VALGRIND 1
|
#cmakedefine SUPPORT_VALGRIND 1
|
||||||
|
|
||||||
#cmakedefine BSR_ANYCRLF 1
|
#cmakedefine BSR_ANYCRLF 1
|
||||||
|
|
32
configure.ac
32
configure.ac
|
@ -137,11 +137,11 @@ AC_ARG_ENABLE(rebuild-chartables,
|
||||||
[rebuild character tables in current locale]),
|
[rebuild character tables in current locale]),
|
||||||
, enable_rebuild_chartables=no)
|
, enable_rebuild_chartables=no)
|
||||||
|
|
||||||
# Handle --enable-utf (disabled by default)
|
# Handle --enable-unicode (disabled by default)
|
||||||
AC_ARG_ENABLE(utf,
|
AC_ARG_ENABLE(unicode,
|
||||||
AS_HELP_STRING([--enable-utf],
|
AS_HELP_STRING([--enable-unicode],
|
||||||
[enable UTF-8/16/32 support (incompatible with --enable-ebcdic)]),
|
[enable Unicode support (incompatible with --enable-ebcdic)]),
|
||||||
, enable_utf=unset)
|
, enable_unicode=unset)
|
||||||
|
|
||||||
# Handle newline options
|
# Handle newline options
|
||||||
ac_pcre2_newline=lf
|
ac_pcre2_newline=lf
|
||||||
|
@ -288,10 +288,10 @@ then
|
||||||
AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled])
|
AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# enable_utf is disabled by default.
|
# enable_unicode is disabled by default.
|
||||||
if test "x$enable_utf" = "xunset"
|
if test "x$enable_unicode" = "xunset"
|
||||||
then
|
then
|
||||||
enable_utf=no
|
enable_unicode=no
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Convert the newline identifier into the appropriate integer value. These must
|
# Convert the newline identifier into the appropriate integer value. These must
|
||||||
|
@ -320,8 +320,8 @@ fi
|
||||||
#
|
#
|
||||||
if test "x$enable_ebcdic" = "xyes"; then
|
if test "x$enable_ebcdic" = "xyes"; then
|
||||||
enable_rebuild_chartables=yes
|
enable_rebuild_chartables=yes
|
||||||
if test "x$enable_utf" = "xyes"; then
|
if test "x$enable_unicode" = "xyes"; then
|
||||||
AC_MSG_ERROR([support for EBCDIC and UTF-8/16/32 cannot be enabled at the same time])
|
AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time])
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -372,7 +372,7 @@ AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes")
|
||||||
AM_CONDITIONAL(WITH_PCRE32, test "x$enable_pcre32" = "xyes")
|
AM_CONDITIONAL(WITH_PCRE32, test "x$enable_pcre32" = "xyes")
|
||||||
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
||||||
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
|
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
|
||||||
AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes")
|
AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
|
||||||
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
|
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
|
||||||
|
|
||||||
# Checks for typedefs, structures, and compiler characteristics.
|
# Checks for typedefs, structures, and compiler characteristics.
|
||||||
|
@ -513,12 +513,12 @@ if test "$enable_pcre2grep_jit" = "yes"; then
|
||||||
Define to any value to enable JIT support in pcre2grep.])
|
Define to any value to enable JIT support in pcre2grep.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$enable_utf" = "yes"; then
|
if test "$enable_unicode" = "yes"; then
|
||||||
AC_DEFINE([SUPPORT_UTF], [], [
|
AC_DEFINE([SUPPORT_UNICODE], [], [
|
||||||
Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
Define to any value to enable support for Unicode and UTF encoding.
|
||||||
This will work even in an EBCDIC environment, but it is incompatible
|
This will work even in an EBCDIC environment, but it is incompatible
|
||||||
with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
|
with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
|
||||||
code *or* ASCII/UTF-8/16/32, but not both at once.])
|
code *or* ASCII/Unicode, but not both at once.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$enable_stack_for_recursion" = "no"; then
|
if test "$enable_stack_for_recursion" = "no"; then
|
||||||
|
@ -854,7 +854,7 @@ $PACKAGE-$VERSION configuration summary:
|
||||||
Build 16-bit pcre2 library ...... : ${enable_pcre16}
|
Build 16-bit pcre2 library ...... : ${enable_pcre16}
|
||||||
Build 32-bit pcre2 library ...... : ${enable_pcre32}
|
Build 32-bit pcre2 library ...... : ${enable_pcre32}
|
||||||
Enable JIT compiling support .... : ${enable_jit}
|
Enable JIT compiling support .... : ${enable_jit}
|
||||||
Enable UTF-8/16/32 support ...... : ${enable_utf}
|
Enable Unicode support .......... : ${enable_unicode}
|
||||||
Newline char/sequence ........... : ${enable_newline}
|
Newline char/sequence ........... : ${enable_newline}
|
||||||
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
||||||
EBCDIC coding ................... : ${enable_ebcdic}
|
EBCDIC coding ................... : ${enable_ebcdic}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -120,7 +120,7 @@ to the same value:
|
||||||
pcre16 the 16-bit library was built
|
pcre16 the 16-bit library was built
|
||||||
pcre32 the 32-bit library was built
|
pcre32 the 32-bit library was built
|
||||||
pcre8 the 8-bit library was built
|
pcre8 the 8-bit library was built
|
||||||
utf UTF and Unicode property support is available
|
unicode Unicode support is available
|
||||||
.sp
|
.sp
|
||||||
If an unknown option is given, an error message is output; the exit code is 0.
|
If an unknown option is given, an error message is output; the exit code is 0.
|
||||||
.TP 10
|
.TP 10
|
||||||
|
|
|
@ -0,0 +1,254 @@
|
||||||
|
.TH PCRE2UNICODE 3 "16 September 2014" "PCRE2 10.00"
|
||||||
|
.SH NAME
|
||||||
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
|
.SH "UNICODE AND UTF SUPPORT"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
When PCRE2 is built with Unicode support, it acquires knowledge of Unicode
|
||||||
|
character properties and can process text strings in UTF-8, UTF-16, or UTF-32
|
||||||
|
format (depending on the code unit width). By default, PCRE2 assumes that one
|
||||||
|
code unit is one character. To process a pattern as a UTF string, where a
|
||||||
|
character may require more than one code unit, you must call
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2_compile()\fP
|
||||||
|
.\"
|
||||||
|
with the PCRE2_UTF option flag, or the pattern must start with the sequence
|
||||||
|
(*UTF). When either of these is the case, both the pattern and any subject
|
||||||
|
strings that are matched against it are treated as UTF strings instead of
|
||||||
|
strings of individual one-code-unit characters.
|
||||||
|
.P
|
||||||
|
If you build PCRE2 with Unicode support, the library will be bigger, but the
|
||||||
|
additional run time overhead is limited to testing the PCRE2_UTF flag
|
||||||
|
occasionally, so should not be very much.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "UNICODE PROPERTY SUPPORT"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
When PCRE2 is built with Unicode support, the escape sequences \ep{..},
|
||||||
|
\eP{..}, and \eX can be used. The Unicode properties that can be tested are
|
||||||
|
limited to the general category properties such as Lu for an upper case letter
|
||||||
|
or Nd for a decimal number, the Unicode script names such as Arabic or Han, and
|
||||||
|
the derived properties Any and L&. Full lists are given in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2pattern\fP
|
||||||
|
.\"
|
||||||
|
and
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2syntax\fP
|
||||||
|
.\"
|
||||||
|
documentation. Only the short names for properties are supported. For example,
|
||||||
|
\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
|
||||||
|
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
|
||||||
|
compatibility with Perl 5.6. PCRE2 does not support this.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "WIDE CHARACTERS AND UTF MODES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
Codepoints less than 256 can be specified in patterns by either braced or
|
||||||
|
unbraced hexadecimal escape sequences (for example, \ex{b3} or \exb3). Larger
|
||||||
|
values have to use braced sequences. Unbraced octal code points up to \e777 are
|
||||||
|
also recognized; larger ones can be coded using \eo{...}.
|
||||||
|
.P
|
||||||
|
In UTF modes, repeat quantifiers apply to complete UTF characters, not to
|
||||||
|
individual code units.
|
||||||
|
.P
|
||||||
|
In UTF modes, the dot metacharacter matches one UTF character instead of a
|
||||||
|
single code unit.
|
||||||
|
.P
|
||||||
|
The escape sequence \eC can be used to match a single code unit, in a UTF mode,
|
||||||
|
but its use can lead to some strange effects because it breaks up multi-unit
|
||||||
|
characters (see the description of \eC in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2pattern\fP
|
||||||
|
.\"
|
||||||
|
documentation). The use of \eC is not supported in the alternative matching
|
||||||
|
function \fBpcre2_dfa_match()\fP, nor is it supported in UTF mode by the JIT
|
||||||
|
optimization. If JIT optimization is requested for a UTF pattern that contains
|
||||||
|
\eC, it will not succeed, and so the matching will be carried out by the normal
|
||||||
|
interpretive function.
|
||||||
|
.P
|
||||||
|
The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly test
|
||||||
|
characters of any code value, but, by default, the characters that PCRE2
|
||||||
|
recognizes as digits, spaces, or word characters remain the same set as in
|
||||||
|
non-UTF mode, all with code points less than 256. This remains true even when
|
||||||
|
PCRE2 is built to include Unicode support, because to do otherwise would slow
|
||||||
|
down matching in many common cases. Note that this also applies to \eb
|
||||||
|
and \eB, because they are defined in terms of \ew and \eW. If you want
|
||||||
|
to test for a wider sense of, say, "digit", you can use explicit Unicode
|
||||||
|
property tests such as \ep{Nd}. Alternatively, if you set the PCRE2_UCP option,
|
||||||
|
the way that the character escapes work is changed so that Unicode properties
|
||||||
|
are used to determine which characters match. There are more details in the
|
||||||
|
section on
|
||||||
|
.\" HTML <a href="pcre2pattern.html#genericchartypes">
|
||||||
|
.\" </a>
|
||||||
|
generic character types
|
||||||
|
.\"
|
||||||
|
in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2pattern\fP
|
||||||
|
.\"
|
||||||
|
documentation.
|
||||||
|
.P
|
||||||
|
Similarly, characters that match the POSIX named character classes are all
|
||||||
|
low-valued characters, unless the PCRE2_UCP option is set.
|
||||||
|
.P
|
||||||
|
However, the special horizontal and vertical white space matching escapes (\eh,
|
||||||
|
\eH, \ev, and \eV) do match all the appropriate Unicode characters, whether or
|
||||||
|
not PCRE2_UCP is set.
|
||||||
|
.P
|
||||||
|
Case-insensitive matching in UTF mode makes use of Unicode properties. A few
|
||||||
|
Unicode characters such as Greek sigma have more than two codepoints that are
|
||||||
|
case-equivalent, and these are treated as such.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "VALIDITY OF UTF STRINGS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
|
||||||
|
are (by default) checked for validity on entry to the relevant functions.
|
||||||
|
If an invalid UTF string is passed, an error return is given.
|
||||||
|
.P
|
||||||
|
UTF-16 and UTF-32 strings can indicate their endianness by a special code known
|
||||||
|
as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
|
||||||
|
strings to be in host byte order.
|
||||||
|
.P
|
||||||
|
The entire string is checked before any other processing takes place. In
|
||||||
|
addition to checking the format of the string, there is a check to ensure that
|
||||||
|
all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area.
|
||||||
|
The so-called "non-character" code points are not excluded because Unicode
|
||||||
|
corrigendum #9 makes it clear that they should not be.
|
||||||
|
.P
|
||||||
|
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
|
||||||
|
where they are used in pairs to encode code points with values greater than
|
||||||
|
0xFFFF. The code points that are encoded by UTF-16 pairs are available
|
||||||
|
independently in the UTF-8 and UTF-32 encodings. (In other words, the whole
|
||||||
|
surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
|
||||||
|
UTF-32.)
|
||||||
|
.P
|
||||||
|
In some situations, you may already know that your strings are valid, and
|
||||||
|
therefore want to skip these checks in order to improve performance, for
|
||||||
|
example in the case of a long subject string that is being scanned repeatedly.
|
||||||
|
If you set the PCRE2_NO_UTF_CHECK flag at compile time or at run time, PCRE2
|
||||||
|
assumes that the pattern or subject it is given (respectively) contains only
|
||||||
|
valid UTF code unit sequences.
|
||||||
|
.P
|
||||||
|
Passing PCRE2_NO_UTF_CHECK to \fBpcre2_compile()\fP just disables the check for
|
||||||
|
the pattern; it does not also apply to subject strings. If you want to disable
|
||||||
|
the check for a subject string you must pass this option to \fBpcre2_match()\fP
|
||||||
|
or \fBpcre2_dfa_match()\fP.
|
||||||
|
.P
|
||||||
|
If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
|
||||||
|
is undefined and your program may crash or loop indefinitely.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.\" HTML <a name="utf8strings"></a>
|
||||||
|
.SS "Errors in UTF-8 strings"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The following negative error codes are given for invalid UTF-8 strings:
|
||||||
|
.sp
|
||||||
|
PCRE2_ERROR_UTF8_ERR1
|
||||||
|
PCRE2_ERROR_UTF8_ERR2
|
||||||
|
PCRE2_ERROR_UTF8_ERR3
|
||||||
|
PCRE2_ERROR_UTF8_ERR4
|
||||||
|
PCRE2_ERROR_UTF8_ERR5
|
||||||
|
.sp
|
||||||
|
The string ends with a truncated UTF-8 character; the code specifies how many
|
||||||
|
bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be
|
||||||
|
no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279)
|
||||||
|
allows for up to 6 bytes, and this is checked first; hence the possibility of
|
||||||
|
4 or 5 missing bytes.
|
||||||
|
.sp
|
||||||
|
PCRE2_ERROR_UTF8_ERR6
|
||||||
|
PCRE2_ERROR_UTF8_ERR7
|
||||||
|
PCRE2_ERROR_UTF8_ERR8
|
||||||
|
PCRE2_ERROR_UTF8_ERR9
|
||||||
|
PCRE2_ERROR_UTF8_ERR10
|
||||||
|
.sp
|
||||||
|
The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the
|
||||||
|
character do not have the binary value 0b10 (that is, either the most
|
||||||
|
significant bit is 0, or the next bit is 1).
|
||||||
|
.sp
|
||||||
|
PCRE2_ERROR_UTF8_ERR11
|
||||||
|
PCRE2_ERROR_UTF8_ERR12
|
||||||
|
.sp
|
||||||
|
A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long;
|
||||||
|
these code points are excluded by RFC 3629.
|
||||||
|
.sp
|
||||||
|
PCRE2_ERROR_UTF8_ERR13
|
||||||
|
.sp
|
||||||
|
A 4-byte character has a value greater than 0x10ffff; these code points are
|
||||||
|
excluded by RFC 3629.
|
||||||
|
.sp
|
||||||
|
PCRE2_ERROR_UTF8_ERR14
|
||||||
|
.sp
|
||||||
|
A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of
|
||||||
|
code points is reserved by RFC 3629 for use with UTF-16, and so is excluded
|
||||||
|
from UTF-8.
|
||||||
|
.sp
|
||||||
|
PCRE2_ERROR_UTF8_ERR15
|
||||||
|
PCRE2_ERROR_UTF8_ERR16
|
||||||
|
PCRE2_ERROR_UTF8_ERR17
|
||||||
|
PCRE2_ERROR_UTF8_ERR18
|
||||||
|
PCRE2_ERROR_UTF8_ERR19
|
||||||
|
.sp
|
||||||
|
A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a
|
||||||
|
value that can be represented by fewer bytes, which is invalid. For example,
|
||||||
|
the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just
|
||||||
|
one byte.
|
||||||
|
.sp
|
||||||
|
PCRE2_ERROR_UTF8_ERR20
|
||||||
|
.sp
|
||||||
|
The two most significant bits of the first byte of a character have the binary
|
||||||
|
value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a
|
||||||
|
byte can only validly occur as the second or subsequent byte of a multi-byte
|
||||||
|
character.
|
||||||
|
.sp
|
||||||
|
PCRE2_ERROR_UTF8_ERR21
|
||||||
|
.sp
|
||||||
|
The first byte of a character has the value 0xfe or 0xff. These values can
|
||||||
|
never occur in a valid UTF-8 string.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.\" HTML <a name="utf16strings"></a>
|
||||||
|
.SS "Errors in UTF-16 strings"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The following negative error codes are given for invalid UTF-16 strings:
|
||||||
|
.sp
|
||||||
|
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at end of string
|
||||||
|
PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate follows high surrogate
|
||||||
|
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
|
||||||
|
.sp
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.\" HTML <a name="utf32strings"></a>
|
||||||
|
.SS "Errors in UTF-32 strings"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The following negative error codes are given for invalid UTF-32 strings:
|
||||||
|
.sp
|
||||||
|
PCRE2_ERROR_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff)
|
||||||
|
PCRE2_ERROR_UTF32_ERR2 Code point is greater than 0x10ffff
|
||||||
|
.sp
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH AUTHOR
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
Philip Hazel
|
||||||
|
University Computing Service
|
||||||
|
Cambridge CB2 3QH, England.
|
||||||
|
.fi
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH REVISION
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
Last updated: 16 September 2014
|
||||||
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
|
.fi
|
|
@ -202,7 +202,7 @@ if [ $ISGCC -ne 0 -a $usemain -ne 0 ]; then
|
||||||
echo "---------- Maximally configured test with -O2 ----------"
|
echo "---------- Maximally configured test with -O2 ----------"
|
||||||
SAVECLFAGS="$CFLAGS"
|
SAVECLFAGS="$CFLAGS"
|
||||||
CFLAGS="$CFLAGS -O2"
|
CFLAGS="$CFLAGS -O2"
|
||||||
opts="--disable-shared --enable-utf $enable_jit --enable-pcre16 --enable-pcre32"
|
opts="--disable-shared --enable-unicode $enable_jit --enable-pcre16 --enable-pcre32"
|
||||||
runtest
|
runtest
|
||||||
CFLAGS="$SAVECFLAGS"
|
CFLAGS="$SAVECFLAGS"
|
||||||
fi
|
fi
|
||||||
|
@ -211,23 +211,23 @@ if [ $usemain -ne 0 ]; then
|
||||||
echo "---------- Non-JIT tests in the current directory ----------"
|
echo "---------- Non-JIT tests in the current directory ----------"
|
||||||
for opts in \
|
for opts in \
|
||||||
"" \
|
"" \
|
||||||
"--enable-utf --disable-static" \
|
"--enable-unicode --disable-static" \
|
||||||
"--disable-stack-for-recursion --disable-shared" \
|
"--disable-stack-for-recursion --disable-shared" \
|
||||||
"--enable-utf --disable-shared" \
|
"--enable-unicode --disable-shared" \
|
||||||
"--enable-utf --disable-stack-for-recursion --disable-shared" \
|
"--enable-unicode --disable-stack-for-recursion --disable-shared" \
|
||||||
"--enable-utf --with-link-size=3 --disable-shared" \
|
"--enable-unicode --with-link-size=3 --disable-shared" \
|
||||||
"--enable-rebuild-chartables --disable-shared" \
|
"--enable-rebuild-chartables --disable-shared" \
|
||||||
"--enable-newline-is-any --disable-shared" \
|
"--enable-newline-is-any --disable-shared" \
|
||||||
"--enable-newline-is-cr --disable-shared" \
|
"--enable-newline-is-cr --disable-shared" \
|
||||||
"--enable-newline-is-crlf --disable-shared" \
|
"--enable-newline-is-crlf --disable-shared" \
|
||||||
"--enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" \
|
"--enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" \
|
||||||
"--enable-utf --enable-newline-is-any --disable-stack-for-recursion --disable-static" \
|
"--enable-unicode --enable-newline-is-any --disable-stack-for-recursion --disable-static" \
|
||||||
"--enable-pcre16" \
|
"--enable-pcre16" \
|
||||||
"--enable-pcre16 --disable-stack-for-recursion --disable-shared" \
|
"--enable-pcre16 --disable-stack-for-recursion --disable-shared" \
|
||||||
"--enable-pcre16 --enable-utf --disable-stack-for-recursion --disable-shared" \
|
"--enable-pcre16 --enable-unicode --disable-stack-for-recursion --disable-shared" \
|
||||||
"--enable-pcre32" \
|
"--enable-pcre32" \
|
||||||
"--enable-pcre32 --disable-stack-for-recursion --disable-shared" \
|
"--enable-pcre32 --disable-stack-for-recursion --disable-shared" \
|
||||||
"--enable-pcre32 --enable-utf --disable-stack-for-recursion --disable-shared" \
|
"--enable-pcre32 --enable-unicode --disable-stack-for-recursion --disable-shared" \
|
||||||
"--enable-pcre32 --enable-pcre16 --disable-shared" \
|
"--enable-pcre32 --enable-pcre16 --disable-shared" \
|
||||||
"--enable-pcre32 --enable-pcre16 --disable-pcre8 --disable-shared"
|
"--enable-pcre32 --enable-pcre16 --disable-pcre8 --disable-shared"
|
||||||
do
|
do
|
||||||
|
@ -241,18 +241,18 @@ if [ $usejit -ne 0 ]; then
|
||||||
echo "---------- JIT tests in the current directory ----------"
|
echo "---------- JIT tests in the current directory ----------"
|
||||||
for opts in \
|
for opts in \
|
||||||
"--enable-jit --disable-shared" \
|
"--enable-jit --disable-shared" \
|
||||||
"--enable-jit --enable-utf --disable-shared" \
|
"--enable-jit --enable-unicode --disable-shared" \
|
||||||
"--enable-jit --enable-utf --with-link-size=3 --disable-shared" \
|
"--enable-jit --enable-unicode --with-link-size=3 --disable-shared" \
|
||||||
"--enable-jit --enable-pcre16 --enable-utf --disable-shared" \
|
"--enable-jit --enable-pcre16 --enable-unicode --disable-shared" \
|
||||||
"--enable-jit --enable-pcre16 --disable-pcre8 --disable-shared" \
|
"--enable-jit --enable-pcre16 --disable-pcre8 --disable-shared" \
|
||||||
"--enable-jit --enable-pcre16 --disable-pcre8 --enable-utf --disable-shared" \
|
"--enable-jit --enable-pcre16 --disable-pcre8 --enable-unicode --disable-shared" \
|
||||||
"--enable-jit --enable-pcre16 --enable-utf --with-link-size=3 --disable-shared" \
|
"--enable-jit --enable-pcre16 --enable-unicode --with-link-size=3 --disable-shared" \
|
||||||
"--enable-jit --enable-pcre16 --enable-utf --with-link-size=4 --disable-shared" \
|
"--enable-jit --enable-pcre16 --enable-unicode --with-link-size=4 --disable-shared" \
|
||||||
"--enable-jit --enable-pcre32 --enable-utf --disable-shared" \
|
"--enable-jit --enable-pcre32 --enable-unicode --disable-shared" \
|
||||||
"--enable-jit --enable-pcre32 --disable-pcre8 --disable-shared" \
|
"--enable-jit --enable-pcre32 --disable-pcre8 --disable-shared" \
|
||||||
"--enable-jit --enable-pcre32 --disable-pcre8 --enable-utf --disable-shared" \
|
"--enable-jit --enable-pcre32 --disable-pcre8 --enable-unicode --disable-shared" \
|
||||||
"--enable-jit --enable-pcre32 --enable-utf --with-link-size=4 --disable-shared" \
|
"--enable-jit --enable-pcre32 --enable-unicode --with-link-size=4 --disable-shared" \
|
||||||
"--enable-jit --enable-pcre32 --enable-pcre16 --disable-pcre8 --enable-utf --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared"
|
"--enable-jit --enable-pcre32 --enable-pcre16 --disable-pcre8 --enable-unicode --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared"
|
||||||
do
|
do
|
||||||
runtest
|
runtest
|
||||||
done
|
done
|
||||||
|
@ -267,8 +267,8 @@ if [ $usevalgrind -ne 0 ]; then
|
||||||
withvalgrind="with valgrind"
|
withvalgrind="with valgrind"
|
||||||
|
|
||||||
for opts in \
|
for opts in \
|
||||||
"--enable-utf --disable-stack-for-recursion --disable-shared" \
|
"--enable-unicode --disable-stack-for-recursion --disable-shared" \
|
||||||
"--enable-utf --with-link-size=3 --disable-shared" \
|
"--enable-unicode --with-link-size=3 --disable-shared" \
|
||||||
"--disable-shared"
|
"--disable-shared"
|
||||||
do
|
do
|
||||||
opts="--enable-valgrind $opts"
|
opts="--enable-valgrind $opts"
|
||||||
|
@ -277,8 +277,8 @@ if [ $usevalgrind -ne 0 ]; then
|
||||||
|
|
||||||
if [ $usejit -ne 0 ]; then
|
if [ $usejit -ne 0 ]; then
|
||||||
for opts in \
|
for opts in \
|
||||||
"--enable-jit --enable-utf --disable-shared" \
|
"--enable-jit --enable-unicode --disable-shared" \
|
||||||
"--enable-jit --enable-pcre16 --enable-pcre32 --enable-utf"
|
"--enable-jit --enable-pcre16 --enable-pcre32 --enable-unicode"
|
||||||
do
|
do
|
||||||
opts="--enable-valgrind $opts"
|
opts="--enable-valgrind $opts"
|
||||||
runtest
|
runtest
|
||||||
|
@ -324,7 +324,7 @@ fi
|
||||||
|
|
||||||
if [ $usetmp -ne 0 ]; then
|
if [ $usetmp -ne 0 ]; then
|
||||||
for opts in \
|
for opts in \
|
||||||
"--enable-utf --disable-shared"
|
"--enable-unicode --disable-shared"
|
||||||
do
|
do
|
||||||
runtest
|
runtest
|
||||||
done
|
done
|
||||||
|
|
|
@ -472,7 +472,7 @@ print("condition to cut out the tables when not needed. But don't leave")
|
||||||
print("a totally empty module because some compilers barf at that.")
|
print("a totally empty module because some compilers barf at that.")
|
||||||
print("Instead, just supply small dummy tables. */")
|
print("Instead, just supply small dummy tables. */")
|
||||||
print()
|
print()
|
||||||
print("#ifndef SUPPORT_UTF")
|
print("#ifndef SUPPORT_UNICODE")
|
||||||
print("const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};")
|
print("const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};")
|
||||||
print("const uint8_t PRIV(ucd_stage1)[] = {0};")
|
print("const uint8_t PRIV(ucd_stage1)[] = {0};")
|
||||||
print("const uint16_t PRIV(ucd_stage2)[] = {0};")
|
print("const uint16_t PRIV(ucd_stage2)[] = {0};")
|
||||||
|
@ -507,7 +507,7 @@ print_table(min_stage2, 'PRIV(ucd_stage2)', min_block_size)
|
||||||
print("#if UCD_BLOCK_SIZE != %d" % min_block_size)
|
print("#if UCD_BLOCK_SIZE != %d" % min_block_size)
|
||||||
print("#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h")
|
print("#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h")
|
||||||
print("#endif")
|
print("#endif")
|
||||||
print("#endif /* SUPPORT_UTF */")
|
print("#endif /* SUPPORT_UNICODE */")
|
||||||
print()
|
print()
|
||||||
print("#endif /* PCRE2_PCRE2TEST */")
|
print("#endif /* PCRE2_PCRE2TEST */")
|
||||||
|
|
||||||
|
|
|
@ -19,8 +19,8 @@ one. */
|
||||||
#include "../src/config.h"
|
#include "../src/config.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SUPPORT_UTF
|
#ifndef SUPPORT_UNICODE
|
||||||
#define SUPPORT_UTF
|
#define SUPPORT_UNICODE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
|
@ -278,11 +278,11 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||||
/* #undef SUPPORT_PCRE8 */
|
/* #undef SUPPORT_PCRE8 */
|
||||||
|
|
||||||
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||||
This will work even in an EBCDIC environment, but it is incompatible with
|
will work even in an EBCDIC environment, but it is incompatible with the
|
||||||
the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||||
ASCII/UTF-8/16/32, but not both at once. */
|
ASCII/Unicode, but not both at once. */
|
||||||
/* #undef SUPPORT_UTF */
|
/* #undef SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||||
/* #undef SUPPORT_VALGRIND */
|
/* #undef SUPPORT_VALGRIND */
|
||||||
|
|
|
@ -193,32 +193,32 @@ must all be greater than zero. */
|
||||||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||||
|
|
||||||
/* Error codes for pcre2[_dfa]_match() */
|
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, and
|
||||||
|
context functions. */
|
||||||
|
|
||||||
#define PCRE2_ERROR_BADCOUNT (-29)
|
#define PCRE2_ERROR_BADDATA (-29)
|
||||||
#define PCRE2_ERROR_BADENDIANNESS (-30)
|
#define PCRE2_ERROR_BADLENGTH (-30)
|
||||||
#define PCRE2_ERROR_BADLENGTH (-31)
|
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||||
#define PCRE2_ERROR_BADMAGIC (-32)
|
#define PCRE2_ERROR_BADMODE (-32)
|
||||||
#define PCRE2_ERROR_BADMODE (-33)
|
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||||
#define PCRE2_ERROR_BADOFFSET (-34)
|
#define PCRE2_ERROR_BADOPTION (-34)
|
||||||
#define PCRE2_ERROR_BADOPTION (-35)
|
#define PCRE2_ERROR_BADUTFOFFSET (-35)
|
||||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
#define PCRE2_ERROR_CALLOUT (-36) /* Never used by PCRE2 itself */
|
||||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
#define PCRE2_ERROR_DFA_BADRESTART (-37)
|
||||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
#define PCRE2_ERROR_DFA_RECURSE (-38)
|
||||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
#define PCRE2_ERROR_DFA_UCOND (-39)
|
||||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
#define PCRE2_ERROR_DFA_UITEM (-40)
|
||||||
#define PCRE2_ERROR_DFA_UITEM (-41)
|
#define PCRE2_ERROR_DFA_WSSIZE (-41)
|
||||||
#define PCRE2_ERROR_DFA_UMLIMIT (-42)
|
#define PCRE2_ERROR_INTERNAL (-42)
|
||||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
#define PCRE2_ERROR_JIT_BADOPTION (-43)
|
||||||
#define PCRE2_ERROR_INTERNAL (-44)
|
#define PCRE2_ERROR_JIT_STACKLIMIT (-44)
|
||||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
#define PCRE2_ERROR_MATCHLIMIT (-45)
|
||||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
#define PCRE2_ERROR_NOMEMORY (-46)
|
||||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
#define PCRE2_ERROR_NOSUBSTRING (-47)
|
||||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
#define PCRE2_ERROR_NULL (-48)
|
||||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
#define PCRE2_ERROR_RECURSELOOP (-49)
|
||||||
#define PCRE2_ERROR_NULL (-50)
|
#define PCRE2_ERROR_RECURSIONLIMIT (-50)
|
||||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
#define PCRE2_ERROR_UNSET (-51)
|
||||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
|
||||||
|
|
||||||
/* Request types for pcre2_pattern_info() */
|
/* Request types for pcre2_pattern_info() */
|
||||||
|
|
||||||
|
@ -257,8 +257,8 @@ must all be greater than zero. */
|
||||||
#define PCRE2_CONFIG_PARENSLIMIT 7
|
#define PCRE2_CONFIG_PARENSLIMIT 7
|
||||||
#define PCRE2_CONFIG_RECURSIONLIMIT 5
|
#define PCRE2_CONFIG_RECURSIONLIMIT 5
|
||||||
#define PCRE2_CONFIG_STACKRECURSE 8
|
#define PCRE2_CONFIG_STACKRECURSE 8
|
||||||
#define PCRE2_CONFIG_UNICODE_VERSION 9
|
#define PCRE2_CONFIG_UNICODE 9
|
||||||
#define PCRE2_CONFIG_UTF 10
|
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||||
#define PCRE2_CONFIG_VERSION 11
|
#define PCRE2_CONFIG_VERSION 11
|
||||||
|
|
||||||
/* Types for code units in patterns and subject strings. */
|
/* Types for code units in patterns and subject strings. */
|
||||||
|
@ -338,7 +338,7 @@ expanded for each width below. Start with functions that give general
|
||||||
information. */
|
information. */
|
||||||
|
|
||||||
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||||
PCRE2_EXP_DECL int pcre2_config(int, void *, PCRE2_SIZE);
|
PCRE2_EXP_DECL int pcre2_config(uint32_t, void *, PCRE2_SIZE);
|
||||||
|
|
||||||
|
|
||||||
/* Functions for manipulating contexts. */
|
/* Functions for manipulating contexts. */
|
||||||
|
@ -437,16 +437,16 @@ PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
|
||||||
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
||||||
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
||||||
int, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
unsigned int, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
||||||
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
||||||
int, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
unsigned int, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
||||||
PCRE2_SPTR, PCRE2_SIZE *); \
|
PCRE2_SPTR, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
||||||
int, PCRE2_SIZE *); \
|
unsigned int, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
||||||
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
||||||
|
@ -622,24 +622,27 @@ PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
#undef PCRE2_OTHER_FUNCTIONS
|
#undef PCRE2_OTHER_FUNCTIONS
|
||||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
|
|
||||||
/* Re-define PCRE2_SUFFIX to use the external width value, if defined.
|
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||||
Otherwise, undefine the other macros and make PCRE2_SUFFIX a no-op, to reduce
|
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||||
confusion. */
|
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||||
|
|
||||||
#undef PCRE2_SUFFIX
|
#undef PCRE2_SUFFIX
|
||||||
#ifdef PCRE2_CODE_UNIT_WIDTH
|
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 8 && \
|
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||||
PCRE2_CODE_UNIT_WIDTH != 16 && \
|
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||||
PCRE2_CODE_UNIT_WIDTH != 32
|
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||||
#error PCRE2_CODE_UNIT_WIDTH must be 8, 16, or 32
|
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||||
#endif
|
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||||
|
PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||||
#else
|
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||||
#undef PCRE2_JOIN
|
#undef PCRE2_JOIN
|
||||||
#undef PCRE2_GLUE
|
#undef PCRE2_GLUE
|
||||||
#define PCRE2_SUFFIX(a) a
|
#define PCRE2_SUFFIX(a) a
|
||||||
|
#else
|
||||||
|
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||||
#endif
|
#endif
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
|
|
|
@ -231,7 +231,7 @@ static const uint8_t opcode_possessify[] = {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Check a character and a property *
|
* Check a character and a property *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -311,7 +311,7 @@ switch(ptype)
|
||||||
|
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -368,7 +368,7 @@ PCRE2_UCHAR base;
|
||||||
PCRE2_SPTR end;
|
PCRE2_SPTR end;
|
||||||
uint32_t chr;
|
uint32_t chr;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
uint32_t *clist_dest;
|
uint32_t *clist_dest;
|
||||||
const uint32_t *clist_src;
|
const uint32_t *clist_src;
|
||||||
#else
|
#else
|
||||||
|
@ -451,7 +451,7 @@ switch(c)
|
||||||
GETCHARINCTEST(chr, code);
|
GETCHARINCTEST(chr, code);
|
||||||
list[2] = chr;
|
list[2] = chr;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (chr < 128 || (chr < 256 && !utf))
|
if (chr < 128 || (chr < 256 && !utf))
|
||||||
list[3] = fcc[chr];
|
list[3] = fcc[chr];
|
||||||
else
|
else
|
||||||
|
@ -470,7 +470,7 @@ switch(c)
|
||||||
list[4] = NOTACHAR;
|
list[4] = NOTACHAR;
|
||||||
return code;
|
return code;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
case OP_PROP:
|
case OP_PROP:
|
||||||
case OP_NOTPROP:
|
case OP_NOTPROP:
|
||||||
if (code[0] != PT_CLIST)
|
if (code[0] != PT_CLIST)
|
||||||
|
@ -812,7 +812,7 @@ for(;;)
|
||||||
leftop = base_list[0];
|
leftop = base_list[0];
|
||||||
rightop = list[0];
|
rightop = list[0];
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
accepted = FALSE; /* Always set in non-unicode case. */
|
accepted = FALSE; /* Always set in non-unicode case. */
|
||||||
if (leftop == OP_PROP || leftop == OP_NOTPROP)
|
if (leftop == OP_PROP || leftop == OP_NOTPROP)
|
||||||
{
|
{
|
||||||
|
@ -915,7 +915,7 @@ for(;;)
|
||||||
}
|
}
|
||||||
|
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
|
accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
|
||||||
rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
|
rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
|
||||||
|
@ -1039,7 +1039,7 @@ for(;;)
|
||||||
case OP_EOD: /* Can always possessify before \z */
|
case OP_EOD: /* Can always possessify before \z */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
case OP_PROP:
|
case OP_PROP:
|
||||||
case OP_NOTPROP:
|
case OP_NOTPROP:
|
||||||
if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
|
if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
|
||||||
|
|
|
@ -433,7 +433,7 @@ static const int posix_class_maps[] = {
|
||||||
/* Table of substitutes for \d etc when PCRE2_UCP is set. They are replaced by
|
/* Table of substitutes for \d etc when PCRE2_UCP is set. They are replaced by
|
||||||
Unicode property escapes. */
|
Unicode property escapes. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
static const PCRE2_UCHAR string_PNd[] = {
|
static const PCRE2_UCHAR string_PNd[] = {
|
||||||
CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
|
CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
|
||||||
CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
|
CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
|
||||||
|
@ -541,7 +541,7 @@ static PCRE2_SPTR posix_substitutes[] = {
|
||||||
NULL /* ^xdigit */
|
NULL /* ^xdigit */
|
||||||
};
|
};
|
||||||
#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(PCRE2_UCHAR *))
|
#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(PCRE2_UCHAR *))
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Masks for checking option settings. */
|
/* Masks for checking option settings. */
|
||||||
|
|
||||||
|
@ -887,7 +887,7 @@ for (;;)
|
||||||
case OP_NOTI:
|
case OP_NOTI:
|
||||||
branchlength++;
|
branchlength++;
|
||||||
cc += 2;
|
cc += 2;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
@ -901,7 +901,7 @@ for (;;)
|
||||||
case OP_NOTEXACTI:
|
case OP_NOTEXACTI:
|
||||||
branchlength += (int)GET2(cc,1);
|
branchlength += (int)GET2(cc,1);
|
||||||
cc += 2 + IMM2_SIZE;
|
cc += 2 + IMM2_SIZE;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
@ -1315,7 +1315,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
|
||||||
actual length is stored in the compiled code, so we must update "code"
|
actual length is stored in the compiled code, so we must update "code"
|
||||||
here. */
|
here. */
|
||||||
|
|
||||||
#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
case OP_XCLASS:
|
case OP_XCLASS:
|
||||||
ccode = code += GET(code, 1);
|
ccode = code += GET(code, 1);
|
||||||
goto CHECK_CLASS_REPEAT;
|
goto CHECK_CLASS_REPEAT;
|
||||||
|
@ -1325,7 +1325,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
|
||||||
case OP_NCLASS:
|
case OP_NCLASS:
|
||||||
ccode = code + PRIV(OP_lengths)[OP_CLASS];
|
ccode = code + PRIV(OP_lengths)[OP_CLASS];
|
||||||
|
|
||||||
#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
CHECK_CLASS_REPEAT:
|
CHECK_CLASS_REPEAT:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -2062,7 +2062,7 @@ return escape;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Handle \P and \p *
|
* Handle \P and \p *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -2678,7 +2678,7 @@ return -1;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Get othercase range *
|
* Get othercase range *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -2740,7 +2740,7 @@ for (++c; c <= d; c++)
|
||||||
*cptr = c; /* Rest of input range */
|
*cptr = c; /* Rest of input range */
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -2780,7 +2780,7 @@ range. */
|
||||||
|
|
||||||
if ((options & PCRE2_CASELESS) != 0)
|
if ((options & PCRE2_CASELESS) != 0)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if ((options & PCRE2_UTF) != 0)
|
if ((options & PCRE2_UTF) != 0)
|
||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
|
@ -2810,7 +2810,7 @@ if ((options & PCRE2_CASELESS) != 0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Not UTF mode */
|
/* Not UTF mode */
|
||||||
|
|
||||||
|
@ -2844,7 +2844,7 @@ if (end >= start)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR *uchardata = *uchardptr;
|
PCRE2_UCHAR *uchardata = *uchardptr;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if ((options & PCRE2_UTF) != 0)
|
if ((options & PCRE2_UTF) != 0)
|
||||||
{
|
{
|
||||||
if (start < end)
|
if (start < end)
|
||||||
|
@ -2860,7 +2860,7 @@ if (end >= start)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Without UTF support, character values are constrained by the bit length,
|
/* Without UTF support, character values are constrained by the bit length,
|
||||||
and can only be > 256 for 16-bit and 32-bit libraries. */
|
and can only be > 256 for 16-bit and 32-bit libraries. */
|
||||||
|
@ -3042,7 +3042,7 @@ uint8_t classbits[32];
|
||||||
not do this for other options (e.g. PCRE2_EXTENDED) because they may change
|
not do this for other options (e.g. PCRE2_EXTENDED) because they may change
|
||||||
dynamically as we process the pattern. */
|
dynamically as we process the pattern. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
PCRE2_UCHAR utf_units[6]; /* For setting up multi-cu chars */
|
PCRE2_UCHAR utf_units[6]; /* For setting up multi-cu chars */
|
||||||
|
@ -3235,7 +3235,7 @@ for (;; ptr++)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
ptr++;
|
ptr++;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf) FORWARDCHAR(ptr);
|
if (utf) FORWARDCHAR(ptr);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -3474,7 +3474,7 @@ for (;; ptr++)
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && HAS_EXTRALEN(c))
|
if (utf && HAS_EXTRALEN(c))
|
||||||
{ /* Braces are required because the */
|
{ /* Braces are required because the */
|
||||||
GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
|
GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
|
||||||
|
@ -3556,7 +3556,7 @@ for (;; ptr++)
|
||||||
that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
|
that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
|
||||||
directly. UCP support is not available unless UTF support is.*/
|
directly. UCP support is not available unless UTF support is.*/
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if ((options & PCRE2_UCP) != 0)
|
if ((options & PCRE2_UCP) != 0)
|
||||||
{
|
{
|
||||||
unsigned int ptype = 0;
|
unsigned int ptype = 0;
|
||||||
|
@ -3599,7 +3599,7 @@ for (;; ptr++)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* In the non-UCP case, or when UCP makes no difference, we build the
|
/* In the non-UCP case, or when UCP makes no difference, we build the
|
||||||
bit map for the POSIX class in a chunk of local store because we may be
|
bit map for the POSIX class in a chunk of local store because we may be
|
||||||
|
@ -3689,7 +3689,7 @@ for (;; ptr++)
|
||||||
|
|
||||||
switch (escape)
|
switch (escape)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
case ESC_du: /* These are the values given for \d etc */
|
case ESC_du: /* These are the values given for \d etc */
|
||||||
case ESC_DU: /* when PCRE2_UCP is set. We replace the */
|
case ESC_DU: /* when PCRE2_UCP is set. We replace the */
|
||||||
case ESC_wu: /* escape sequence with an appropriate \p */
|
case ESC_wu: /* escape sequence with an appropriate \p */
|
||||||
|
@ -3757,7 +3757,7 @@ for (;; ptr++)
|
||||||
cb, PRIV(vspace_list));
|
cb, PRIV(vspace_list));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
case ESC_p:
|
case ESC_p:
|
||||||
case ESC_P:
|
case ESC_P:
|
||||||
{
|
{
|
||||||
|
@ -3840,7 +3840,7 @@ for (;; ptr++)
|
||||||
|
|
||||||
/* Otherwise, we have a potential range; pick up the next character */
|
/* Otherwise, we have a potential range; pick up the next character */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{ /* Braces are required because the */
|
{ /* Braces are required because the */
|
||||||
GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */
|
GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */
|
||||||
|
@ -3940,7 +3940,7 @@ for (;; ptr++)
|
||||||
|
|
||||||
if (negate_class)
|
if (negate_class)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
int d;
|
int d;
|
||||||
#endif
|
#endif
|
||||||
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
|
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
|
||||||
|
@ -3951,7 +3951,7 @@ for (;; ptr++)
|
||||||
one other case. If so, generate a special OP_NOTPROP item instead of
|
one other case. If so, generate a special OP_NOTPROP item instead of
|
||||||
OP_NOTI. */
|
OP_NOTI. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && (options & PCRE2_CASELESS) != 0 &&
|
if (utf && (options & PCRE2_CASELESS) != 0 &&
|
||||||
(d = UCD_CASESET(c)) != 0)
|
(d = UCD_CASESET(c)) != 0)
|
||||||
{
|
{
|
||||||
|
@ -4032,7 +4032,7 @@ for (;; ptr++)
|
||||||
be listed) there are no characters < 256, we can omit the bitmap in the
|
be listed) there are no characters < 256, we can omit the bitmap in the
|
||||||
actual compiled code. */
|
actual compiled code. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0))
|
if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0))
|
||||||
#elif PCRE2_CODE_UNIT_WIDTH != 8
|
#elif PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
if (xclass && !should_flip_negation)
|
if (xclass && !should_flip_negation)
|
||||||
|
@ -4157,7 +4157,7 @@ for (;; ptr++)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
p++;
|
p++;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf) FORWARDCHAR(p);
|
if (utf) FORWARDCHAR(p);
|
||||||
#endif
|
#endif
|
||||||
} /* Loop for comment characters */
|
} /* Loop for comment characters */
|
||||||
|
@ -4265,7 +4265,7 @@ for (;; ptr++)
|
||||||
/* If previous was a character type match (\d or similar), abolish it and
|
/* If previous was a character type match (\d or similar), abolish it and
|
||||||
create a suitable repeat item. The code is shared with single-character
|
create a suitable repeat item. The code is shared with single-character
|
||||||
repeats by setting op_type to add a suitable offset into repeat_type. Note
|
repeats by setting op_type to add a suitable offset into repeat_type. Note
|
||||||
the the Unicode property types will be present only when SUPPORT_UTF is
|
the the Unicode property types will be present only when SUPPORT_UNICODE is
|
||||||
defined, but we don't wrap the little bits of code here because it just
|
defined, but we don't wrap the little bits of code here because it just
|
||||||
makes it horribly messy. */
|
makes it horribly messy. */
|
||||||
|
|
||||||
|
@ -4880,7 +4880,7 @@ for (;; ptr++)
|
||||||
case OP_NOTEXACT:
|
case OP_NOTEXACT:
|
||||||
case OP_NOTEXACTI:
|
case OP_NOTEXACTI:
|
||||||
tempcode += PRIV(OP_lengths)[*tempcode];
|
tempcode += PRIV(OP_lengths)[*tempcode];
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && HAS_EXTRALEN(tempcode[-1]))
|
if (utf && HAS_EXTRALEN(tempcode[-1]))
|
||||||
tempcode += GET_EXTRALEN(tempcode[-1]);
|
tempcode += GET_EXTRALEN(tempcode[-1]);
|
||||||
#endif
|
#endif
|
||||||
|
@ -6407,7 +6407,7 @@ for (;; ptr++)
|
||||||
|
|
||||||
/* So are Unicode property matches, if supported. */
|
/* So are Unicode property matches, if supported. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
else if (escape == ESC_P || escape == ESC_p)
|
else if (escape == ESC_P || escape == ESC_p)
|
||||||
{
|
{
|
||||||
BOOL negated;
|
BOOL negated;
|
||||||
|
@ -6442,7 +6442,7 @@ for (;; ptr++)
|
||||||
if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
|
if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
|
||||||
cb->max_lookbehind == 0)
|
cb->max_lookbehind == 0)
|
||||||
cb->max_lookbehind = 1;
|
cb->max_lookbehind = 1;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (escape >= ESC_DU && escape <= ESC_wu)
|
if (escape >= ESC_DU && escape <= ESC_wu)
|
||||||
{
|
{
|
||||||
nestptr = ptr + 1; /* Where to resume */
|
nestptr = ptr + 1; /* Where to resume */
|
||||||
|
@ -6479,7 +6479,7 @@ for (;; ptr++)
|
||||||
mclength = 1;
|
mclength = 1;
|
||||||
mcbuffer[0] = c;
|
mcbuffer[0] = c;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && HAS_EXTRALEN(c))
|
if (utf && HAS_EXTRALEN(c))
|
||||||
ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
|
ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
|
||||||
#endif
|
#endif
|
||||||
|
@ -6493,7 +6493,7 @@ for (;; ptr++)
|
||||||
/* For caseless UTF mode, check whether this character has more than one
|
/* For caseless UTF mode, check whether this character has more than one
|
||||||
other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
|
other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && (options & PCRE2_CASELESS) != 0)
|
if (utf && (options & PCRE2_CASELESS) != 0)
|
||||||
{
|
{
|
||||||
GETCHAR(c, mcbuffer);
|
GETCHAR(c, mcbuffer);
|
||||||
|
@ -7527,7 +7527,7 @@ ptr += skipatstart;
|
||||||
|
|
||||||
/* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */
|
/* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */
|
||||||
|
|
||||||
#ifndef SUPPORT_UTF
|
#ifndef SUPPORT_UNICODE
|
||||||
if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0)
|
if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0)
|
||||||
{
|
{
|
||||||
errorcode = ERR32;
|
errorcode = ERR32;
|
||||||
|
@ -7911,7 +7911,7 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
|
||||||
points and cannot have another case. In 16-bit and 32-bit modes, we can
|
points and cannot have another case. In 16-bit and 32-bit modes, we can
|
||||||
check wide characters when UTF (and therefore UCP) is supported. */
|
check wide characters when UTF (and therefore UCP) is supported. */
|
||||||
|
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
else if (firstcu <= MAX_UTF_CODE_POINT &&
|
else if (firstcu <= MAX_UTF_CODE_POINT &&
|
||||||
UCD_OTHERCASE(firstcu) != firstcu)
|
UCD_OTHERCASE(firstcu) != firstcu)
|
||||||
re->flags |= PCRE2_FIRSTCASELESS;
|
re->flags |= PCRE2_FIRSTCASELESS;
|
||||||
|
@ -7945,7 +7945,7 @@ if (reqcuflags >= 0 &&
|
||||||
{
|
{
|
||||||
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
|
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
|
||||||
}
|
}
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
|
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
|
||||||
re->flags |= PCRE2_LASTCASELESS;
|
re->flags |= PCRE2_LASTCASELESS;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -75,7 +75,7 @@ Returns: 0 if data returned
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_config(int what, void *where, size_t length)
|
pcre2_config(uint32_t what, void *where, size_t length)
|
||||||
{
|
{
|
||||||
if (length < sizeof(int)) return PCRE2_ERROR_BADLENGTH;
|
if (length < sizeof(int)) return PCRE2_ERROR_BADLENGTH;
|
||||||
|
|
||||||
|
@ -145,7 +145,7 @@ switch (what)
|
||||||
|
|
||||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||||
{
|
{
|
||||||
#if defined SUPPORT_UTF
|
#if defined SUPPORT_UNICODE
|
||||||
const char *v = PRIV(unicode_version);
|
const char *v = PRIV(unicode_version);
|
||||||
#else
|
#else
|
||||||
const char *v = "Unicode not supported";
|
const char *v = "Unicode not supported";
|
||||||
|
@ -158,8 +158,8 @@ switch (what)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE2_CONFIG_UTF:
|
case PCRE2_CONFIG_UNICODE:
|
||||||
#if defined SUPPORT_UTF
|
#if defined SUPPORT_UNICODE
|
||||||
*((int *)where) = 1;
|
*((int *)where) = 1;
|
||||||
#else
|
#else
|
||||||
*((int *)where) = 0;
|
*((int *)where) = 0;
|
||||||
|
|
|
@ -263,8 +263,9 @@ if (mcontext != NULL)
|
||||||
* Set values in contexts *
|
* Set values in contexts *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* All these functions return 1 for success or 0 if invalid data is given. Only
|
/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
|
||||||
some of the functions are able to test the validity of the data. */
|
data is given. Only some of the functions are able to test the validity of the
|
||||||
|
data. */
|
||||||
|
|
||||||
|
|
||||||
/* ------------ Compile contexts ------------ */
|
/* ------------ Compile contexts ------------ */
|
||||||
|
@ -274,7 +275,7 @@ pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||||
const unsigned char *tables)
|
const unsigned char *tables)
|
||||||
{
|
{
|
||||||
ccontext->tables = tables;
|
ccontext->tables = tables;
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
@ -285,10 +286,10 @@ switch(value)
|
||||||
case PCRE2_BSR_ANYCRLF:
|
case PCRE2_BSR_ANYCRLF:
|
||||||
case PCRE2_BSR_UNICODE:
|
case PCRE2_BSR_UNICODE:
|
||||||
ccontext->bsr_convention = value;
|
ccontext->bsr_convention = value;
|
||||||
return 1;
|
return 0;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return 0;
|
return PCRE2_ERROR_BADDATA;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -303,10 +304,10 @@ switch(newline)
|
||||||
case PCRE2_NEWLINE_ANY:
|
case PCRE2_NEWLINE_ANY:
|
||||||
case PCRE2_NEWLINE_ANYCRLF:
|
case PCRE2_NEWLINE_ANYCRLF:
|
||||||
ccontext->newline_convention = newline;
|
ccontext->newline_convention = newline;
|
||||||
return 1;
|
return 0;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return 0;
|
return PCRE2_ERROR_BADDATA;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -314,7 +315,7 @@ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
|
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
|
||||||
{
|
{
|
||||||
ccontext->parens_nest_limit = limit;
|
ccontext->parens_nest_limit = limit;
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
@ -322,7 +323,7 @@ pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||||
int (*guard)(uint32_t))
|
int (*guard)(uint32_t))
|
||||||
{
|
{
|
||||||
ccontext->stack_guard = guard;
|
ccontext->stack_guard = guard;
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -336,10 +337,10 @@ switch(value)
|
||||||
case PCRE2_BSR_ANYCRLF:
|
case PCRE2_BSR_ANYCRLF:
|
||||||
case PCRE2_BSR_UNICODE:
|
case PCRE2_BSR_UNICODE:
|
||||||
mcontext->bsr_convention = value;
|
mcontext->bsr_convention = value;
|
||||||
return 1;
|
return 0;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return 0;
|
return PCRE2_ERROR_BADDATA;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -354,10 +355,10 @@ switch(newline)
|
||||||
case PCRE2_NEWLINE_ANY:
|
case PCRE2_NEWLINE_ANY:
|
||||||
case PCRE2_NEWLINE_ANYCRLF:
|
case PCRE2_NEWLINE_ANYCRLF:
|
||||||
mcontext->newline_convention = newline;
|
mcontext->newline_convention = newline;
|
||||||
return 1;
|
return 0;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return 0;
|
return PCRE2_ERROR_BADDATA;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -367,21 +368,21 @@ pcre2_set_callout(pcre2_match_context *mcontext,
|
||||||
{
|
{
|
||||||
mcontext->callout = callout;
|
mcontext->callout = callout;
|
||||||
mcontext->callout_data = callout_data;
|
mcontext->callout_data = callout_data;
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||||
{
|
{
|
||||||
mcontext->match_limit = limit;
|
mcontext->match_limit = limit;
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
|
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||||
{
|
{
|
||||||
mcontext->recursion_limit = limit;
|
mcontext->recursion_limit = limit;
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
@ -399,7 +400,7 @@ mcontext->stack_memctl.memory_data = mydata;
|
||||||
(void)myfree;
|
(void)myfree;
|
||||||
(void)mydata;
|
(void)mydata;
|
||||||
#endif
|
#endif
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of pcre2_context.c */
|
/* End of pcre2_context.c */
|
||||||
|
|
|
@ -391,7 +391,7 @@ PCRE2_SPTR start_subject = mb->start_subject;
|
||||||
PCRE2_SPTR end_subject = mb->end_subject;
|
PCRE2_SPTR end_subject = mb->end_subject;
|
||||||
PCRE2_SPTR start_code = mb->start_code;
|
PCRE2_SPTR start_code = mb->start_code;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||||
#else
|
#else
|
||||||
BOOL utf = FALSE;
|
BOOL utf = FALSE;
|
||||||
|
@ -447,7 +447,7 @@ if (*first_op == OP_REVERSE)
|
||||||
/* If we can't go back the amount required for the longest lookbehind
|
/* If we can't go back the amount required for the longest lookbehind
|
||||||
pattern, go back as far as we can; some alternatives may still be viable. */
|
pattern, go back as far as we can; some alternatives may still be viable. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
/* In character mode we have to step back character by character */
|
/* In character mode we have to step back character by character */
|
||||||
|
|
||||||
if (utf)
|
if (utf)
|
||||||
|
@ -570,11 +570,11 @@ for (;;)
|
||||||
if (ptr < end_subject)
|
if (ptr < end_subject)
|
||||||
{
|
{
|
||||||
clen = 1; /* Number of data items in the character */
|
clen = 1; /* Number of data items in the character */
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
GETCHARLENTEST(c, ptr, clen);
|
GETCHARLENTEST(c, ptr, clen);
|
||||||
#else
|
#else
|
||||||
c = *ptr;
|
c = *ptr;
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -652,9 +652,9 @@ for (;;)
|
||||||
if (coptable[codevalue] > 0)
|
if (coptable[codevalue] > 0)
|
||||||
{
|
{
|
||||||
dlen = 1;
|
dlen = 1;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
|
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
d = code[coptable[codevalue]];
|
d = code[coptable[codevalue]];
|
||||||
if (codevalue >= OP_TYPESTAR)
|
if (codevalue >= OP_TYPESTAR)
|
||||||
{
|
{
|
||||||
|
@ -948,11 +948,11 @@ for (;;)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR temp = ptr - 1;
|
PCRE2_SPTR temp = ptr - 1;
|
||||||
if (temp < mb->start_used_ptr) mb->start_used_ptr = temp;
|
if (temp < mb->start_used_ptr) mb->start_used_ptr = temp;
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
if (utf) { BACKCHAR(temp); }
|
if (utf) { BACKCHAR(temp); }
|
||||||
#endif
|
#endif
|
||||||
GETCHARTEST(d, temp);
|
GETCHARTEST(d, temp);
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||||
{
|
{
|
||||||
if (d == '_') left_word = TRUE; else
|
if (d == '_') left_word = TRUE; else
|
||||||
|
@ -972,12 +972,12 @@ for (;;)
|
||||||
if (ptr >= mb->last_used_ptr)
|
if (ptr >= mb->last_used_ptr)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR temp = ptr + 1;
|
PCRE2_SPTR temp = ptr + 1;
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
if (utf) { FORWARDCHAR(temp); }
|
if (utf) { FORWARDCHAR(temp); }
|
||||||
#endif
|
#endif
|
||||||
mb->last_used_ptr = temp;
|
mb->last_used_ptr = temp;
|
||||||
}
|
}
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||||
{
|
{
|
||||||
if (c == '_') right_word = TRUE; else
|
if (c == '_') right_word = TRUE; else
|
||||||
|
@ -1003,7 +1003,7 @@ for (;;)
|
||||||
if the support is in the binary; otherwise a compile-time error occurs.
|
if the support is in the binary; otherwise a compile-time error occurs.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
case OP_PROP:
|
case OP_PROP:
|
||||||
case OP_NOTPROP:
|
case OP_NOTPROP:
|
||||||
if (clen > 0)
|
if (clen > 0)
|
||||||
|
@ -1258,7 +1258,7 @@ for (;;)
|
||||||
argument. It keeps the code above fast for the other cases. The argument
|
argument. It keeps the code above fast for the other cases. The argument
|
||||||
is in the d variable. */
|
is in the d variable. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
||||||
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
||||||
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
|
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
|
||||||
|
@ -1501,7 +1501,7 @@ for (;;)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
||||||
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
||||||
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
|
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
|
||||||
|
@ -1785,7 +1785,7 @@ for (;;)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
case OP_PROP_EXTRA + OP_TYPEEXACT:
|
case OP_PROP_EXTRA + OP_TYPEEXACT:
|
||||||
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
||||||
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
||||||
|
@ -2063,7 +2063,7 @@ for (;;)
|
||||||
case OP_CHARI:
|
case OP_CHARI:
|
||||||
if (clen == 0) break;
|
if (clen == 0) break;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
|
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
|
||||||
|
@ -2077,7 +2077,7 @@ for (;;)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
/* Not UTF mode */
|
/* Not UTF mode */
|
||||||
{
|
{
|
||||||
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
|
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
|
||||||
|
@ -2086,7 +2086,7 @@ for (;;)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
/* This is a tricky one because it can match more than one character.
|
/* This is a tricky one because it can match more than one character.
|
||||||
Find out how many characters to skip, and then set up a negative state
|
Find out how many characters to skip, and then set up a negative state
|
||||||
|
@ -2222,11 +2222,11 @@ for (;;)
|
||||||
if (clen > 0)
|
if (clen > 0)
|
||||||
{
|
{
|
||||||
unsigned int otherd;
|
unsigned int otherd;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && d >= 128)
|
if (utf && d >= 128)
|
||||||
otherd = UCD_OTHERCASE(d);
|
otherd = UCD_OTHERCASE(d);
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
otherd = TABLE_GET(d, fcc, d);
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
if (c != d && c != otherd)
|
if (c != d && c != otherd)
|
||||||
{ ADD_NEW(state_offset + dlen + 1, 0); }
|
{ ADD_NEW(state_offset + dlen + 1, 0); }
|
||||||
|
@ -2257,11 +2257,11 @@ for (;;)
|
||||||
uint32_t otherd = NOTACHAR;
|
uint32_t otherd = NOTACHAR;
|
||||||
if (caseless)
|
if (caseless)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && d >= 128)
|
if (utf && d >= 128)
|
||||||
otherd = UCD_OTHERCASE(d);
|
otherd = UCD_OTHERCASE(d);
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
otherd = TABLE_GET(d, fcc, d);
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
}
|
}
|
||||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||||
|
@ -2300,11 +2300,11 @@ for (;;)
|
||||||
uint32_t otherd = NOTACHAR;
|
uint32_t otherd = NOTACHAR;
|
||||||
if (caseless)
|
if (caseless)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && d >= 128)
|
if (utf && d >= 128)
|
||||||
otherd = UCD_OTHERCASE(d);
|
otherd = UCD_OTHERCASE(d);
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
otherd = TABLE_GET(d, fcc, d);
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
}
|
}
|
||||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||||
|
@ -2341,11 +2341,11 @@ for (;;)
|
||||||
uint32_t otherd = NOTACHAR;
|
uint32_t otherd = NOTACHAR;
|
||||||
if (caseless)
|
if (caseless)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && d >= 128)
|
if (utf && d >= 128)
|
||||||
otherd = UCD_OTHERCASE(d);
|
otherd = UCD_OTHERCASE(d);
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
otherd = TABLE_GET(d, fcc, d);
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
}
|
}
|
||||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||||
|
@ -2374,11 +2374,11 @@ for (;;)
|
||||||
uint32_t otherd = NOTACHAR;
|
uint32_t otherd = NOTACHAR;
|
||||||
if (caseless)
|
if (caseless)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && d >= 128)
|
if (utf && d >= 128)
|
||||||
otherd = UCD_OTHERCASE(d);
|
otherd = UCD_OTHERCASE(d);
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
otherd = TABLE_GET(d, fcc, d);
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
}
|
}
|
||||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||||
|
@ -2414,11 +2414,11 @@ for (;;)
|
||||||
uint32_t otherd = NOTACHAR;
|
uint32_t otherd = NOTACHAR;
|
||||||
if (caseless)
|
if (caseless)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && d >= 128)
|
if (utf && d >= 128)
|
||||||
otherd = UCD_OTHERCASE(d);
|
otherd = UCD_OTHERCASE(d);
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
otherd = TABLE_GET(d, fcc, d);
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
}
|
}
|
||||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||||
|
@ -2747,7 +2747,7 @@ for (;;)
|
||||||
for (rc = rc*2 - 2; rc >= 0; rc -= 2)
|
for (rc = rc*2 - 2; rc >= 0; rc -= 2)
|
||||||
{
|
{
|
||||||
int charcount = local_offsets[rc+1] - local_offsets[rc];
|
int charcount = local_offsets[rc+1] - local_offsets[rc];
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR p = start_subject + local_offsets[rc];
|
PCRE2_SPTR p = start_subject + local_offsets[rc];
|
||||||
|
@ -2851,7 +2851,7 @@ for (;;)
|
||||||
PCRE2_SPTR p = ptr;
|
PCRE2_SPTR p = ptr;
|
||||||
PCRE2_SPTR pp = local_ptr;
|
PCRE2_SPTR pp = local_ptr;
|
||||||
charcount = (int)(pp - p);
|
charcount = (int)(pp - p);
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
||||||
#endif
|
#endif
|
||||||
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
|
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
|
||||||
|
@ -2933,7 +2933,7 @@ for (;;)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR p = start_subject + local_offsets[0];
|
PCRE2_SPTR p = start_subject + local_offsets[0];
|
||||||
|
@ -3106,14 +3106,24 @@ if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
|
||||||
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
|
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
|
||||||
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||||
|
|
||||||
/* Check that the first field in the block is the magic number. If it is not,
|
/* FIXME: Remove BADENDIANNESS if saving/restoring is not to be implemented. */
|
||||||
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
|
|
||||||
REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which
|
/* Check that the first field in the block is the magic number. If it is not,
|
||||||
means that the pattern is likely compiled with different endianness. */
|
return with PCRE2_ERROR_BADMAGIC. */
|
||||||
|
|
||||||
|
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||||
|
|
||||||
|
#ifdef FIXME
|
||||||
|
If saving restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
|
||||||
|
this comment and code:
|
||||||
|
|
||||||
|
/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
|
||||||
|
with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
|
||||||
|
with different endianness. */
|
||||||
|
|
||||||
if (re->magic_number != MAGIC_NUMBER)
|
|
||||||
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
||||||
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
|
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Check the code unit width. */
|
/* Check the code unit width. */
|
||||||
|
|
||||||
|
@ -3238,7 +3248,7 @@ switch(newline)
|
||||||
we must also check that a starting offset does not point into the middle of a
|
we must also check that a starting offset does not point into the middle of a
|
||||||
multiunit character. */
|
multiunit character. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||||
{
|
{
|
||||||
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar));
|
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar));
|
||||||
|
@ -3253,7 +3263,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||||
return PCRE2_ERROR_BADUTFOFFSET;
|
return PCRE2_ERROR_BADUTFOFFSET;
|
||||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Set up the first code unit to match, if available. The first_codeunit value
|
/* Set up the first code unit to match, if available. The first_codeunit value
|
||||||
is never set for an anchored regular expression, but the anchoring may be
|
is never set for an anchored regular expression, but the anchoring may be
|
||||||
|
@ -3270,7 +3280,7 @@ if (!anchored)
|
||||||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||||
{
|
{
|
||||||
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
|
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
|
if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -3290,7 +3300,7 @@ if ((re->flags & PCRE2_LASTSET) != 0)
|
||||||
if ((re->flags & PCRE2_LASTCASELESS) != 0)
|
if ((re->flags & PCRE2_LASTCASELESS) != 0)
|
||||||
{
|
{
|
||||||
req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
|
req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
|
if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -3327,7 +3337,7 @@ for (;;)
|
||||||
if (firstline)
|
if (firstline)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR t = start_match;
|
PCRE2_SPTR t = start_match;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
while (t < mb->end_subject && !IS_NEWLINE(t))
|
while (t < mb->end_subject && !IS_NEWLINE(t))
|
||||||
|
@ -3362,7 +3372,7 @@ for (;;)
|
||||||
{
|
{
|
||||||
if (start_match > mb->start_subject + start_offset)
|
if (start_match > mb->start_subject + start_offset)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||||
|
@ -3516,7 +3526,7 @@ for (;;)
|
||||||
|
|
||||||
if (firstline && IS_NEWLINE(start_match)) break;
|
if (firstline && IS_NEWLINE(start_match)) break;
|
||||||
start_match++;
|
start_match++;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
ACROSSCHAR(start_match < end_subject, *start_match,
|
ACROSSCHAR(start_match < end_subject, *start_match,
|
||||||
|
|
|
@ -198,35 +198,34 @@ static const char match_error_texts[] =
|
||||||
"UTF-16 error: isolated low surrogate\0"
|
"UTF-16 error: isolated low surrogate\0"
|
||||||
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
|
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
|
||||||
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
||||||
"bad count value\0"
|
"bad data value\0"
|
||||||
/* 30 */
|
/* 30 */
|
||||||
"pattern compiled with other endianness\0"
|
|
||||||
"bad length\0"
|
"bad length\0"
|
||||||
"magic number missing\0"
|
"magic number missing\0"
|
||||||
"pattern compiled in wrong mode: 8/16/32-bit error\0"
|
"pattern compiled in wrong mode: 8/16/32-bit error\0"
|
||||||
"bad offset value\0"
|
"bad offset value\0"
|
||||||
/* 35 */
|
|
||||||
"bad option value\0"
|
"bad option value\0"
|
||||||
|
/* 35 */
|
||||||
"bad offset into UTF string\0"
|
"bad offset into UTF string\0"
|
||||||
"callout error code\0" /* Never returned by PCRE2 itself */
|
"callout error code\0" /* Never returned by PCRE2 itself */
|
||||||
"invalid data in workspace for DFA restart\0"
|
"invalid data in workspace for DFA restart\0"
|
||||||
"too much recursion for DFA matching\0"
|
"too much recursion for DFA matching\0"
|
||||||
/* 40 */
|
|
||||||
"backreference condition or recursion test not supported for DFA matching\0"
|
"backreference condition or recursion test not supported for DFA matching\0"
|
||||||
|
/* 40 */
|
||||||
"item unsupported for DFA matching\0"
|
"item unsupported for DFA matching\0"
|
||||||
"match limit not supported for DFA matching\0"
|
|
||||||
"workspace size exceeded in DFA matching\0"
|
"workspace size exceeded in DFA matching\0"
|
||||||
"internal error - pattern overwritten?\0"
|
"internal error - pattern overwritten?\0"
|
||||||
/* 45 */
|
|
||||||
"bad JIT option\0"
|
"bad JIT option\0"
|
||||||
"JIT stack limit reached\0"
|
"JIT stack limit reached\0"
|
||||||
|
/* 45 */
|
||||||
"match limit exceeded\0"
|
"match limit exceeded\0"
|
||||||
"no more memory\0"
|
"no more memory\0"
|
||||||
"unknown or unset substring\0"
|
"unknown or unset substring\0"
|
||||||
/* 50 */
|
|
||||||
"NULL argument passed\0"
|
"NULL argument passed\0"
|
||||||
"nested recursion at the same subject position\0"
|
"nested recursion at the same subject position\0"
|
||||||
|
/* 50 */
|
||||||
"recursion limit exceeded\0"
|
"recursion limit exceeded\0"
|
||||||
|
"requested value is not set\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -38,11 +38,11 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* We do not support both EBCDIC and UTF at the same time. The "configure"
|
/* We do not support both EBCDIC and Unicode at the same time. The "configure"
|
||||||
script prevents both being selected, but not everybody uses "configure". */
|
script prevents both being selected, but not everybody uses "configure". */
|
||||||
|
|
||||||
#if defined EBCDIC && defined SUPPORT_UTF
|
#if defined EBCDIC && defined SUPPORT_UNICODE
|
||||||
#error The use of both EBCDIC and SUPPORT_UTF is not supported.
|
#error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Standard C headers */
|
/* Standard C headers */
|
||||||
|
@ -597,14 +597,14 @@ there are some longer strings as well.
|
||||||
|
|
||||||
This means that, on EBCDIC platforms, the PCRE library can handle either
|
This means that, on EBCDIC platforms, the PCRE library can handle either
|
||||||
EBCDIC, or UTF-8, but not both. To support both in the same compiled library
|
EBCDIC, or UTF-8, but not both. To support both in the same compiled library
|
||||||
would need different lookups depending on whether PCRE_UTF8 was set or not.
|
would need different lookups depending on whether PCRE2_UTF was set or not.
|
||||||
This would make it impossible to use characters in switch/case statements,
|
This would make it impossible to use characters in switch/case statements,
|
||||||
which would reduce performance. For a theoretical use (which nobody has asked
|
which would reduce performance. For a theoretical use (which nobody has asked
|
||||||
for) in a minority area (EBCDIC platforms), this is not sensible. Any
|
for) in a minority area (EBCDIC platforms), this is not sensible. Any
|
||||||
application that did need both could compile two versions of the library, using
|
application that did need both could compile two versions of the library, using
|
||||||
macros to give the functions distinct names. */
|
macros to give the functions distinct names. */
|
||||||
|
|
||||||
#ifndef SUPPORT_UTF
|
#ifndef SUPPORT_UNICODE
|
||||||
|
|
||||||
/* UTF-8 support is not enabled; use the platform-dependent character literals
|
/* UTF-8 support is not enabled; use the platform-dependent character literals
|
||||||
so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
|
so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
|
||||||
|
@ -920,7 +920,7 @@ a positive value. */
|
||||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||||
|
|
||||||
#else /* SUPPORT_UTF */
|
#else /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
|
/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
|
||||||
works in both modes non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
|
works in both modes non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
|
||||||
|
@ -1189,7 +1189,7 @@ only. */
|
||||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||||
|
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* -------------------- End of character and string names -------------------*/
|
/* -------------------- End of character and string names -------------------*/
|
||||||
|
|
||||||
|
@ -1775,10 +1775,10 @@ typedef struct {
|
||||||
|
|
||||||
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
|
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
|
||||||
|
|
||||||
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not
|
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is defined as
|
||||||
defined, so the following items are omitted. */
|
0, so the following items are omitted. */
|
||||||
|
|
||||||
#ifdef PCRE2_CODE_UNIT_WIDTH
|
#if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0
|
||||||
|
|
||||||
/* This is the largest non-UTF code point. */
|
/* This is the largest non-UTF code point. */
|
||||||
|
|
||||||
|
|
|
@ -208,9 +208,9 @@ tables. */
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
#define MAX_255(c) TRUE
|
#define MAX_255(c) TRUE
|
||||||
#define MAX_MARK ((1u << 8) - 1)
|
#define MAX_MARK ((1u << 8) - 1)
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
#define SUPPORT_WIDE_CHARS
|
#define SUPPORT_WIDE_CHARS
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
#define TABLE_GET(c, table, default) ((table)[c])
|
#define TABLE_GET(c, table, default) ((table)[c])
|
||||||
|
|
||||||
#else /* Code units are 16 or 32 bits */
|
#else /* Code units are 16 or 32 bits */
|
||||||
|
@ -246,7 +246,7 @@ complicated ones for UTF characters. GETCHARLENTEST and other macros are not
|
||||||
used when UTF is not supported. To make sure they can never even appear when
|
used when UTF is not supported. To make sure they can never even appear when
|
||||||
UTF support is omitted, we don't even define them. */
|
UTF support is omitted, we don't even define them. */
|
||||||
|
|
||||||
#ifndef SUPPORT_UTF
|
#ifndef SUPPORT_UNICODE
|
||||||
|
|
||||||
/* #define MAX_UTF_SINGLE_CU */
|
/* #define MAX_UTF_SINGLE_CU */
|
||||||
/* #define HAS_EXTRALEN(c) */
|
/* #define HAS_EXTRALEN(c) */
|
||||||
|
@ -263,7 +263,7 @@ UTF support is omitted, we don't even define them. */
|
||||||
/* #define FORWARDCHAR(eptr) */
|
/* #define FORWARDCHAR(eptr) */
|
||||||
/* #define ACROSSCHAR(condition, eptr, action) */
|
/* #define ACROSSCHAR(condition, eptr, action) */
|
||||||
|
|
||||||
#else /* SUPPORT_UTF */
|
#else /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* ------------------- 8-bit support ------------------ */
|
/* ------------------- 8-bit support ------------------ */
|
||||||
|
|
||||||
|
@ -527,7 +527,7 @@ These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
|
||||||
#define PUTCHAR(c, p) (*p = c, 1)
|
#define PUTCHAR(c, p) (*p = c, 1)
|
||||||
|
|
||||||
#endif /* UTF-32 character handling */
|
#endif /* UTF-32 character handling */
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
||||||
/* Mode-dependent macros that have the same definition in all modes. */
|
/* Mode-dependent macros that have the same definition in all modes. */
|
||||||
|
|
|
@ -145,7 +145,7 @@ static int
|
||||||
match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
|
match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
|
||||||
match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
|
match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
|
||||||
{
|
{
|
||||||
#if defined SUPPORT_UTF
|
#if defined SUPPORT_UNICODE
|
||||||
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -173,7 +173,7 @@ length = mb->ovector[offset+1] - mb->ovector[offset];
|
||||||
|
|
||||||
if (caseless)
|
if (caseless)
|
||||||
{
|
{
|
||||||
#if defined SUPPORT_UTF
|
#if defined SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
/* Match characters up to the end of the reference. NOTE: the number of
|
/* Match characters up to the end of the reference. NOTE: the number of
|
||||||
|
@ -352,7 +352,7 @@ typedef struct heapframe {
|
||||||
struct heapframe *Xprevframe;
|
struct heapframe *Xprevframe;
|
||||||
struct heapframe *Xnextframe;
|
struct heapframe *Xnextframe;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
PCRE2_SPTR Xcharptr;
|
PCRE2_SPTR Xcharptr;
|
||||||
#endif
|
#endif
|
||||||
PCRE2_SPTR Xeptr;
|
PCRE2_SPTR Xeptr;
|
||||||
|
@ -378,7 +378,7 @@ typedef struct heapframe {
|
||||||
uint32_t Xop;
|
uint32_t Xop;
|
||||||
uint32_t Xsave_capture_last;
|
uint32_t Xsave_capture_last;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
uint32_t Xprop_value;
|
uint32_t Xprop_value;
|
||||||
int Xprop_type;
|
int Xprop_type;
|
||||||
int Xprop_fail_result;
|
int Xprop_fail_result;
|
||||||
|
@ -399,7 +399,7 @@ typedef struct heapframe {
|
||||||
eptrblock Xnewptrb;
|
eptrblock Xnewptrb;
|
||||||
recursion_info Xnew_recursive;
|
recursion_info Xnew_recursive;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
PCRE2_UCHAR Xocchars[6];
|
PCRE2_UCHAR Xocchars[6];
|
||||||
#endif
|
#endif
|
||||||
} heapframe;
|
} heapframe;
|
||||||
|
@ -610,7 +610,7 @@ HEAP_RECURSE:
|
||||||
|
|
||||||
/* Ditto for the local variables */
|
/* Ditto for the local variables */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
#define charptr frame->Xcharptr
|
#define charptr frame->Xcharptr
|
||||||
#define prop_value frame->Xprop_value
|
#define prop_value frame->Xprop_value
|
||||||
#define prop_type frame->Xprop_type
|
#define prop_type frame->Xprop_type
|
||||||
|
@ -666,7 +666,7 @@ declarations can be cut out in a block. The only declarations within blocks
|
||||||
below are for variables that do not have to be preserved over a recursive call
|
below are for variables that do not have to be preserved over a recursive call
|
||||||
to RMATCH(). */
|
to RMATCH(). */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
PCRE2_SPTR charptr;
|
PCRE2_SPTR charptr;
|
||||||
#endif
|
#endif
|
||||||
PCRE2_SPTR callpat;
|
PCRE2_SPTR callpat;
|
||||||
|
@ -684,7 +684,7 @@ uint32_t number;
|
||||||
uint32_t op;
|
uint32_t op;
|
||||||
uint32_t save_capture_last;
|
uint32_t save_capture_last;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
uint32_t prop_value;
|
uint32_t prop_value;
|
||||||
int prop_type;
|
int prop_type;
|
||||||
int prop_fail_result;
|
int prop_fail_result;
|
||||||
|
@ -721,7 +721,7 @@ the alternative names that are used. */
|
||||||
/* These statements are here to stop the compiler complaining about uninitialized
|
/* These statements are here to stop the compiler complaining about uninitialized
|
||||||
variables. */
|
variables. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
prop_value = 0;
|
prop_value = 0;
|
||||||
prop_fail_result = 0;
|
prop_fail_result = 0;
|
||||||
#endif
|
#endif
|
||||||
|
@ -742,7 +742,7 @@ call because it's quite a complicated macro. It has to be used in one
|
||||||
particular way. This shouldn't, however, impact performance when true recursion
|
particular way. This shouldn't, however, impact performance when true recursion
|
||||||
is being used. */
|
is being used. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
utf = (mb->poptions & PCRE2_UTF) != 0;
|
utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||||
#else
|
#else
|
||||||
utf = FALSE;
|
utf = FALSE;
|
||||||
|
@ -1662,7 +1662,7 @@ for (;;)
|
||||||
back a number of characters, not bytes. */
|
back a number of characters, not bytes. */
|
||||||
|
|
||||||
case OP_REVERSE:
|
case OP_REVERSE:
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
i = GET(ecode, 1);
|
i = GET(ecode, 1);
|
||||||
|
@ -2197,7 +2197,7 @@ for (;;)
|
||||||
be "non-word" characters. Remember the earliest consulted character for
|
be "non-word" characters. Remember the earliest consulted character for
|
||||||
partial matching. */
|
partial matching. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
/* Get status of previous character */
|
/* Get status of previous character */
|
||||||
|
@ -2257,7 +2257,7 @@ for (;;)
|
||||||
if (eptr == mb->start_subject) prev_is_word = FALSE; else
|
if (eptr == mb->start_subject) prev_is_word = FALSE; else
|
||||||
{
|
{
|
||||||
if (eptr <= mb->start_used_ptr) mb->start_used_ptr = eptr - 1;
|
if (eptr <= mb->start_used_ptr) mb->start_used_ptr = eptr - 1;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||||
{
|
{
|
||||||
c = eptr[-1];
|
c = eptr[-1];
|
||||||
|
@ -2283,7 +2283,7 @@ for (;;)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
|
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||||
{
|
{
|
||||||
c = *eptr;
|
c = *eptr;
|
||||||
|
@ -2334,7 +2334,7 @@ for (;;)
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
eptr++;
|
eptr++;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf) ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
|
if (utf) ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
|
||||||
#endif
|
#endif
|
||||||
ecode++;
|
ecode++;
|
||||||
|
@ -2550,7 +2550,7 @@ for (;;)
|
||||||
ecode++;
|
ecode++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
/* Check the next character by Unicode property. We will get here only
|
/* Check the next character by Unicode property. We will get here only
|
||||||
if the support is in the binary; otherwise a compile-time error occurs. */
|
if the support is in the binary; otherwise a compile-time error occurs. */
|
||||||
|
|
||||||
|
@ -2684,7 +2684,7 @@ for (;;)
|
||||||
CHECK_PARTIAL();
|
CHECK_PARTIAL();
|
||||||
ecode++;
|
ecode++;
|
||||||
break;
|
break;
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
||||||
/* Match a back reference, possibly repeatedly. Look past the end of the
|
/* Match a back reference, possibly repeatedly. Look past the end of the
|
||||||
|
@ -2955,7 +2955,7 @@ for (;;)
|
||||||
|
|
||||||
/* First, ensure the minimum number of matches are present. */
|
/* First, ensure the minimum number of matches are present. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
for (i = 1; i <= min; i++)
|
for (i = 1; i <= min; i++)
|
||||||
|
@ -3007,7 +3007,7 @@ for (;;)
|
||||||
|
|
||||||
if (minimize)
|
if (minimize)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
for (fi = min;; fi++)
|
for (fi = min;; fi++)
|
||||||
|
@ -3063,7 +3063,7 @@ for (;;)
|
||||||
{
|
{
|
||||||
pp = eptr;
|
pp = eptr;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
for (i = min; i < max; i++)
|
for (i = min; i < max; i++)
|
||||||
|
@ -3232,7 +3232,7 @@ for (;;)
|
||||||
SCHECK_PARTIAL();
|
SCHECK_PARTIAL();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
GETCHARLENTEST(c, eptr, len);
|
GETCHARLENTEST(c, eptr, len);
|
||||||
#else
|
#else
|
||||||
c = *eptr;
|
c = *eptr;
|
||||||
|
@ -3248,7 +3248,7 @@ for (;;)
|
||||||
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM21);
|
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM21);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
if (eptr-- == pp) break; /* Stop if tried at original pos */
|
if (eptr-- == pp) break; /* Stop if tried at original pos */
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf) BACKCHAR(eptr);
|
if (utf) BACKCHAR(eptr);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -3262,7 +3262,7 @@ for (;;)
|
||||||
/* Match a single character, casefully */
|
/* Match a single character, casefully */
|
||||||
|
|
||||||
case OP_CHAR:
|
case OP_CHAR:
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
length = 1;
|
length = 1;
|
||||||
|
@ -3299,7 +3299,7 @@ for (;;)
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
length = 1;
|
length = 1;
|
||||||
|
@ -3334,7 +3334,7 @@ for (;;)
|
||||||
|
|
||||||
if (fc != dc)
|
if (fc != dc)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (dc != UCD_OTHERCASE(fc))
|
if (dc != UCD_OTHERCASE(fc))
|
||||||
#endif
|
#endif
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
|
@ -3342,7 +3342,7 @@ for (;;)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Not UTF mode */
|
/* Not UTF mode */
|
||||||
{
|
{
|
||||||
|
@ -3436,7 +3436,7 @@ for (;;)
|
||||||
for speed. */
|
for speed. */
|
||||||
|
|
||||||
REPEATCHAR:
|
REPEATCHAR:
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
length = 1;
|
length = 1;
|
||||||
|
@ -3527,7 +3527,7 @@ for (;;)
|
||||||
value of fc will always be < 128. */
|
value of fc will always be < 128. */
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* When not in UTF-8 mode, load a single-byte character. */
|
/* When not in UTF-8 mode, load a single-byte character. */
|
||||||
fc = *ecode++;
|
fc = *ecode++;
|
||||||
|
@ -3547,11 +3547,11 @@ for (;;)
|
||||||
/* fc must be < 128 if UTF is enabled. */
|
/* fc must be < 128 if UTF is enabled. */
|
||||||
foc = mb->fcc[fc];
|
foc = mb->fcc[fc];
|
||||||
#else
|
#else
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && fc > 127)
|
if (utf && fc > 127)
|
||||||
foc = UCD_OTHERCASE(fc);
|
foc = UCD_OTHERCASE(fc);
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
foc = TABLE_GET(fc, mb->fcc, fc);
|
foc = TABLE_GET(fc, mb->fcc, fc);
|
||||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||||
|
|
||||||
|
@ -3682,7 +3682,7 @@ for (;;)
|
||||||
SCHECK_PARTIAL();
|
SCHECK_PARTIAL();
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
register uint32_t ch, och;
|
register uint32_t ch, och;
|
||||||
|
@ -3705,7 +3705,7 @@ for (;;)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
{
|
{
|
||||||
register uint32_t ch = ecode[1];
|
register uint32_t ch = ecode[1];
|
||||||
c = *eptr++;
|
c = *eptr++;
|
||||||
|
@ -3803,14 +3803,14 @@ for (;;)
|
||||||
|
|
||||||
if (op >= OP_NOTSTARI) /* Caseless */
|
if (op >= OP_NOTSTARI) /* Caseless */
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && fc > 127)
|
if (utf && fc > 127)
|
||||||
foc = UCD_OTHERCASE(fc);
|
foc = UCD_OTHERCASE(fc);
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
foc = TABLE_GET(fc, mb->fcc, fc);
|
foc = TABLE_GET(fc, mb->fcc, fc);
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
register uint32_t d;
|
register uint32_t d;
|
||||||
|
@ -3826,7 +3826,7 @@ for (;;)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
/* Not UTF mode */
|
/* Not UTF mode */
|
||||||
{
|
{
|
||||||
for (i = 1; i <= min; i++)
|
for (i = 1; i <= min; i++)
|
||||||
|
@ -3845,7 +3845,7 @@ for (;;)
|
||||||
|
|
||||||
if (minimize)
|
if (minimize)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
register uint32_t d;
|
register uint32_t d;
|
||||||
|
@ -3864,7 +3864,7 @@ for (;;)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /*SUPPORT_UTF */
|
#endif /*SUPPORT_UNICODE */
|
||||||
/* Not UTF mode */
|
/* Not UTF mode */
|
||||||
{
|
{
|
||||||
for (fi = min;; fi++)
|
for (fi = min;; fi++)
|
||||||
|
@ -3890,7 +3890,7 @@ for (;;)
|
||||||
{
|
{
|
||||||
pp = eptr;
|
pp = eptr;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
register uint32_t d;
|
register uint32_t d;
|
||||||
|
@ -3917,7 +3917,7 @@ for (;;)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
/* Not UTF mode */
|
/* Not UTF mode */
|
||||||
{
|
{
|
||||||
for (i = min; i < max; i++)
|
for (i = min; i < max; i++)
|
||||||
|
@ -3947,7 +3947,7 @@ for (;;)
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
register uint32_t d;
|
register uint32_t d;
|
||||||
|
@ -3981,7 +3981,7 @@ for (;;)
|
||||||
|
|
||||||
if (minimize)
|
if (minimize)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
register uint32_t d;
|
register uint32_t d;
|
||||||
|
@ -4025,7 +4025,7 @@ for (;;)
|
||||||
{
|
{
|
||||||
pp = eptr;
|
pp = eptr;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
register uint32_t d;
|
register uint32_t d;
|
||||||
|
@ -4144,7 +4144,7 @@ for (;;)
|
||||||
REPEATTYPE:
|
REPEATTYPE:
|
||||||
ctype = *ecode++; /* Code for the character type */
|
ctype = *ecode++; /* Code for the character type */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (ctype == OP_PROP || ctype == OP_NOTPROP)
|
if (ctype == OP_PROP || ctype == OP_NOTPROP)
|
||||||
{
|
{
|
||||||
prop_fail_result = ctype == OP_NOTPROP;
|
prop_fail_result = ctype == OP_NOTPROP;
|
||||||
|
@ -4162,7 +4162,7 @@ for (;;)
|
||||||
|
|
||||||
if (min > 0)
|
if (min > 0)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (prop_type >= 0)
|
if (prop_type >= 0)
|
||||||
{
|
{
|
||||||
switch(prop_type)
|
switch(prop_type)
|
||||||
|
@ -4378,11 +4378,11 @@ for (;;)
|
||||||
}
|
}
|
||||||
|
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Handle all other cases when the coding is UTF-8 */
|
/* Handle all other cases when the coding is UTF-8 */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf) switch(ctype)
|
if (utf) switch(ctype)
|
||||||
{
|
{
|
||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
|
@ -4631,7 +4631,7 @@ for (;;)
|
||||||
} /* End switch(ctype) */
|
} /* End switch(ctype) */
|
||||||
|
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Code for the non-UTF-8 case for minimum matching of operators other
|
/* Code for the non-UTF-8 case for minimum matching of operators other
|
||||||
than OP_PROP and OP_NOTPROP. */
|
than OP_PROP and OP_NOTPROP. */
|
||||||
|
@ -4889,7 +4889,7 @@ for (;;)
|
||||||
|
|
||||||
if (minimize)
|
if (minimize)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (prop_type >= 0)
|
if (prop_type >= 0)
|
||||||
{
|
{
|
||||||
switch(prop_type)
|
switch(prop_type)
|
||||||
|
@ -5138,9 +5138,9 @@ for (;;)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
for (fi = min;; fi++)
|
for (fi = min;; fi++)
|
||||||
|
@ -5410,7 +5410,7 @@ for (;;)
|
||||||
{
|
{
|
||||||
pp = eptr; /* Remember where we started */
|
pp = eptr; /* Remember where we started */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (prop_type >= 0)
|
if (prop_type >= 0)
|
||||||
{
|
{
|
||||||
switch(prop_type)
|
switch(prop_type)
|
||||||
|
@ -5696,9 +5696,9 @@ for (;;)
|
||||||
}
|
}
|
||||||
|
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
switch(ctype)
|
switch(ctype)
|
||||||
|
@ -5940,7 +5940,7 @@ for (;;)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
/* Not UTF mode */
|
/* Not UTF mode */
|
||||||
{
|
{
|
||||||
switch(ctype)
|
switch(ctype)
|
||||||
|
@ -6219,13 +6219,13 @@ switch (frame->Xwhere)
|
||||||
#ifdef SUPPORT_WIDE_CHARS
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
LBL(20) LBL(21)
|
LBL(20) LBL(21)
|
||||||
#endif
|
#endif
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
LBL(16) LBL(18)
|
LBL(16) LBL(18)
|
||||||
LBL(22) LBL(23) LBL(28) LBL(30)
|
LBL(22) LBL(23) LBL(28) LBL(30)
|
||||||
LBL(32) LBL(34) LBL(42) LBL(46)
|
LBL(32) LBL(34) LBL(42) LBL(46)
|
||||||
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
|
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
|
||||||
LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
|
LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
default:
|
default:
|
||||||
return PCRE2_ERROR_INTERNAL;
|
return PCRE2_ERROR_INTERNAL;
|
||||||
}
|
}
|
||||||
|
@ -6398,14 +6398,21 @@ if (code == NULL || subject == NULL || match_data == NULL)
|
||||||
return PCRE2_ERROR_NULL;
|
return PCRE2_ERROR_NULL;
|
||||||
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||||
|
|
||||||
/* Check that the first field in the block is the magic number. If it is not,
|
/* Check that the first field in the block is the magic number. */
|
||||||
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
|
|
||||||
REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which
|
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||||
means that the pattern is likely compiled with different endianness. */
|
|
||||||
|
#ifdef FIXME
|
||||||
|
If saving/restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
|
||||||
|
this comment and code:
|
||||||
|
|
||||||
|
/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
|
||||||
|
with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
|
||||||
|
with different endianness. */
|
||||||
|
|
||||||
if (re->magic_number != MAGIC_NUMBER)
|
|
||||||
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
||||||
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
|
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Check the code unit width. */
|
/* Check the code unit width. */
|
||||||
|
|
||||||
|
@ -6451,7 +6458,7 @@ mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
|
||||||
we must also check that a starting offset does not point into the middle of a
|
we must also check that a starting offset does not point into the middle of a
|
||||||
multiunit character. */
|
multiunit character. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||||
{
|
{
|
||||||
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar));
|
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar));
|
||||||
|
@ -6466,7 +6473,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||||
return PCRE2_ERROR_BADUTFOFFSET;
|
return PCRE2_ERROR_BADUTFOFFSET;
|
||||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* If the pattern was successfully studied with JIT support, run the JIT
|
/* If the pattern was successfully studied with JIT support, run the JIT
|
||||||
executable instead of the rest of this function. Most options must be set at
|
executable instead of the rest of this function. Most options must be set at
|
||||||
|
@ -6640,7 +6647,7 @@ if (!anchored)
|
||||||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||||
{
|
{
|
||||||
first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
|
first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
|
if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -6660,7 +6667,7 @@ if ((re->flags & PCRE2_LASTSET) != 0)
|
||||||
if ((re->flags & PCRE2_LASTCASELESS) != 0)
|
if ((re->flags & PCRE2_LASTCASELESS) != 0)
|
||||||
{
|
{
|
||||||
req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
|
req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
|
if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -6696,7 +6703,7 @@ for(;;)
|
||||||
if (firstline)
|
if (firstline)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR t = start_match;
|
PCRE2_SPTR t = start_match;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
while (t < mb->end_subject && !IS_NEWLINE(t))
|
while (t < mb->end_subject && !IS_NEWLINE(t))
|
||||||
|
@ -6731,7 +6738,7 @@ for(;;)
|
||||||
{
|
{
|
||||||
if (start_match > mb->start_subject + start_offset)
|
if (start_match > mb->start_subject + start_offset)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||||
|
@ -6905,7 +6912,7 @@ for(;;)
|
||||||
case MATCH_THEN:
|
case MATCH_THEN:
|
||||||
mb->ignore_skip_arg = 0;
|
mb->ignore_skip_arg = 0;
|
||||||
new_start_match = start_match + 1;
|
new_start_match = start_match + 1;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
ACROSSCHAR(new_start_match < end_subject, *new_start_match,
|
ACROSSCHAR(new_start_match < end_subject, *new_start_match,
|
||||||
new_start_match++);
|
new_start_match++);
|
||||||
|
|
|
@ -81,12 +81,12 @@ PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
||||||
{
|
{
|
||||||
uint32_t c;
|
uint32_t c;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
|
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
|
||||||
#else
|
#else
|
||||||
(void)utf;
|
(void)utf;
|
||||||
c = *ptr;
|
c = *ptr;
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||||
{
|
{
|
||||||
|
@ -172,7 +172,7 @@ PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
||||||
uint32_t c;
|
uint32_t c;
|
||||||
ptr--;
|
ptr--;
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
BACKCHAR(ptr);
|
BACKCHAR(ptr);
|
||||||
|
@ -182,7 +182,7 @@ else c = *ptr;
|
||||||
#else
|
#else
|
||||||
(void)utf;
|
(void)utf;
|
||||||
c = *ptr;
|
c = *ptr;
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||||
{
|
{
|
||||||
|
|
|
@ -50,10 +50,11 @@ into a UTF string. The behaviour is different for each code unit width. */
|
||||||
#include "pcre2_internal.h"
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
|
||||||
/* If SUPPORT_UTF is not defined, this function will never be called. Supply a
|
/* If SUPPORT_UNICODE is not defined, this function will never be called.
|
||||||
dummy function because some compilers do not like empty source modules. */
|
Supply a dummy function because some compilers do not like empty source
|
||||||
|
modules. */
|
||||||
|
|
||||||
#ifndef SUPPORT_UTF
|
#ifndef SUPPORT_UNICODE
|
||||||
unsigned int
|
unsigned int
|
||||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||||
{
|
{
|
||||||
|
@ -61,7 +62,7 @@ PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||||
(void)(buffer);
|
(void)(buffer);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#else /* SUPPORT_UTF */
|
#else /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
|
@ -114,6 +115,6 @@ return 2;
|
||||||
return 1;
|
return 1;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* End of pcre2_ord2utf.c */
|
/* End of pcre2_ord2utf.c */
|
||||||
|
|
|
@ -56,11 +56,9 @@ Arguments:
|
||||||
what what information is required
|
what what information is required
|
||||||
where where to put the information
|
where where to put the information
|
||||||
|
|
||||||
Returns: 0 if data returned, negative on error
|
Returns: 0 if data returned, negative on error or unset value
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* FIXME: Remove BADENDIANNESS if saving/restoring is not to be implemented. */
|
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
|
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
|
||||||
{
|
{
|
||||||
|
@ -69,13 +67,21 @@ const pcre2_real_code *re = (pcre2_real_code *)code;
|
||||||
if (re == NULL || where == NULL) return PCRE2_ERROR_NULL;
|
if (re == NULL || where == NULL) return PCRE2_ERROR_NULL;
|
||||||
|
|
||||||
/* Check that the first field in the block is the magic number. If it is not,
|
/* Check that the first field in the block is the magic number. If it is not,
|
||||||
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
|
return with PCRE2_ERROR_BADMAGIC. */
|
||||||
REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which
|
|
||||||
means that the pattern is likely compiled with different endianness. */
|
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||||
|
|
||||||
|
#ifdef FIXME
|
||||||
|
If saving/restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
|
||||||
|
this comment and code:
|
||||||
|
|
||||||
|
/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
|
||||||
|
with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
|
||||||
|
with different endianness. */
|
||||||
|
|
||||||
if (re->magic_number != MAGIC_NUMBER)
|
|
||||||
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
||||||
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
|
PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Check that this pattern was compiled in the correct bit mode */
|
/* Check that this pattern was compiled in the correct bit mode */
|
||||||
|
|
||||||
|
@ -151,6 +157,7 @@ switch(what)
|
||||||
|
|
||||||
case PCRE2_INFO_MATCHLIMIT:
|
case PCRE2_INFO_MATCHLIMIT:
|
||||||
*((uint32_t *)where) = re->limit_match;
|
*((uint32_t *)where) = re->limit_match;
|
||||||
|
if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||||
|
@ -179,6 +186,7 @@ switch(what)
|
||||||
|
|
||||||
case PCRE2_INFO_RECURSIONLIMIT:
|
case PCRE2_INFO_RECURSIONLIMIT:
|
||||||
*((uint32_t *)where) = re->limit_recursion;
|
*((uint32_t *)where) = re->limit_recursion;
|
||||||
|
if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE2_INFO_SIZE:
|
case PCRE2_INFO_SIZE:
|
||||||
|
|
|
@ -94,7 +94,7 @@ BOOL one_code_unit = !utf;
|
||||||
|
|
||||||
/* If UTF is supported and requested, check for a valid single code unit. */
|
/* If UTF is supported and requested, check for a valid single code unit. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
@ -105,7 +105,7 @@ if (utf)
|
||||||
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
||||||
#endif /* CODE_UNIT_WIDTH */
|
#endif /* CODE_UNIT_WIDTH */
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Handle a valid one-code-unit character at any width. */
|
/* Handle a valid one-code-unit character at any width. */
|
||||||
|
|
||||||
|
@ -121,7 +121,7 @@ if (one_code_unit)
|
||||||
for each width. If UTF is not supported, control should never get here, but we
|
for each width. If UTF is not supported, control should never get here, but we
|
||||||
need a return statement to keep the compiler happy. */
|
need a return statement to keep the compiler happy. */
|
||||||
|
|
||||||
#ifndef SUPPORT_UTF
|
#ifndef SUPPORT_UNICODE
|
||||||
return 0;
|
return 0;
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
@ -178,7 +178,7 @@ as an indication. */
|
||||||
fprintf(f, "\\X{%x}", c);
|
fprintf(f, "\\X{%x}", c);
|
||||||
return 0;
|
return 0;
|
||||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -221,7 +221,7 @@ into the main code, however, we just put one into this function. */
|
||||||
static const char *
|
static const char *
|
||||||
get_ucpname(unsigned int ptype, unsigned int pvalue)
|
get_ucpname(unsigned int ptype, unsigned int pvalue)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
int i;
|
int i;
|
||||||
for (i = utt_size - 1; i >= 0; i--)
|
for (i = utt_size - 1; i >= 0; i--)
|
||||||
{
|
{
|
||||||
|
@ -233,7 +233,7 @@ return (i >= 0)? utt_names + utt[i].name_offset : "??";
|
||||||
(void)ptype;
|
(void)ptype;
|
||||||
(void)pvalue;
|
(void)pvalue;
|
||||||
return "??";
|
return "??";
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -228,7 +228,7 @@ for (;;)
|
||||||
case OP_NOTPOSPLUSI:
|
case OP_NOTPOSPLUSI:
|
||||||
branchlength++;
|
branchlength++;
|
||||||
cc += 2;
|
cc += 2;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
@ -249,7 +249,7 @@ for (;;)
|
||||||
case OP_NOTEXACTI:
|
case OP_NOTEXACTI:
|
||||||
branchlength += GET2(cc,1);
|
branchlength += GET2(cc,1);
|
||||||
cc += 2 + IMM2_SIZE;
|
cc += 2 + IMM2_SIZE;
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
@ -297,7 +297,7 @@ for (;;)
|
||||||
appear, but leave the code, just in case.) */
|
appear, but leave the code, just in case.) */
|
||||||
|
|
||||||
case OP_ANYBYTE:
|
case OP_ANYBYTE:
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf) return -1;
|
if (utf) return -1;
|
||||||
#endif
|
#endif
|
||||||
branchlength++;
|
branchlength++;
|
||||||
|
@ -536,7 +536,7 @@ for (;;)
|
||||||
case OP_NOTPOSQUERYI:
|
case OP_NOTPOSQUERYI:
|
||||||
|
|
||||||
cc += PRIV(OP_lengths)[op];
|
cc += PRIV(OP_lengths)[op];
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
@ -608,7 +608,7 @@ SET_BIT(c);
|
||||||
/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
|
/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
|
||||||
the end of the character, even when caseless. */
|
the end of the character, even when caseless. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
@ -617,7 +617,7 @@ if (utf)
|
||||||
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
|
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* If caseless, handle the other case of the character. */
|
/* If caseless, handle the other case of the character. */
|
||||||
|
|
||||||
|
@ -671,7 +671,7 @@ set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
||||||
register uint32_t c;
|
register uint32_t c;
|
||||||
for (c = 0; c < table_limit; c++)
|
for (c = 0; c < table_limit; c++)
|
||||||
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
|
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if (table_limit == 32) return;
|
if (table_limit == 32) return;
|
||||||
for (c = 128; c < 256; c++)
|
for (c = 128; c < 256; c++)
|
||||||
{
|
{
|
||||||
|
@ -712,7 +712,7 @@ set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
||||||
register uint32_t c;
|
register uint32_t c;
|
||||||
for (c = 0; c < table_limit; c++)
|
for (c = 0; c < table_limit; c++)
|
||||||
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
|
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -752,7 +752,7 @@ set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
|
||||||
register uint32_t c;
|
register uint32_t c;
|
||||||
int yield = SSB_DONE;
|
int yield = SSB_DONE;
|
||||||
|
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
int table_limit = utf? 16:32;
|
int table_limit = utf? 16:32;
|
||||||
#else
|
#else
|
||||||
int table_limit = 32;
|
int table_limit = 32;
|
||||||
|
@ -866,7 +866,7 @@ do
|
||||||
const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
|
const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
|
||||||
while ((c = *p++) < NOTACHAR)
|
while ((c = *p++) < NOTACHAR)
|
||||||
{
|
{
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR buff[6];
|
PCRE2_UCHAR buff[6];
|
||||||
|
@ -1042,7 +1042,7 @@ do
|
||||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||||
units of horizontal space characters. */
|
units of horizontal space characters. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
SET_BIT(0xC2); /* For U+00A0 */
|
SET_BIT(0xC2); /* For U+00A0 */
|
||||||
|
@ -1081,7 +1081,7 @@ do
|
||||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||||
units of vertical space characters. */
|
units of vertical space characters. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
SET_BIT(0xC2); /* For U+0085 (NEL) */
|
SET_BIT(0xC2); /* For U+0085 (NEL) */
|
||||||
|
@ -1181,7 +1181,7 @@ do
|
||||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||||
units of horizontal space characters. */
|
units of horizontal space characters. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
SET_BIT(0xC2); /* For U+00A0 */
|
SET_BIT(0xC2); /* For U+00A0 */
|
||||||
|
@ -1218,7 +1218,7 @@ do
|
||||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||||
units of vertical space characters. */
|
units of vertical space characters. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
SET_BIT(0xC2); /* For U+0085 (NEL) */
|
SET_BIT(0xC2); /* For U+0085 (NEL) */
|
||||||
|
@ -1287,7 +1287,7 @@ do
|
||||||
character modes, set the 0xFF bit to indicate code units >= 255. */
|
character modes, set the 0xFF bit to indicate code units >= 255. */
|
||||||
|
|
||||||
case OP_NCLASS:
|
case OP_NCLASS:
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
re->start_bitmap[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
re->start_bitmap[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||||
|
@ -1318,7 +1318,7 @@ do
|
||||||
|
|
||||||
if (classmap != NULL)
|
if (classmap != NULL)
|
||||||
{
|
{
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c];
|
for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c];
|
||||||
|
|
|
@ -108,8 +108,8 @@ Returns: if successful: 0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_substring_copy_bynumber(pcre2_match_data *match_data, int stringnumber,
|
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
|
||||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
unsigned int stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE left, right;
|
PCRE2_SIZE left, right;
|
||||||
PCRE2_SIZE p = 0;
|
PCRE2_SIZE p = 0;
|
||||||
|
@ -189,8 +189,8 @@ Returns: if successful: zero
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_substring_get_bynumber(pcre2_match_data *match_data, int stringnumber,
|
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
|
||||||
PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
unsigned int stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE left, right;
|
PCRE2_SIZE left, right;
|
||||||
PCRE2_SIZE p = 0;
|
PCRE2_SIZE p = 0;
|
||||||
|
@ -288,7 +288,7 @@ Returns: 0 if successful, else a negative error number
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
|
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
|
||||||
int stringnumber, PCRE2_SIZE *sizeptr)
|
unsigned int stringnumber, PCRE2_SIZE *sizeptr)
|
||||||
{
|
{
|
||||||
if (stringnumber >= match_data->oveccount ||
|
if (stringnumber >= match_data->oveccount ||
|
||||||
stringnumber > match_data->code->top_bracket ||
|
stringnumber > match_data->code->top_bracket ||
|
||||||
|
|
|
@ -76,7 +76,7 @@ as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
|
||||||
handling wide characters. */
|
handling wide characters. */
|
||||||
|
|
||||||
#if defined PCRE2_PCRE2TEST || \
|
#if defined PCRE2_PCRE2TEST || \
|
||||||
(defined SUPPORT_UTF && \
|
(defined SUPPORT_UNICODE && \
|
||||||
defined PCRE2_CODE_UNIT_WIDTH && \
|
defined PCRE2_CODE_UNIT_WIDTH && \
|
||||||
PCRE2_CODE_UNIT_WIDTH == 8)
|
PCRE2_CODE_UNIT_WIDTH == 8)
|
||||||
|
|
||||||
|
@ -106,7 +106,7 @@ const uint8_t PRIV(utf8_table4)[] = {
|
||||||
#endif /* UTF-8 support needed */
|
#endif /* UTF-8 support needed */
|
||||||
|
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
|
|
||||||
/* Table to translate from particular type value to the general value. */
|
/* Table to translate from particular type value to the general value. */
|
||||||
|
|
||||||
|
@ -728,6 +728,6 @@ const ucp_type_table PRIV(utt)[] = {
|
||||||
|
|
||||||
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||||
|
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* End of pcre2_tables.c */
|
/* End of pcre2_tables.c */
|
||||||
|
|
|
@ -32,7 +32,7 @@ condition to cut out the tables when not needed. But don't leave
|
||||||
a totally empty module because some compilers barf at that.
|
a totally empty module because some compilers barf at that.
|
||||||
Instead, just supply small dummy tables. */
|
Instead, just supply small dummy tables. */
|
||||||
|
|
||||||
#ifndef SUPPORT_UTF
|
#ifndef SUPPORT_UNICODE
|
||||||
const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};
|
const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};
|
||||||
const uint8_t PRIV(ucd_stage1)[] = {0};
|
const uint8_t PRIV(ucd_stage1)[] = {0};
|
||||||
const uint16_t PRIV(ucd_stage2)[] = {0};
|
const uint16_t PRIV(ucd_stage2)[] = {0};
|
||||||
|
@ -3628,6 +3628,6 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */
|
||||||
#if UCD_BLOCK_SIZE != 128
|
#if UCD_BLOCK_SIZE != 128
|
||||||
#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h
|
#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h
|
||||||
#endif
|
#endif
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
#endif /* PCRE2_PCRE2TEST */
|
#endif /* PCRE2_PCRE2TEST */
|
||||||
|
|
|
@ -50,12 +50,12 @@ strings. */
|
||||||
#include "pcre2_internal.h"
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
|
||||||
#ifndef SUPPORT_UTF
|
#ifndef SUPPORT_UNICODE
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Dummy function when UTF not supported *
|
* Dummy function when Unicode is not supported *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function should never be called when UTF is not supported. */
|
/* This function should never be called when Unicode is not supported. */
|
||||||
|
|
||||||
int
|
int
|
||||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||||
|
@ -388,6 +388,6 @@ for (p = string; length-- > 0; p++)
|
||||||
return 0;
|
return 0;
|
||||||
#endif /* CODE_UNIT_WIDTH */
|
#endif /* CODE_UNIT_WIDTH */
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* End of pcre2_valid_utf.c */
|
/* End of pcre2_valid_utf.c */
|
||||||
|
|
|
@ -103,7 +103,7 @@ while ((t = *data++) != XCL_END)
|
||||||
uint32_t x, y;
|
uint32_t x, y;
|
||||||
if (t == XCL_SINGLE)
|
if (t == XCL_SINGLE)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||||
|
@ -115,7 +115,7 @@ while ((t = *data++) != XCL_END)
|
||||||
}
|
}
|
||||||
else if (t == XCL_RANGE)
|
else if (t == XCL_RANGE)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||||
|
@ -130,7 +130,7 @@ while ((t = *data++) != XCL_END)
|
||||||
if (c >= x && c <= y) return !negated;
|
if (c >= x && c <= y) return !negated;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UNICODE
|
||||||
else /* XCL_PROP & XCL_NOTPROP */
|
else /* XCL_PROP & XCL_NOTPROP */
|
||||||
{
|
{
|
||||||
const ucd_record *prop = GET_UCD(c);
|
const ucd_record *prop = GET_UCD(c);
|
||||||
|
@ -262,7 +262,7 @@ while ((t = *data++) != XCL_END)
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
(void)utf; /* Avoid compiler warning */
|
(void)utf; /* Avoid compiler warning */
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UNICODE */
|
||||||
}
|
}
|
||||||
|
|
||||||
return negated; /* char did not match */
|
return negated; /* char did not match */
|
||||||
|
|
125
src/pcre2test.c
125
src/pcre2test.c
|
@ -196,6 +196,7 @@ so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
|
||||||
for building the library. */
|
for building the library. */
|
||||||
|
|
||||||
#define PRIV(name) name
|
#define PRIV(name) name
|
||||||
|
#define PCRE2_CODE_UNIT_WIDTH 0
|
||||||
#include "pcre2.h"
|
#include "pcre2.h"
|
||||||
#include "pcre2posix.h"
|
#include "pcre2posix.h"
|
||||||
#include "pcre2_internal.h"
|
#include "pcre2_internal.h"
|
||||||
|
@ -208,16 +209,17 @@ of PRIV avoids name clashes. */
|
||||||
#include "pcre2_tables.c"
|
#include "pcre2_tables.c"
|
||||||
#include "pcre2_ucd.c"
|
#include "pcre2_ucd.c"
|
||||||
|
|
||||||
/* When PCRE2_CODE_UNIT_WIDTH is unset, pcre2_internal.h does not include
|
/* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
|
||||||
pcre2_intmodedep.h, which is where mode-dependent macros and structures are
|
pcre2_intmodedep.h, which is where mode-dependent macros and structures are
|
||||||
defined. We can now include it for each supported code unit width. Because
|
defined. We can now include it for each supported code unit width. Because
|
||||||
PCRE2_CODE_UNIT_WIDTH was not defined before including pcre2.h, it will have
|
PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
|
||||||
left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately while
|
have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
|
||||||
including these files, and then restore it to a no-op. Because LINK_SIZE may be
|
while including these files, and then restore it to a no-op. Because LINK_SIZE
|
||||||
changed in 16-bit mode and forced to 1 in 32-bit mode, the order of these
|
may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
|
||||||
inclusions should not be changed. */
|
these inclusions should not be changed. */
|
||||||
|
|
||||||
#undef PCRE2_SUFFIX
|
#undef PCRE2_SUFFIX
|
||||||
|
#undef PCRE2_CODE_UNIT_WIDTH
|
||||||
|
|
||||||
#ifdef SUPPORT_PCRE8
|
#ifdef SUPPORT_PCRE8
|
||||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||||
|
@ -576,7 +578,7 @@ static coptstruct coptlist[] = {
|
||||||
{ "pcre16", CONF_FIX, SUPPORT_16 },
|
{ "pcre16", CONF_FIX, SUPPORT_16 },
|
||||||
{ "pcre32", CONF_FIX, SUPPORT_32 },
|
{ "pcre32", CONF_FIX, SUPPORT_32 },
|
||||||
{ "pcre8", CONF_FIX, SUPPORT_8 },
|
{ "pcre8", CONF_FIX, SUPPORT_8 },
|
||||||
{ "utf", CONF_INT, PCRE2_CONFIG_UTF }
|
{ "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
|
||||||
};
|
};
|
||||||
|
|
||||||
#define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct)
|
#define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct)
|
||||||
|
@ -2815,22 +2817,26 @@ pattern.
|
||||||
Arguments:
|
Arguments:
|
||||||
what code for the required information
|
what code for the required information
|
||||||
where where to put the answer
|
where where to put the answer
|
||||||
|
unsetok PCRE2_ERROR_UNSET is an "expected" result
|
||||||
|
|
||||||
Returns: the return from pcre2_pattern_info()
|
Returns: the return from pcre2_pattern_info()
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pattern_info(int what, void *where)
|
pattern_info(int what, void *where, BOOL unsetok)
|
||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
|
PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
|
||||||
if (rc >= 0) return 0;
|
if (rc >= 0) return 0;
|
||||||
fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
|
if (rc != PCRE2_ERROR_UNSET || !unsetok)
|
||||||
what);
|
{
|
||||||
if (rc == PCRE2_ERROR_BADMODE)
|
fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
|
||||||
fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
|
what);
|
||||||
"%d-bit mode\n", test_mode,
|
if (rc == PCRE2_ERROR_BADMODE)
|
||||||
8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
|
fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
|
||||||
|
"%d-bit mode\n", test_mode,
|
||||||
|
8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
|
||||||
|
}
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3026,32 +3032,61 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
||||||
{
|
{
|
||||||
const void *nametable;
|
const void *nametable;
|
||||||
const uint8_t *start_bits;
|
const uint8_t *start_bits;
|
||||||
|
BOOL match_limit_set, recursion_limit_set;
|
||||||
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
|
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
|
||||||
hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit,
|
hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit,
|
||||||
maxlookbehind, minlength, nameentrysize, namecount, newline_convention,
|
maxlookbehind, minlength, nameentrysize, namecount, newline_convention,
|
||||||
recursion_limit;
|
recursion_limit;
|
||||||
|
|
||||||
|
/* These info requests may return PCRE2_ERROR_UNSET. */
|
||||||
|
|
||||||
|
switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
match_limit_set = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_ERROR_UNSET:
|
||||||
|
match_limit_set = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return PR_ABEND;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch(pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit, TRUE))
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
recursion_limit_set = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_ERROR_UNSET:
|
||||||
|
recursion_limit_set = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return PR_ABEND;
|
||||||
|
}
|
||||||
|
|
||||||
/* These info requests should always succeed. */
|
/* These info requests should always succeed. */
|
||||||
|
|
||||||
if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax) +
|
if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_BSR, &bsr_convention) +
|
pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count) +
|
pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits) +
|
pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit) +
|
pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype) +
|
pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf) +
|
pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_JCHANGED, &jchanged) +
|
pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit) +
|
pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype) +
|
pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty) +
|
pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit) +
|
pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind) +
|
pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_MINLENGTH, &minlength) +
|
pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_NAMECOUNT, &namecount) +
|
pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize) +
|
pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
|
||||||
pattern_info(PCRE2_INFO_NAMETABLE, &nametable) +
|
pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
|
||||||
pattern_info(PCRE2_INFO_NEWLINE, &newline_convention) +
|
|
||||||
pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit)
|
|
||||||
!= 0)
|
!= 0)
|
||||||
return PR_ABEND;
|
return PR_ABEND;
|
||||||
|
|
||||||
|
@ -3063,10 +3098,10 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
||||||
if (maxlookbehind > 0)
|
if (maxlookbehind > 0)
|
||||||
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
|
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
|
||||||
|
|
||||||
if (match_limit != UINT32_MAX)
|
if (match_limit_set)
|
||||||
fprintf(outfile, "Match limit = %u\n", match_limit);
|
fprintf(outfile, "Match limit = %u\n", match_limit);
|
||||||
|
|
||||||
if (recursion_limit != UINT32_MAX)
|
if (recursion_limit_set)
|
||||||
fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
|
fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
|
||||||
|
|
||||||
if (namecount > 0)
|
if (namecount > 0)
|
||||||
|
@ -3099,8 +3134,8 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
||||||
if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
|
if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
|
||||||
if (match_empty) fprintf(outfile, "May match empty string\n");
|
if (match_empty) fprintf(outfile, "May match empty string\n");
|
||||||
|
|
||||||
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options);
|
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
|
||||||
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options);
|
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
|
||||||
|
|
||||||
/* Remove UTF/UCP if they were there only because of forbid_utf. This saves
|
/* Remove UTF/UCP if they were there only because of forbid_utf. This saves
|
||||||
cluttering up the verification output of non-UTF test files. */
|
cluttering up the verification output of non-UTF test files. */
|
||||||
|
@ -3234,7 +3269,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
||||||
if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
|
if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
|
||||||
{
|
{
|
||||||
size_t jitsize;
|
size_t jitsize;
|
||||||
if (pattern_info(PCRE2_INFO_JITSIZE, &jitsize) == 0)
|
if (pattern_info(PCRE2_INFO_JITSIZE, &jitsize, FALSE) == 0)
|
||||||
{
|
{
|
||||||
if (jitsize > 0)
|
if (jitsize > 0)
|
||||||
fprintf(outfile, "JIT compilation was successful\n");
|
fprintf(outfile, "JIT compilation was successful\n");
|
||||||
|
@ -3625,14 +3660,14 @@ if ((pat_patctl.control & CTL_MEMORY) != 0)
|
||||||
if (test_mode == 32) cblock_size = sizeof(pcre2_real_code_32);
|
if (test_mode == 32) cblock_size = sizeof(pcre2_real_code_32);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
(void)pattern_info(PCRE2_INFO_SIZE, &size);
|
(void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
|
||||||
(void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count);
|
(void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
|
||||||
(void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
|
(void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
|
||||||
fprintf(outfile, "Memory allocation (code space): %d\n",
|
fprintf(outfile, "Memory allocation (code space): %d\n",
|
||||||
(int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
|
(int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
|
||||||
if (pat_patctl.jit != 0)
|
if (pat_patctl.jit != 0)
|
||||||
{
|
{
|
||||||
(void)pattern_info(PCRE2_INFO_JITSIZE, &size);
|
(void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
|
||||||
fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
|
fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4452,7 +4487,7 @@ for (gmatched = 0;; gmatched++)
|
||||||
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
|
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
|
||||||
{
|
{
|
||||||
uint32_t maxcapcount;
|
uint32_t maxcapcount;
|
||||||
if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount) < 0)
|
if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
|
||||||
return PR_SKIP;
|
return PR_SKIP;
|
||||||
capcount = maxcapcount + 1; /* Allow for full match */
|
capcount = maxcapcount + 1; /* Allow for full match */
|
||||||
if (capcount > (int)dat_datctl.oveccount) capcount = dat_datctl.oveccount;
|
if (capcount > (int)dat_datctl.oveccount) capcount = dat_datctl.oveccount;
|
||||||
|
@ -4943,7 +4978,7 @@ printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
|
||||||
printf(" pcre8 8 bit library support enabled [0, 1]\n");
|
printf(" pcre8 8 bit library support enabled [0, 1]\n");
|
||||||
printf(" pcre16 16 bit library support enabled [0, 1]\n");
|
printf(" pcre16 16 bit library support enabled [0, 1]\n");
|
||||||
printf(" pcre32 32 bit library support enabled [0, 1]\n");
|
printf(" pcre32 32 bit library support enabled [0, 1]\n");
|
||||||
printf(" utf Unicode Transformation Format supported [0, 1]\n");
|
printf(" unicode Unicode and UTF support enabled [0, 1]\n");
|
||||||
printf(" -d set default pattern control 'debug'\n");
|
printf(" -d set default pattern control 'debug'\n");
|
||||||
printf(" -dfa set default subject control 'dfa'\n");
|
printf(" -dfa set default subject control 'dfa'\n");
|
||||||
printf(" -help show usage information\n");
|
printf(" -help show usage information\n");
|
||||||
|
@ -5057,7 +5092,7 @@ printf(" 16-bit support\n");
|
||||||
printf(" 32-bit support\n");
|
printf(" 32-bit support\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_UTF, &rc, sizeof(rc));
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &rc, sizeof(rc));
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
printf(" UTF support (Unicode version %s)\n", uversion);
|
printf(" UTF support (Unicode version %s)\n", uversion);
|
||||||
else
|
else
|
||||||
|
|
|
@ -384,15 +384,15 @@ aaaaa2
|
||||||
010203040506
|
010203040506
|
||||||
RC=0
|
RC=0
|
||||||
======== STDERR ========
|
======== STDERR ========
|
||||||
pcre2grep: pcre2_match() gave error -47 while matching this text:
|
pcre2grep: pcre2_match() gave error -45 while matching this text:
|
||||||
|
|
||||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||||
|
|
||||||
pcre2grep: pcre2_match() gave error -47 while matching this text:
|
pcre2grep: pcre2_match() gave error -45 while matching this text:
|
||||||
|
|
||||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||||
|
|
||||||
pcre2grep: Error -46, -47 or -52 means that a resource limit was exceeded.
|
pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded.
|
||||||
pcre2grep: Check your regex for nested unlimited loops.
|
pcre2grep: Check your regex for nested unlimited loops.
|
||||||
---------------------------- Test 38 ------------------------------
|
---------------------------- Test 38 ------------------------------
|
||||||
This line contains a binary zero here >< for testing.
|
This line contains a binary zero here >< for testing.
|
||||||
|
@ -510,23 +510,23 @@ In the middle of a line, PATTERN appears.
|
||||||
Check up on PATTERN near the end.
|
Check up on PATTERN near the end.
|
||||||
RC=0
|
RC=0
|
||||||
---------------------------- Test 62 -----------------------------
|
---------------------------- Test 62 -----------------------------
|
||||||
pcre2grep: pcre2_match() gave error -47 while matching text that starts:
|
pcre2grep: pcre2_match() gave error -45 while matching text that starts:
|
||||||
|
|
||||||
This is a file of miscellaneous text that is used as test data for checking
|
This is a file of miscellaneous text that is used as test data for checking
|
||||||
that the pcregrep command is working correctly. The file must be more than 24K
|
that the pcregrep command is working correctly. The file must be more than 24K
|
||||||
long so that it needs more than a single read
|
long so that it needs more than a single read
|
||||||
|
|
||||||
pcre2grep: Error -46, -47 or -52 means that a resource limit was exceeded.
|
pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded.
|
||||||
pcre2grep: Check your regex for nested unlimited loops.
|
pcre2grep: Check your regex for nested unlimited loops.
|
||||||
RC=1
|
RC=1
|
||||||
---------------------------- Test 63 -----------------------------
|
---------------------------- Test 63 -----------------------------
|
||||||
pcre2grep: pcre2_match() gave error -52 while matching text that starts:
|
pcre2grep: pcre2_match() gave error -50 while matching text that starts:
|
||||||
|
|
||||||
This is a file of miscellaneous text that is used as test data for checking
|
This is a file of miscellaneous text that is used as test data for checking
|
||||||
that the pcregrep command is working correctly. The file must be more than 24K
|
that the pcregrep command is working correctly. The file must be more than 24K
|
||||||
long so that it needs more than a single read
|
long so that it needs more than a single read
|
||||||
|
|
||||||
pcre2grep: Error -46, -47 or -52 means that a resource limit was exceeded.
|
pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded.
|
||||||
pcre2grep: Check your regex for nested unlimited loops.
|
pcre2grep: Check your regex for nested unlimited loops.
|
||||||
RC=1
|
RC=1
|
||||||
---------------------------- Test 64 ------------------------------
|
---------------------------- Test 64 ------------------------------
|
||||||
|
|
|
@ -888,7 +888,7 @@ Subject length lower bound = 3
|
||||||
a\x{123}aa\=offset=1
|
a\x{123}aa\=offset=1
|
||||||
0: aa
|
0: aa
|
||||||
a\x{123}aa\=offset=2
|
a\x{123}aa\=offset=2
|
||||||
Error -36 (bad UTF-8 offset)
|
Error -35 (bad UTF-8 offset)
|
||||||
a\x{123}aa\=offset=3
|
a\x{123}aa\=offset=3
|
||||||
0: aa
|
0: aa
|
||||||
a\x{123}aa\=offset=4
|
a\x{123}aa\=offset=4
|
||||||
|
@ -896,7 +896,7 @@ Error -36 (bad UTF-8 offset)
|
||||||
a\x{123}aa\=offset=5
|
a\x{123}aa\=offset=5
|
||||||
No match
|
No match
|
||||||
a\x{123}aa\=offset=6
|
a\x{123}aa\=offset=6
|
||||||
Failed: error -34: bad offset value
|
Failed: error -33: bad offset value
|
||||||
|
|
||||||
/\x{1234}+/Ii,utf
|
/\x{1234}+/Ii,utf
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
|
|
|
@ -787,9 +787,9 @@ Subject length lower bound = 3
|
||||||
a\x{123}aa\=offset=4
|
a\x{123}aa\=offset=4
|
||||||
No match
|
No match
|
||||||
a\x{123}aa\=offset=5
|
a\x{123}aa\=offset=5
|
||||||
Failed: error -34: bad offset value
|
Failed: error -33: bad offset value
|
||||||
a\x{123}aa\=offset=6
|
a\x{123}aa\=offset=6
|
||||||
Failed: error -34: bad offset value
|
Failed: error -33: bad offset value
|
||||||
|
|
||||||
/\x{1234}+/Ii,utf
|
/\x{1234}+/Ii,utf
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
|
@ -851,9 +851,9 @@ Subject length lower bound = 1
|
||||||
|
|
||||||
/a/utf
|
/a/utf
|
||||||
\x{10000}\=offset=1
|
\x{10000}\=offset=1
|
||||||
Error -36 (bad UTF-16 offset)
|
Error -35 (bad UTF-16 offset)
|
||||||
\x{10000}ab\=offset=1
|
\x{10000}ab\=offset=1
|
||||||
Error -36 (bad UTF-16 offset)
|
Error -35 (bad UTF-16 offset)
|
||||||
\x{10000}ab\=offset=2
|
\x{10000}ab\=offset=2
|
||||||
0: a
|
0: a
|
||||||
\x{10000}ab\=offset=3
|
\x{10000}ab\=offset=3
|
||||||
|
@ -861,7 +861,7 @@ No match
|
||||||
\x{10000}ab\=offset=4
|
\x{10000}ab\=offset=4
|
||||||
No match
|
No match
|
||||||
\x{10000}ab\=offset=5
|
\x{10000}ab\=offset=5
|
||||||
Failed: error -34: bad offset value
|
Failed: error -33: bad offset value
|
||||||
|
|
||||||
/<2F><><EFBFBD>/utf
|
/<2F><><EFBFBD>/utf
|
||||||
Failed: error -26 at offset 0: UTF-16 error: isolated low surrogate
|
Failed: error -26 at offset 0: UTF-16 error: isolated low surrogate
|
||||||
|
|
|
@ -779,9 +779,9 @@ Subject length lower bound = 3
|
||||||
a\x{123}aa\=offset=4
|
a\x{123}aa\=offset=4
|
||||||
No match
|
No match
|
||||||
a\x{123}aa\=offset=5
|
a\x{123}aa\=offset=5
|
||||||
Failed: error -34: bad offset value
|
Failed: error -33: bad offset value
|
||||||
a\x{123}aa\=offset=6
|
a\x{123}aa\=offset=6
|
||||||
Failed: error -34: bad offset value
|
Failed: error -33: bad offset value
|
||||||
|
|
||||||
/\x{1234}+/Ii,utf
|
/\x{1234}+/Ii,utf
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
|
@ -851,9 +851,9 @@ No match
|
||||||
\x{10000}ab\=offset=3
|
\x{10000}ab\=offset=3
|
||||||
No match
|
No match
|
||||||
\x{10000}ab\=offset=4
|
\x{10000}ab\=offset=4
|
||||||
Failed: error -34: bad offset value
|
Failed: error -33: bad offset value
|
||||||
\x{10000}ab\=offset=5
|
\x{10000}ab\=offset=5
|
||||||
Failed: error -34: bad offset value
|
Failed: error -33: bad offset value
|
||||||
|
|
||||||
/<2F><><EFBFBD>/utf
|
/<2F><><EFBFBD>/utf
|
||||||
Failed: error -27 at offset 0: UTF-32 error: code points 0xd800-0xdfff are not defined
|
Failed: error -27 at offset 0: UTF-32 error: code points 0xd800-0xdfff are not defined
|
||||||
|
|
|
@ -986,7 +986,7 @@ Subject length lower bound = 4
|
||||||
0: abcd
|
0: abcd
|
||||||
1: a
|
1: a
|
||||||
2: d
|
2: d
|
||||||
copy substring 5 failed (-49): unknown or unset substring
|
copy substring 5 failed (-47): unknown or unset substring
|
||||||
|
|
||||||
/(.{20})/I
|
/(.{20})/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
@ -1040,9 +1040,9 @@ Subject length lower bound = 4
|
||||||
2: <unset>
|
2: <unset>
|
||||||
3: f
|
3: f
|
||||||
1G a (1)
|
1G a (1)
|
||||||
get substring 2 failed (-49): unknown or unset substring
|
get substring 2 failed (-47): unknown or unset substring
|
||||||
3G f (1)
|
3G f (1)
|
||||||
get substring 4 failed (-49): unknown or unset substring
|
get substring 4 failed (-47): unknown or unset substring
|
||||||
0L adef
|
0L adef
|
||||||
1L a
|
1L a
|
||||||
2L
|
2L
|
||||||
|
@ -1055,7 +1055,7 @@ get substring 4 failed (-49): unknown or unset substring
|
||||||
1G bc (2)
|
1G bc (2)
|
||||||
2G bc (2)
|
2G bc (2)
|
||||||
3G f (1)
|
3G f (1)
|
||||||
get substring 4 failed (-49): unknown or unset substring
|
get substring 4 failed (-47): unknown or unset substring
|
||||||
0L bcdef
|
0L bcdef
|
||||||
1L bc
|
1L bc
|
||||||
2L bc
|
2L bc
|
||||||
|
@ -4370,7 +4370,7 @@ Subject length lower bound = 8
|
||||||
0: abcdefgh
|
0: abcdefgh
|
||||||
1: cd
|
1: cd
|
||||||
2: gh
|
2: gh
|
||||||
copy substring 'three' failed (-49): unknown or unset substring
|
copy substring 'three' failed (-47): unknown or unset substring
|
||||||
|
|
||||||
/(?P<Tes>)(?P<Test>)/IB
|
/(?P<Tes>)(?P<Test>)/IB
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
@ -5737,7 +5737,7 @@ No match
|
||||||
0: a1
|
0: a1
|
||||||
1: a1
|
1: a1
|
||||||
2: a1
|
2: a1
|
||||||
copy substring 'Z' failed (-49): unknown or unset substring
|
copy substring 'Z' failed (-47): unknown or unset substring
|
||||||
C a1 (2) A
|
C a1 (2) A
|
||||||
|
|
||||||
/(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames
|
/(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames
|
||||||
|
@ -5778,7 +5778,7 @@ Subject length lower bound = 2
|
||||||
C a (1) A
|
C a (1) A
|
||||||
cd\=copy=A
|
cd\=copy=A
|
||||||
0: cd
|
0: cd
|
||||||
copy substring 'A' failed (-49): unknown or unset substring
|
copy substring 'A' failed (-47): unknown or unset substring
|
||||||
|
|
||||||
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
|
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
|
||||||
Capturing subpattern count = 4
|
Capturing subpattern count = 4
|
||||||
|
@ -5822,7 +5822,7 @@ No match
|
||||||
0: a1
|
0: a1
|
||||||
1: a1
|
1: a1
|
||||||
2: a1
|
2: a1
|
||||||
get substring 'Z' failed (-49): unknown or unset substring
|
get substring 'Z' failed (-47): unknown or unset substring
|
||||||
G a1 (2) A
|
G a1 (2) A
|
||||||
|
|
||||||
/^(?P<A>a)(?P<A>b)/I,dupnames
|
/^(?P<A>a)(?P<A>b)/I,dupnames
|
||||||
|
@ -5853,7 +5853,7 @@ Subject length lower bound = 2
|
||||||
G a (1) A
|
G a (1) A
|
||||||
cd\=get=A
|
cd\=get=A
|
||||||
0: cd
|
0: cd
|
||||||
get substring 'A' failed (-49): unknown or unset substring
|
get substring 'A' failed (-47): unknown or unset substring
|
||||||
|
|
||||||
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
|
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
|
||||||
Capturing subpattern count = 4
|
Capturing subpattern count = 4
|
||||||
|
@ -10446,7 +10446,7 @@ Partial match: abc
|
||||||
abc\=offset=3
|
abc\=offset=3
|
||||||
No match
|
No match
|
||||||
abc\=offset=4
|
abc\=offset=4
|
||||||
Failed: error -34: bad offset value
|
Failed: error -33: bad offset value
|
||||||
abc\=offset=-4
|
abc\=offset=-4
|
||||||
** Invalid value in 'offset=-4'
|
** Invalid value in 'offset=-4'
|
||||||
|
|
||||||
|
@ -11129,15 +11129,15 @@ Matched, but too many substrings
|
||||||
|
|
||||||
/((?2))((?1))/
|
/((?2))((?1))/
|
||||||
abc
|
abc
|
||||||
Failed: error -51: nested recursion at the same subject position
|
Failed: error -49: nested recursion at the same subject position
|
||||||
|
|
||||||
/((?(R2)a+|(?1)b))/
|
/((?(R2)a+|(?1)b))/
|
||||||
aaaabcde
|
aaaabcde
|
||||||
Failed: error -51: nested recursion at the same subject position
|
Failed: error -49: nested recursion at the same subject position
|
||||||
|
|
||||||
/(?(R)a*(?1)|((?R))b)/
|
/(?(R)a*(?1)|((?R))b)/
|
||||||
aaaabcde
|
aaaabcde
|
||||||
Failed: error -51: nested recursion at the same subject position
|
Failed: error -49: nested recursion at the same subject position
|
||||||
|
|
||||||
/(a+|(?R)b)/
|
/(a+|(?R)b)/
|
||||||
Failed: error 140 at offset 7: recursion could loop indefinitely
|
Failed: error 140 at offset 7: recursion could loop indefinitely
|
||||||
|
@ -12129,11 +12129,11 @@ Subject length lower bound = 3
|
||||||
aaaaaaaaaaaaaz
|
aaaaaaaaaaaaaz
|
||||||
No match
|
No match
|
||||||
aaaaaaaaaaaaaz\=match_limit=3000
|
aaaaaaaaaaaaaz\=match_limit=3000
|
||||||
Failed: error -47: match limit exceeded
|
Failed: error -45: match limit exceeded
|
||||||
|
|
||||||
/(a+)*zz/
|
/(a+)*zz/
|
||||||
aaaaaaaaaaaaaz\=recursion_limit=10
|
aaaaaaaaaaaaaz\=recursion_limit=10
|
||||||
Failed: error -52: recursion limit exceeded
|
Failed: error -50: recursion limit exceeded
|
||||||
|
|
||||||
/(*LIMIT_MATCH=3000)(a+)*zz/I
|
/(*LIMIT_MATCH=3000)(a+)*zz/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
@ -12142,9 +12142,9 @@ Starting code units: a z
|
||||||
Last code unit = 'z'
|
Last code unit = 'z'
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 2
|
||||||
aaaaaaaaaaaaaz
|
aaaaaaaaaaaaaz
|
||||||
Failed: error -47: match limit exceeded
|
Failed: error -45: match limit exceeded
|
||||||
aaaaaaaaaaaaaz\=match_limit=60000
|
aaaaaaaaaaaaaz\=match_limit=60000
|
||||||
Failed: error -47: match limit exceeded
|
Failed: error -45: match limit exceeded
|
||||||
|
|
||||||
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
|
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
@ -12153,7 +12153,7 @@ Starting code units: a z
|
||||||
Last code unit = 'z'
|
Last code unit = 'z'
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 2
|
||||||
aaaaaaaaaaaaaz
|
aaaaaaaaaaaaaz
|
||||||
Failed: error -47: match limit exceeded
|
Failed: error -45: match limit exceeded
|
||||||
|
|
||||||
/(*LIMIT_MATCH=60000)(a+)*zz/I
|
/(*LIMIT_MATCH=60000)(a+)*zz/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
@ -12164,7 +12164,7 @@ Subject length lower bound = 2
|
||||||
aaaaaaaaaaaaaz
|
aaaaaaaaaaaaaz
|
||||||
No match
|
No match
|
||||||
aaaaaaaaaaaaaz\=match_limit=3000
|
aaaaaaaaaaaaaz\=match_limit=3000
|
||||||
Failed: error -47: match limit exceeded
|
Failed: error -45: match limit exceeded
|
||||||
|
|
||||||
/(*LIMIT_RECURSION=10)(a+)*zz/I
|
/(*LIMIT_RECURSION=10)(a+)*zz/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
@ -12173,9 +12173,9 @@ Starting code units: a z
|
||||||
Last code unit = 'z'
|
Last code unit = 'z'
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 2
|
||||||
aaaaaaaaaaaaaz
|
aaaaaaaaaaaaaz
|
||||||
Failed: error -52: recursion limit exceeded
|
Failed: error -50: recursion limit exceeded
|
||||||
aaaaaaaaaaaaaz\=recursion_limit=1000
|
aaaaaaaaaaaaaz\=recursion_limit=1000
|
||||||
Failed: error -52: recursion limit exceeded
|
Failed: error -50: recursion limit exceeded
|
||||||
|
|
||||||
/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
|
/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
@ -12195,7 +12195,7 @@ Subject length lower bound = 2
|
||||||
aaaaaaaaaaaaaz
|
aaaaaaaaaaaaaz
|
||||||
No match
|
No match
|
||||||
aaaaaaaaaaaaaz\=recursion_limit=10
|
aaaaaaaaaaaaaz\=recursion_limit=10
|
||||||
Failed: error -52: recursion limit exceeded
|
Failed: error -50: recursion limit exceeded
|
||||||
|
|
||||||
# This test causes a segfault with Perl 5.18.0
|
# This test causes a segfault with Perl 5.18.0
|
||||||
|
|
||||||
|
|
|
@ -6132,7 +6132,7 @@ No match
|
||||||
|
|
||||||
/^(?(2)a|(1)(2))+$/
|
/^(?(2)a|(1)(2))+$/
|
||||||
123a
|
123a
|
||||||
Failed: error -40: backreference condition or recursion test not supported for DFA matching
|
Failed: error -39: backreference condition or recursion test not supported for DFA matching
|
||||||
|
|
||||||
/(?<=a|bbbb)c/
|
/(?<=a|bbbb)c/
|
||||||
ac
|
ac
|
||||||
|
@ -7059,7 +7059,7 @@ Partial match: dogs
|
||||||
|
|
||||||
/abc\K123/
|
/abc\K123/
|
||||||
xyzabc123pqr
|
xyzabc123pqr
|
||||||
Failed: error -41: item unsupported for DFA matching
|
Failed: error -40: item unsupported for DFA matching
|
||||||
|
|
||||||
/(?<=abc)123/
|
/(?<=abc)123/
|
||||||
xyzabc123pqr
|
xyzabc123pqr
|
||||||
|
@ -7185,29 +7185,29 @@ No match
|
||||||
|
|
||||||
/^(?!a(*SKIP)b)/
|
/^(?!a(*SKIP)b)/
|
||||||
ac
|
ac
|
||||||
Failed: error -41: item unsupported for DFA matching
|
Failed: error -40: item unsupported for DFA matching
|
||||||
|
|
||||||
/^(?=a(*SKIP)b|ac)/
|
/^(?=a(*SKIP)b|ac)/
|
||||||
** Failers
|
** Failers
|
||||||
No match
|
No match
|
||||||
ac
|
ac
|
||||||
Failed: error -41: item unsupported for DFA matching
|
Failed: error -40: item unsupported for DFA matching
|
||||||
|
|
||||||
/^(?=a(*THEN)b|ac)/
|
/^(?=a(*THEN)b|ac)/
|
||||||
ac
|
ac
|
||||||
Failed: error -41: item unsupported for DFA matching
|
Failed: error -40: item unsupported for DFA matching
|
||||||
|
|
||||||
/^(?=a(*PRUNE)b)/
|
/^(?=a(*PRUNE)b)/
|
||||||
ab
|
ab
|
||||||
Failed: error -41: item unsupported for DFA matching
|
Failed: error -40: item unsupported for DFA matching
|
||||||
** Failers
|
** Failers
|
||||||
No match
|
No match
|
||||||
ac
|
ac
|
||||||
Failed: error -41: item unsupported for DFA matching
|
Failed: error -40: item unsupported for DFA matching
|
||||||
|
|
||||||
/^(?(?!a(*SKIP)b))/
|
/^(?(?!a(*SKIP)b))/
|
||||||
ac
|
ac
|
||||||
Failed: error -41: item unsupported for DFA matching
|
Failed: error -40: item unsupported for DFA matching
|
||||||
|
|
||||||
/(?<=abc)def/
|
/(?<=abc)def/
|
||||||
abc\=ph
|
abc\=ph
|
||||||
|
@ -7277,7 +7277,7 @@ Partial match: abc
|
||||||
abc\=offset=3
|
abc\=offset=3
|
||||||
No match
|
No match
|
||||||
abc\=offset=4
|
abc\=offset=4
|
||||||
Failed: error -34: bad offset value
|
Failed: error -33: bad offset value
|
||||||
abc\=offset=-4
|
abc\=offset=-4
|
||||||
** Invalid value in 'offset=-4'
|
** Invalid value in 'offset=-4'
|
||||||
|
|
||||||
|
@ -7403,7 +7403,7 @@ No match
|
||||||
|
|
||||||
/((?2))((?1))/
|
/((?2))((?1))/
|
||||||
abc
|
abc
|
||||||
Failed: error -51: nested recursion at the same subject position
|
Failed: error -49: nested recursion at the same subject position
|
||||||
|
|
||||||
/(?(R)a+|(?R)b)/
|
/(?(R)a+|(?R)b)/
|
||||||
aaaabcde
|
aaaabcde
|
||||||
|
@ -7419,11 +7419,11 @@ Failed: error -51: nested recursion at the same subject position
|
||||||
|
|
||||||
/((?(R2)a+|(?1)b))/
|
/((?(R2)a+|(?1)b))/
|
||||||
aaaabcde
|
aaaabcde
|
||||||
Failed: error -40: backreference condition or recursion test not supported for DFA matching
|
Failed: error -39: backreference condition or recursion test not supported for DFA matching
|
||||||
|
|
||||||
/(?(R)a*(?1)|((?R))b)/
|
/(?(R)a*(?1)|((?R))b)/
|
||||||
aaaabcde
|
aaaabcde
|
||||||
Failed: error -51: nested recursion at the same subject position
|
Failed: error -49: nested recursion at the same subject position
|
||||||
|
|
||||||
/(a+)/no_auto_possess
|
/(a+)/no_auto_possess
|
||||||
aaaa\=ovector=3
|
aaaa\=ovector=3
|
||||||
|
@ -7572,7 +7572,7 @@ Partial match: \x0d\x0d\x0d
|
||||||
|
|
||||||
/abcdef/
|
/abcdef/
|
||||||
abc\=dfa_restart
|
abc\=dfa_restart
|
||||||
Failed: error -38: invalid data in workspace for DFA restart
|
Failed: error -37: invalid data in workspace for DFA restart
|
||||||
|
|
||||||
/<H((?(?!<H|F>)(.)|(?R))++)*F>/
|
/<H((?(?!<H|F>)(.)|(?R))++)*F>/
|
||||||
text <H more text <H texting more hexA0-"\xA0" hex above 7F-"\xBC" F> text xxxxx <H text F> text F> text2 <H text sample F> more text.
|
text <H more text <H texting more hexA0-"\xA0" hex above 7F-"\xBC" F> text xxxxx <H text F> text F> text2 <H text sample F> more text.
|
||||||
|
|
|
@ -1230,7 +1230,7 @@ Partial match: the cat
|
||||||
|
|
||||||
/ab\Cde/utf
|
/ab\Cde/utf
|
||||||
abXde
|
abXde
|
||||||
Failed: error -41: item unsupported for DFA matching
|
Failed: error -40: item unsupported for DFA matching
|
||||||
|
|
||||||
/(?<=ab\Cde)X/utf
|
/(?<=ab\Cde)X/utf
|
||||||
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
|
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
|
||||||
|
|
Loading…
Reference in New Issue