Implement --never-backslash-C
This commit is contained in:
parent
5923caf05e
commit
3263d44b97
|
@ -70,6 +70,7 @@
|
||||||
# 2015-04-24 PH added support for PCRE2_DEBUG
|
# 2015-04-24 PH added support for PCRE2_DEBUG
|
||||||
# 2015-07-16 PH updated for new pcre2_find_bracket source module
|
# 2015-07-16 PH updated for new pcre2_find_bracket source module
|
||||||
# 2015-08-24 PH correct C_FLAGS setting (patch from Roy Ivy III)
|
# 2015-08-24 PH correct C_FLAGS setting (patch from Roy Ivy III)
|
||||||
|
# 2015-10-16 PH added support for never-backslash-C
|
||||||
|
|
||||||
PROJECT(PCRE2 C)
|
PROJECT(PCRE2 C)
|
||||||
|
|
||||||
|
@ -162,6 +163,9 @@ SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL
|
||||||
SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
||||||
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
|
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
|
||||||
|
|
||||||
|
SET(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL
|
||||||
|
"If ON, backslash-C (upper case C) is locked out.")
|
||||||
|
|
||||||
SET(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL
|
SET(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL
|
||||||
"Enable Valgrind support.")
|
"Enable Valgrind support.")
|
||||||
|
|
||||||
|
@ -252,6 +256,10 @@ IF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||||
SET(BSR_ANYCRLF 1)
|
SET(BSR_ANYCRLF 1)
|
||||||
ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||||
|
|
||||||
|
IF(PCRE2_NEVER_BACKSLASH_C)
|
||||||
|
SET(NEVER_BACKSLASH_C 1)
|
||||||
|
ENDIF(PCRE2_NEVER_BACKSLASH_C)
|
||||||
|
|
||||||
IF(PCRE2_SUPPORT_UNICODE)
|
IF(PCRE2_SUPPORT_UNICODE)
|
||||||
SET(SUPPORT_UNICODE 1)
|
SET(SUPPORT_UNICODE 1)
|
||||||
ENDIF(PCRE2_SUPPORT_UNICODE)
|
ENDIF(PCRE2_SUPPORT_UNICODE)
|
||||||
|
@ -719,6 +727,7 @@ IF(PCRE2_SHOW_REPORT)
|
||||||
MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
|
MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
|
||||||
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}")
|
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}")
|
||||||
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
|
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
|
||||||
|
MESSAGE(STATUS " \\C is disabled .................. : ${PCRE2_NEVER_BACKSLASH_C}")
|
||||||
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}")
|
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}")
|
||||||
MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}")
|
MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}")
|
||||||
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
|
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
|
||||||
|
|
|
@ -201,6 +201,8 @@ escape was being ignored.
|
||||||
57. Fixed integer overflow for patterns whose minimum matching length is very,
|
57. Fixed integer overflow for patterns whose minimum matching length is very,
|
||||||
very large.
|
very large.
|
||||||
|
|
||||||
|
58. Implemented --never-backslash-C.
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
9
README
9
README
|
@ -220,6 +220,13 @@ library. They are also documented in the pcre2build man page.
|
||||||
restrict \R to match only CR, LF, or CRLF. You can make this the default by
|
restrict \R to match only CR, LF, or CRLF. You can make this the default by
|
||||||
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
||||||
|
|
||||||
|
. In a pattern, the escape sequence \C matches a single code unit, even in a
|
||||||
|
UTF mode. This can be dangerous because it breaks up multi-code-unit
|
||||||
|
characters. You can build PCRE2 with the use of \C permanently locked out by
|
||||||
|
adding --enable-never-backslash-C (note the upper case C) to the "configure"
|
||||||
|
command. When \C is allowed by the library, individual applications can lock
|
||||||
|
it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option.
|
||||||
|
|
||||||
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
|
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
|
||||||
pattern. This limits the amount of system stack that a pattern uses when it
|
pattern. This limits the amount of system stack that a pattern uses when it
|
||||||
is compiled. The default is 250, but you can change it by setting, for
|
is compiled. The default is 250, but you can change it by setting, for
|
||||||
|
@ -833,4 +840,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 16 July 2015
|
Last updated: 16 October 2015
|
||||||
|
|
64
RunTest
64
RunTest
|
@ -75,7 +75,10 @@ title17="Test 17: JIT-specific features when JIT is available"
|
||||||
title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP"
|
title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP"
|
||||||
title19="Test 19: Tests of the POSIX interface with UTF/UCP"
|
title19="Test 19: Tests of the POSIX interface with UTF/UCP"
|
||||||
title20="Test 20: Serialization tests"
|
title20="Test 20: Serialization tests"
|
||||||
maxtest=20
|
title21="Test 21: \C tests without UTF (supported for DFA matching)"
|
||||||
|
title22="Test 22: \C tests with UTF (not supported for DFA matching)"
|
||||||
|
title23="Test 23: \C disabled test"
|
||||||
|
maxtest=23
|
||||||
|
|
||||||
if [ $# -eq 1 -a "$1" = "list" ]; then
|
if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||||
echo $title0
|
echo $title0
|
||||||
|
@ -99,6 +102,9 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||||
echo $title18
|
echo $title18
|
||||||
echo $title19
|
echo $title19
|
||||||
echo $title20
|
echo $title20
|
||||||
|
echo $title21
|
||||||
|
echo $title22
|
||||||
|
echo $title23
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -223,6 +229,9 @@ do17=no
|
||||||
do18=no
|
do18=no
|
||||||
do19=no
|
do19=no
|
||||||
do20=no
|
do20=no
|
||||||
|
do21=no
|
||||||
|
do22=no
|
||||||
|
do23=no
|
||||||
|
|
||||||
while [ $# -gt 0 ] ; do
|
while [ $# -gt 0 ] ; do
|
||||||
case $1 in
|
case $1 in
|
||||||
|
@ -247,6 +256,9 @@ while [ $# -gt 0 ] ; do
|
||||||
18) do18=yes;;
|
18) do18=yes;;
|
||||||
19) do19=yes;;
|
19) do19=yes;;
|
||||||
20) do20=yes;;
|
20) do20=yes;;
|
||||||
|
21) do21=yes;;
|
||||||
|
22) do22=yes;;
|
||||||
|
23) do23=yes;;
|
||||||
-8) arg8=yes;;
|
-8) arg8=yes;;
|
||||||
-16) arg16=yes;;
|
-16) arg16=yes;;
|
||||||
-32) arg32=yes;;
|
-32) arg32=yes;;
|
||||||
|
@ -326,6 +338,11 @@ support16=$?
|
||||||
$sim ./pcre2test -C pcre2-32 >/dev/null
|
$sim ./pcre2test -C pcre2-32 >/dev/null
|
||||||
support32=$?
|
support32=$?
|
||||||
|
|
||||||
|
# \C may be disabled
|
||||||
|
|
||||||
|
$sim ./pcre2test -C backslash-C >/dev/null
|
||||||
|
supportBSC=$?
|
||||||
|
|
||||||
# Initialize all bitsizes skipped
|
# Initialize all bitsizes skipped
|
||||||
|
|
||||||
test8=skip
|
test8=skip
|
||||||
|
@ -400,7 +417,7 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
|
||||||
$do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \
|
$do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \
|
||||||
$do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
|
$do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
|
||||||
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
|
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
|
||||||
$do20 = no \
|
$do20 = no -a $do21 = no -a $do22 = no -a $do23 = no \
|
||||||
]; then
|
]; then
|
||||||
do0=yes
|
do0=yes
|
||||||
do1=yes
|
do1=yes
|
||||||
|
@ -423,6 +440,9 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
|
||||||
do18=yes
|
do18=yes
|
||||||
do19=yes
|
do19=yes
|
||||||
do20=yes
|
do20=yes
|
||||||
|
do21=yes
|
||||||
|
do22=yes
|
||||||
|
do23=yes
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Handle any explicit skips at this stage, so that an argument list may consist
|
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||||
|
@ -781,6 +801,46 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||||
checkresult $? 20 ""
|
checkresult $? 20 ""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# \C tests without UTF - DFA matching is supported
|
||||||
|
|
||||||
|
if [ "$do21" = yes ] ; then
|
||||||
|
echo $title21
|
||||||
|
if [ $supportBSC -eq 0 ] ; then
|
||||||
|
echo " Skipped because \C is disabled"
|
||||||
|
else
|
||||||
|
for opt in "" $jitopt -dfa; do
|
||||||
|
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput21 testtry
|
||||||
|
checkresult $? 21 "$opt"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# \C tests with UTF - DFA matching is not supported for \C in UTF mode
|
||||||
|
|
||||||
|
if [ "$do22" = yes ] ; then
|
||||||
|
echo $title22
|
||||||
|
if [ $supportBSC -eq 0 ] ; then
|
||||||
|
echo " Skipped because \C is disabled"
|
||||||
|
else
|
||||||
|
for opt in "" $jitopt; do
|
||||||
|
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput22 testtry
|
||||||
|
checkresult $? 22-$bits "$opt"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Test when \C is disabled. This is a single run per bit mode: $opt is the loop
# variable of the preceding tests' "for opt in ..." loops, so it is deliberately
# not passed here (a stale value would silently change how this test is run).
|
||||||
|
|
||||||
|
if [ "$do23" = yes ] ; then
|
||||||
|
echo $title23
|
||||||
|
if [ $supportBSC -ne 0 ] ; then
|
||||||
|
echo " Skipped because \C is not disabled"
|
||||||
|
else
|
||||||
|
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput23 testtry
|
||||||
|
checkresult $? 23 ""
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# End of loop for 8/16/32-bit tests
|
# End of loop for 8/16/32-bit tests
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
70
RunTest.bat
70
RunTest.bat
|
@ -13,11 +13,10 @@
|
||||||
@rem line. Added argument validation and added error reporting.
|
@rem line. Added argument validation and added error reporting.
|
||||||
@rem
|
@rem
|
||||||
@rem Sheri Pierce added logic to skip feature dependent tests
|
@rem Sheri Pierce added logic to skip feature dependent tests
|
||||||
@rem tests 4 5 9 15 and 18 require utf support
|
@rem tests 4 5 7 10 12 14 19 and 22 require Unicode support
|
||||||
@rem tests 6 7 10 16 and 19 require ucp support
|
@rem 8 requires Unicode and link size 2
|
||||||
@rem 11 requires ucp and link size 2
|
@rem 16 requires absence of jit support
|
||||||
@rem 12 requires presence of jit support
|
@rem 17 requires presence of jit support
|
||||||
@rem 13 requires absence of jit support
|
|
||||||
@rem Sheri P also added override tests for study and jit testing
|
@rem Sheri P also added override tests for study and jit testing
|
||||||
@rem Zoltan Herczeg added libpcre16 support
|
@rem Zoltan Herczeg added libpcre16 support
|
||||||
@rem Zoltan Herczeg added libpcre32 support
|
@rem Zoltan Herczeg added libpcre32 support
|
||||||
|
@ -25,6 +24,7 @@
|
||||||
@rem
|
@rem
|
||||||
@rem The file was converted for PCRE2 by PH, February 2015.
|
@rem The file was converted for PCRE2 by PH, February 2015.
|
||||||
@rem Updated for new test 14 (moving others up a number), August 2015.
|
@rem Updated for new test 14 (moving others up a number), August 2015.
|
||||||
|
@rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015.
|
||||||
|
|
||||||
|
|
||||||
setlocal enabledelayedexpansion
|
setlocal enabledelayedexpansion
|
||||||
|
@ -65,6 +65,8 @@ set support32=%ERRORLEVEL%
|
||||||
set unicode=%ERRORLEVEL%
|
set unicode=%ERRORLEVEL%
|
||||||
%pcre2test% -C jit >NUL
|
%pcre2test% -C jit >NUL
|
||||||
set jit=%ERRORLEVEL%
|
set jit=%ERRORLEVEL%
|
||||||
|
%pcre2test% -C backslash-C >NUL
|
||||||
|
set supportBSC=%ERRORLEVEL%
|
||||||
|
|
||||||
if %support8% EQU 1 (
|
if %support8% EQU 1 (
|
||||||
if not exist testout8 md testout8
|
if not exist testout8 md testout8
|
||||||
|
@ -101,18 +103,21 @@ set do17=no
|
||||||
set do18=no
|
set do18=no
|
||||||
set do19=no
|
set do19=no
|
||||||
set do20=no
|
set do20=no
|
||||||
|
set do21=no
|
||||||
|
set do22=no
|
||||||
|
set do23=no
|
||||||
set all=yes
|
set all=yes
|
||||||
|
|
||||||
for %%a in (%*) do (
|
for %%a in (%*) do (
|
||||||
set valid=no
|
set valid=no
|
||||||
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20) do if %%v == %%a set valid=yes
|
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) do if %%v == %%a set valid=yes
|
||||||
if "!valid!" == "yes" (
|
if "!valid!" == "yes" (
|
||||||
set do%%a=yes
|
set do%%a=yes
|
||||||
set all=no
|
set all=no
|
||||||
) else (
|
) else (
|
||||||
echo Invalid test number - %%a!
|
echo Invalid test number - %%a!
|
||||||
echo Usage %0 [ test_number ] ...
|
echo Usage %0 [ test_number ] ...
|
||||||
echo Where test_number is one or more optional test numbers 1 through 20, default is all tests.
|
echo Where test_number is one or more optional test numbers 1 through 23, default is all tests.
|
||||||
exit /b 1
|
exit /b 1
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -139,6 +144,9 @@ if "%all%" == "yes" (
|
||||||
set do18=yes
|
set do18=yes
|
||||||
set do19=yes
|
set do19=yes
|
||||||
set do20=yes
|
set do20=yes
|
||||||
|
set do21=yes
|
||||||
|
set do22=yes
|
||||||
|
set do23=yes
|
||||||
)
|
)
|
||||||
|
|
||||||
@echo RunTest.bat's pcre2test output is written to newly created subfolders
|
@echo RunTest.bat's pcre2test output is written to newly created subfolders
|
||||||
|
@ -187,6 +195,9 @@ if "%do17%" == "yes" call :do17
|
||||||
if "%do18%" == "yes" call :do18
|
if "%do18%" == "yes" call :do18
|
||||||
if "%do19%" == "yes" call :do19
|
if "%do19%" == "yes" call :do19
|
||||||
if "%do20%" == "yes" call :do20
|
if "%do20%" == "yes" call :do20
|
||||||
|
if "%do21%" == "yes" call :do21
|
||||||
|
if "%do22%" == "yes" call :do22
|
||||||
|
if "%do23%" == "yes" call :do23
|
||||||
:modeSkip
|
:modeSkip
|
||||||
if "%mode%" == "" (
|
if "%mode%" == "" (
|
||||||
set mode=-16
|
set mode=-16
|
||||||
|
@ -323,7 +334,7 @@ if %unicode% EQU 0 (
|
||||||
goto :eof
|
goto :eof
|
||||||
|
|
||||||
:do6
|
:do6
|
||||||
call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q -dfa
|
call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q
|
||||||
goto :eof
|
goto :eof
|
||||||
|
|
||||||
:do7
|
:do7
|
||||||
|
@ -331,7 +342,7 @@ if %unicode% EQU 0 (
|
||||||
echo Test 7 Skipped due to absence of Unicode support.
|
echo Test 7 Skipped due to absence of Unicode support.
|
||||||
goto :eof
|
goto :eof
|
||||||
)
|
)
|
||||||
call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q -dfa
|
call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q
|
||||||
goto :eof
|
goto :eof
|
||||||
|
|
||||||
:do8
|
:do8
|
||||||
|
@ -395,10 +406,14 @@ if %bits% EQU 8 (
|
||||||
echo Test 13 Skipped when running 8-bit tests.
|
echo Test 13 Skipped when running 8-bit tests.
|
||||||
goto :eof
|
goto :eof
|
||||||
)
|
)
|
||||||
call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q -dfa
|
call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q
|
||||||
goto :eof
|
goto :eof
|
||||||
|
|
||||||
:do14
|
:do14
|
||||||
|
if %unicode% EQU 0 (
|
||||||
|
echo Test 14 Skipped due to absence of Unicode support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
call :runsub 14 testout "DFA specials for UTF and UCP support" -q
|
call :runsub 14 testout "DFA specials for UTF and UCP support" -q
|
||||||
goto :eof
|
goto :eof
|
||||||
|
|
||||||
|
@ -442,6 +457,10 @@ if %bits% EQU 16 (
|
||||||
if %bits% EQU 32 (
|
if %bits% EQU 32 (
|
||||||
echo Test 19 Skipped when running 32-bit tests.
|
echo Test 19 Skipped when running 32-bit tests.
|
||||||
goto :eof
|
goto :eof
|
||||||
|
)
|
||||||
|
if %unicode% EQU 0 (
|
||||||
|
echo Test 19 Skipped due to absence of Unicode support.
|
||||||
|
goto :eof
|
||||||
)
|
)
|
||||||
call :runsub 19 testout "POSIX interface with UTF-8 and UCP" -q
|
call :runsub 19 testout "POSIX interface with UTF-8 and UCP" -q
|
||||||
goto :eof
|
goto :eof
|
||||||
|
@ -450,6 +469,37 @@ goto :eof
|
||||||
call :runsub 20 testout "Serialization tests" -q
|
call :runsub 20 testout "Serialization tests" -q
|
||||||
goto :eof
|
goto :eof
|
||||||
|
|
||||||
|
:do21
|
||||||
|
if %supportBSC% EQU 0 (
|
||||||
|
echo Test 21 Skipped due to absence of backslash-C support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 21 testout "Backslash-C tests without UTF" -q
|
||||||
|
call :runsub 21 testout "Backslash-C tests without UTF (DFA)" -q -dfa
|
||||||
|
if %jit% EQU 1 call :runsub 21 testoutjit "Test with JIT Override" -q -jit
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do22
|
||||||
|
if %supportBSC% EQU 0 (
|
||||||
|
echo Test 22 Skipped due to absence of backslash-C support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
if %unicode% EQU 0 (
|
||||||
|
echo Test 22 Skipped due to absence of Unicode support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 22 testout "Backslash-C tests with UTF" -q
|
||||||
|
if %jit% EQU 1 call :runsub 22 testoutjit "Test with JIT Override" -q -jit
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:do23
|
||||||
|
if %supportBSC% EQU 1 (
|
||||||
|
echo Test 23 Skipped due to presence of backslash-C support.
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
call :runsub 23 testout "Backslash-C disabled test" -q
|
||||||
|
goto :eof
|
||||||
|
|
||||||
:conferror
|
:conferror
|
||||||
@echo.
|
@echo.
|
||||||
@echo Either your build is incomplete or you have a configuration error.
|
@echo Either your build is incomplete or you have a configuration error.
|
||||||
|
|
|
@ -33,6 +33,7 @@
|
||||||
#cmakedefine EBCDIC 1
|
#cmakedefine EBCDIC 1
|
||||||
#cmakedefine EBCDIC_NL25 1
|
#cmakedefine EBCDIC_NL25 1
|
||||||
#cmakedefine HEAP_MATCH_RECURSE 1
|
#cmakedefine HEAP_MATCH_RECURSE 1
|
||||||
|
#cmakedefine NEVER_BACKSLASH_C 1
|
||||||
|
|
||||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||||
|
|
12
configure.ac
12
configure.ac
|
@ -190,6 +190,12 @@ AC_ARG_ENABLE(bsr-anycrlf,
|
||||||
[\R matches only CR, LF, CRLF by default]),
|
[\R matches only CR, LF, CRLF by default]),
|
||||||
, enable_bsr_anycrlf=no)
|
, enable_bsr_anycrlf=no)
|
||||||
|
|
||||||
|
# Handle --enable-never-backslash-C
|
||||||
|
AC_ARG_ENABLE(never-backslash-C,
|
||||||
|
AS_HELP_STRING([--enable-never-backslash-C],
|
||||||
|
[use of \C causes an error]),
|
||||||
|
, enable_never_backslash_C=no)
|
||||||
|
|
||||||
# Handle --enable-ebcdic
|
# Handle --enable-ebcdic
|
||||||
AC_ARG_ENABLE(ebcdic,
|
AC_ARG_ENABLE(ebcdic,
|
||||||
AS_HELP_STRING([--enable-ebcdic],
|
AS_HELP_STRING([--enable-ebcdic],
|
||||||
|
@ -604,6 +610,11 @@ if test "$enable_bsr_anycrlf" = "yes"; then
|
||||||
The build-time default can be overridden by the user of PCRE2 at runtime.])
|
The build-time default can be overridden by the user of PCRE2 at runtime.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if test "$enable_never_backslash_C" = "yes"; then
|
||||||
|
AC_DEFINE([NEVER_BACKSLASH_C], [], [
|
||||||
|
Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns.])
|
||||||
|
fi
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
||||||
The value of LINK_SIZE determines the number of bytes used to store
|
The value of LINK_SIZE determines the number of bytes used to store
|
||||||
links as offsets within the compiled regex. The default is 2, which
|
links as offsets within the compiled regex. The default is 2, which
|
||||||
|
@ -881,6 +892,7 @@ $PACKAGE-$VERSION configuration summary:
|
||||||
Enable Unicode support .......... : ${enable_unicode}
|
Enable Unicode support .......... : ${enable_unicode}
|
||||||
Newline char/sequence ........... : ${enable_newline}
|
Newline char/sequence ........... : ${enable_newline}
|
||||||
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
||||||
|
\C is disabled .................. : ${enable_never_backslash_C}
|
||||||
EBCDIC coding ................... : ${enable_ebcdic}
|
EBCDIC coding ................... : ${enable_ebcdic}
|
||||||
EBCDIC code for NL .............. : ${ebcdic_nl_code}
|
EBCDIC code for NL .............. : ${ebcdic_nl_code}
|
||||||
Rebuild char tables ............. : ${enable_rebuild_chartables}
|
Rebuild char tables ............. : ${enable_rebuild_chartables}
|
||||||
|
|
|
@ -220,6 +220,13 @@ library. They are also documented in the pcre2build man page.
|
||||||
restrict \R to match only CR, LF, or CRLF. You can make this the default by
|
restrict \R to match only CR, LF, or CRLF. You can make this the default by
|
||||||
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
||||||
|
|
||||||
|
. In a pattern, the escape sequence \C matches a single code unit, even in a
|
||||||
|
UTF mode. This can be dangerous because it breaks up multi-code-unit
|
||||||
|
characters. You can build PCRE2 with the use of \C permanently locked out by
|
||||||
|
adding --enable-never-backslash-C (note the upper case C) to the "configure"
|
||||||
|
command. When \C is allowed by the library, individual applications can lock
|
||||||
|
it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option.
|
||||||
|
|
||||||
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
|
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
|
||||||
pattern. This limits the amount of system stack that a pattern uses when it
|
pattern. This limits the amount of system stack that a pattern uses when it
|
||||||
is compiled. The default is 250, but you can change it by setting, for
|
is compiled. The default is 250, but you can change it by setting, for
|
||||||
|
@ -833,4 +840,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 16 July 2015
|
Last updated: 16 October 2015
|
||||||
|
|
|
@ -126,8 +126,10 @@ running redundant checks.
|
||||||
<P>
|
<P>
|
||||||
The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead to
|
The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead to
|
||||||
problems, because it may leave the current matching point in the middle of a
|
problems, because it may leave the current matching point in the middle of a
|
||||||
multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used to
|
multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used by an
|
||||||
lock out the use of \C, causing a compile-time error if it is encountered.
|
application to lock out the use of \C, causing a compile-time error if it is
|
||||||
|
encountered. It is also possible to build PCRE2 with the use of \C permanently
|
||||||
|
disabled.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Another way that performance can be hit is by running a pattern that has a very
|
Another way that performance can be hit is by running a pattern that has a very
|
||||||
|
@ -187,7 +189,7 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 13 April 2015
|
Last updated: 16 October 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -60,19 +60,21 @@ units, not characters, as is the contents of the variable pointed at by
|
||||||
The options are:
|
The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ANCHORED Match only at the first position
|
PCRE2_ANCHORED Match only at the first position
|
||||||
PCRE2_NOTBOL Subject string is not the beginning of a line
|
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||||
PCRE2_NOTEOL Subject string is not the end of a line
|
PCRE2_NOTEOL Subject is not the end of a line
|
||||||
PCRE2_NOTEMPTY An empty string is not a valid match
|
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||||
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject
|
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the
|
||||||
is not a valid match
|
subject is not a valid match
|
||||||
PCRE2_NO_UTF_CHECK Do not check the subject or replacement for
|
PCRE2_NO_UTF_CHECK Do not check the subject or replacement
|
||||||
UTF validity (only relevant if PCRE2_UTF
|
for UTF validity (only relevant if
|
||||||
was set at compile time)
|
PCRE2_UTF was set at compile time)
|
||||||
|
PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing
|
||||||
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
|
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
|
||||||
</pre>
|
</pre>
|
||||||
The function returns the number of substitutions, which may be zero if there
|
The function returns the number of substitutions, which may be zero if there
|
||||||
were no matches. The result can be greater than one only when
|
were no matches. The result can be greater than one only when
|
||||||
PCRE2_SUBSTITUTE_GLOBAL is set.
|
PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
|
||||||
|
is returned.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -1237,7 +1237,8 @@ This option locks out the use of \C in the pattern that is being compiled.
|
||||||
This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because
|
This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because
|
||||||
it may leave the current matching point in the middle of a multi-code-unit
|
it may leave the current matching point in the middle of a multi-code-unit
|
||||||
character. This option may be useful in applications that process patterns from
|
character. This option may be useful in applications that process patterns from
|
||||||
external sources.
|
external sources. Note that there is also a build-time option that permanently
|
||||||
|
locks out the use of \C.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_NEVER_UCP
|
PCRE2_NEVER_UCP
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -2613,43 +2614,17 @@ same number causes an error at compile time.
|
||||||
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR \fIreplacementzfP,</b>
|
<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *\fIoutputbuffer\zfP,</b>
|
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
|
||||||
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
|
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
This function calls <b>pcre2_match()</b> and then makes a copy of the subject
|
This function calls <b>pcre2_match()</b> and then makes a copy of the subject
|
||||||
string in <i>outputbuffer</i>, replacing the part that was matched with the
|
string in <i>outputbuffer</i>, replacing the part that was matched with the
|
||||||
<i>replacement</i> string, whose length is supplied in <b>rlength</b>. This can
|
<i>replacement</i> string, whose length is supplied in <b>rlength</b>. This can
|
||||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
|
||||||
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
|
||||||
dollar character is an escape character that can specify the insertion of
|
|
||||||
characters from capturing groups or (*MARK) items in the pattern. The following
|
|
||||||
forms are recognized:
|
|
||||||
<pre>
|
|
||||||
$$ insert a dollar character
|
|
||||||
$<n> or ${<n>} insert the contents of group <n>
|
|
||||||
$*MARK or ${*MARK} insert the name of the last (*MARK) encountered
|
|
||||||
</pre>
|
|
||||||
Either a group number or a group name can be given for <n>. Curly brackets are
|
|
||||||
required only if the following character would be interpreted as part of the
|
|
||||||
number or name. The number may be zero to include the entire matched string.
|
|
||||||
For example, if the pattern a(b)c is matched with "=abc=" and the replacement
|
|
||||||
string "+$1$0$1+", the result is "=+babcb+=". Group insertion is done by
|
|
||||||
calling <b>pcre2_copy_byname()</b> or <b>pcre2_copy_bynumber()</b> as
|
|
||||||
appropriate.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The facility for inserting a (*MARK) name can be used to perform simple
|
|
||||||
simultaneous substitutions, as this <b>pcre2test</b> example shows:
|
|
||||||
<pre>
|
|
||||||
/(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
|
|
||||||
apple lemon
|
|
||||||
2: pear orange
|
|
||||||
</PRE>
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
|
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
|
||||||
<b>pcre2_match()</b>, except that the partial matching options are not
|
<b>pcre2_match()</b>, except that the partial matching options are not
|
||||||
permitted, and <i>match_data</i> may be passed as NULL, in which case a match
|
permitted, and <i>match_data</i> may be passed as NULL, in which case a match
|
||||||
|
@ -2658,25 +2633,112 @@ functions from the match context, if provided, or else those that were used to
|
||||||
allocate memory for the compiled code.
|
allocate memory for the compiled code.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is one additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes the
|
The <i>outlengthptr</i> argument must point to a variable that contains the
|
||||||
|
length, in code units, of the output buffer. If the function is successful,
|
||||||
|
the value is updated to contain the length of the new string, excluding the
|
||||||
|
trailing zero that is automatically added. If the function is not successful,
|
||||||
|
the value is set to PCRE2_UNSET for general errors (such as output buffer too
|
||||||
|
small). For syntax errors in the replacement string, the value is set to the
|
||||||
|
offset in the replacement string where the error was detected.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
||||||
|
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
||||||
|
dollar character is an escape character that can specify the insertion of
|
||||||
|
characters from capturing groups or (*MARK) items in the pattern. The following
|
||||||
|
forms are always recognized:
|
||||||
|
<pre>
|
||||||
|
$$ insert a dollar character
|
||||||
|
$<n> or ${<n>} insert the contents of group <n>
|
||||||
|
$*MARK or ${*MARK} insert the name of the last (*MARK) encountered
|
||||||
|
</pre>
|
||||||
|
Either a group number or a group name can be given for <n>. Curly brackets are
|
||||||
|
required only if the following character would be interpreted as part of the
|
||||||
|
number or name. The number may be zero to include the entire matched string.
|
||||||
|
For example, if the pattern a(b)c is matched with "=abc=" and the replacement
|
||||||
|
string "+$1$0$1+", the result is "=+babcb+=".
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The facility for inserting a (*MARK) name can be used to perform simple
|
||||||
|
simultaneous substitutions, as this <b>pcre2test</b> example shows:
|
||||||
|
<pre>
|
||||||
|
/(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
|
||||||
|
apple lemon
|
||||||
|
2: pear orange
|
||||||
|
</pre>
|
||||||
|
There is an additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes the
|
||||||
function to iterate over the subject string, replacing every matching
|
function to iterate over the subject string, replacing every matching
|
||||||
substring. If this is not set, only the first matching substring is replaced.
|
substring. If this is not set, only the first matching substring is replaced.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>outlengthptr</i> argument must point to a variable that contains the
|
A second additional option, PCRE2_SUBSTITUTE_EXTENDED, causes extra processing
|
||||||
length, in code units, of the output buffer. It is updated to contain the
|
to be applied to the replacement string. Without this option, only the dollar
|
||||||
length of the new string, excluding the trailing zero that is automatically
|
character is special, and only the group insertion forms listed above are
|
||||||
added.
|
valid. When PCRE2_SUBSTITUTE_EXTENDED is set, two things change:
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The function returns the number of replacements that were made. This may be
|
Firstly, backslash in a replacement string is interpreted as an escape
|
||||||
zero if no matches were found, and is never greater than 1 unless
|
character. The usual forms such as \n or \x{ddd} can be used to specify
|
||||||
PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
|
particular character codes, and backslash followed by any non-alphanumeric
|
||||||
is returned. Except for PCRE2_ERROR_NOMATCH (which is never returned), any
|
character quotes that character. Extended quoting can be coded using \Q...\E,
|
||||||
errors from <b>pcre2_match()</b> or the substring copying functions are passed
|
exactly as in pattern strings.
|
||||||
straight back. PCRE2_ERROR_BADREPLACEMENT is returned for an invalid
|
</P>
|
||||||
replacement string (unrecognized sequence following a dollar sign), and
|
<P>
|
||||||
PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough.
|
There are also four escape sequences for forcing the case of inserted letters.
|
||||||
|
The insertion mechanism has three states: no case forcing, force upper case,
|
||||||
|
and force lower case. The escape sequences change the current state: \U and
|
||||||
|
\L change to upper or lower case forcing, respectively, and \E (when not
|
||||||
|
terminating a \Q quoted sequence) reverts to no case forcing. The sequences
|
||||||
|
\u and \l force the next character (if it is a letter) to upper or lower
|
||||||
|
case, respectively, and then the state automatically reverts to no case
|
||||||
|
forcing. Case forcing applies to all inserted characters, including those from
|
||||||
|
captured groups and letters within \Q...\E quoted sequences.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Note that case forcing sequences such as \U...\E do not nest. For example,
|
||||||
|
the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no
|
||||||
|
effect.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
|
||||||
|
flexibility to group substitution. The syntax is similar to that used by Bash:
|
||||||
|
<pre>
|
||||||
|
${<n>:-<string>}
|
||||||
|
${<n>:+<string1>:<string2>}
|
||||||
|
</pre>
|
||||||
|
As before, <n> may be a group number or a name. The first form specifies a
|
||||||
|
default value. If group <n> is set, its value is inserted; if not, <string> is
|
||||||
|
expanded and the result inserted. The second form specifies strings that are
|
||||||
|
expanded and inserted when group <n> is set or unset, respectively. The first
|
||||||
|
form is just a convenient shorthand for
|
||||||
|
<pre>
|
||||||
|
${<n>:+${<n>}:<string>}
|
||||||
|
</pre>
|
||||||
|
Backslash can be used to escape colons and closing curly brackets in the
|
||||||
|
replacement strings. A change of the case forcing state within a replacement
|
||||||
|
string remains in force afterwards, as shown in this <b>pcre2test</b> example:
|
||||||
|
<pre>
|
||||||
|
/(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
|
||||||
|
body
|
||||||
|
1: hello
|
||||||
|
somebody
|
||||||
|
1: HELLO
|
||||||
|
</pre>
|
||||||
|
If successful, the function returns the number of replacements that were made.
|
||||||
|
This may be zero if no matches were found, and is never greater than 1 unless
|
||||||
|
PCRE2_SUBSTITUTE_GLOBAL is set.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In the event of an error, a negative error code is returned. Except for
|
||||||
|
PCRE2_ERROR_NOMATCH (which is never returned), errors from <b>pcre2_match()</b>
|
||||||
|
are passed straight back. PCRE2_ERROR_NOMEMORY is returned if the output buffer
|
||||||
|
is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
|
||||||
|
errors in the replacement string, with more particular errors being
|
||||||
|
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
|
||||||
|
PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found), and
|
||||||
|
PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group substitution). As for all
|
||||||
|
PCRE2 errors, a text message that describes the error can be obtained by
|
||||||
|
calling <b>pcre2_get_error_message()</b>.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC35" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
<br><a name="SEC35" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -2908,8 +2970,8 @@ There are in addition the following errors that are specific to
|
||||||
PCRE2_ERROR_DFA_UITEM
|
PCRE2_ERROR_DFA_UITEM
|
||||||
</pre>
|
</pre>
|
||||||
This return is given if <b>pcre2_dfa_match()</b> encounters an item in the
|
This return is given if <b>pcre2_dfa_match()</b> encounters an item in the
|
||||||
pattern that it does not support, for instance, the use of \C or a back
|
pattern that it does not support, for instance, the use of \C in a UTF mode or
|
||||||
reference.
|
a back reference.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ERROR_DFA_UCOND
|
PCRE2_ERROR_DFA_UCOND
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -2953,7 +3015,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC40" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC40" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 22 September 2015
|
Last updated: 16 October 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -18,23 +18,24 @@ please consult the man page, in case the conversion went wrong.
|
||||||
<li><a name="TOC3" href="#SEC3">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
<li><a name="TOC3" href="#SEC3">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
||||||
<li><a name="TOC4" href="#SEC4">BUILDING SHARED AND STATIC LIBRARIES</a>
|
<li><a name="TOC4" href="#SEC4">BUILDING SHARED AND STATIC LIBRARIES</a>
|
||||||
<li><a name="TOC5" href="#SEC5">UNICODE AND UTF SUPPORT</a>
|
<li><a name="TOC5" href="#SEC5">UNICODE AND UTF SUPPORT</a>
|
||||||
<li><a name="TOC6" href="#SEC6">JUST-IN-TIME COMPILER SUPPORT</a>
|
<li><a name="TOC6" href="#SEC6">DISABLING THE USE OF \C</a>
|
||||||
<li><a name="TOC7" href="#SEC7">NEWLINE RECOGNITION</a>
|
<li><a name="TOC7" href="#SEC7">JUST-IN-TIME COMPILER SUPPORT</a>
|
||||||
<li><a name="TOC8" href="#SEC8">WHAT \R MATCHES</a>
|
<li><a name="TOC8" href="#SEC8">NEWLINE RECOGNITION</a>
|
||||||
<li><a name="TOC9" href="#SEC9">HANDLING VERY LARGE PATTERNS</a>
|
<li><a name="TOC9" href="#SEC9">WHAT \R MATCHES</a>
|
||||||
<li><a name="TOC10" href="#SEC10">AVOIDING EXCESSIVE STACK USAGE</a>
|
<li><a name="TOC10" href="#SEC10">HANDLING VERY LARGE PATTERNS</a>
|
||||||
<li><a name="TOC11" href="#SEC11">LIMITING PCRE2 RESOURCE USAGE</a>
|
<li><a name="TOC11" href="#SEC11">AVOIDING EXCESSIVE STACK USAGE</a>
|
||||||
<li><a name="TOC12" href="#SEC12">CREATING CHARACTER TABLES AT BUILD TIME</a>
|
<li><a name="TOC12" href="#SEC12">LIMITING PCRE2 RESOURCE USAGE</a>
|
||||||
<li><a name="TOC13" href="#SEC13">USING EBCDIC CODE</a>
|
<li><a name="TOC13" href="#SEC13">CREATING CHARACTER TABLES AT BUILD TIME</a>
|
||||||
<li><a name="TOC14" href="#SEC14">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
|
<li><a name="TOC14" href="#SEC14">USING EBCDIC CODE</a>
|
||||||
<li><a name="TOC15" href="#SEC15">PCRE2GREP BUFFER SIZE</a>
|
<li><a name="TOC15" href="#SEC15">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
|
||||||
<li><a name="TOC16" href="#SEC16">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a>
|
<li><a name="TOC16" href="#SEC16">PCRE2GREP BUFFER SIZE</a>
|
||||||
<li><a name="TOC17" href="#SEC17">INCLUDING DEBUGGING CODE</a>
|
<li><a name="TOC17" href="#SEC17">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a>
|
||||||
<li><a name="TOC18" href="#SEC18">DEBUGGING WITH VALGRIND SUPPORT</a>
|
<li><a name="TOC18" href="#SEC18">INCLUDING DEBUGGING CODE</a>
|
||||||
<li><a name="TOC19" href="#SEC19">CODE COVERAGE REPORTING</a>
|
<li><a name="TOC19" href="#SEC19">DEBUGGING WITH VALGRIND SUPPORT</a>
|
||||||
<li><a name="TOC20" href="#SEC20">SEE ALSO</a>
|
<li><a name="TOC20" href="#SEC20">CODE COVERAGE REPORTING</a>
|
||||||
<li><a name="TOC21" href="#SEC21">AUTHOR</a>
|
<li><a name="TOC21" href="#SEC21">SEE ALSO</a>
|
||||||
<li><a name="TOC22" href="#SEC22">REVISION</a>
|
<li><a name="TOC22" href="#SEC22">AUTHOR</a>
|
||||||
|
<li><a name="TOC23" href="#SEC23">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">BUILDING PCRE2</a><br>
|
<br><a name="SEC1" href="#TOC1">BUILDING PCRE2</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -148,13 +149,19 @@ properties. The application can request that they do by setting the PCRE2_UCP
|
||||||
option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also
|
option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also
|
||||||
request this by starting with (*UCP).
|
request this by starting with (*UCP).
|
||||||
</P>
|
</P>
|
||||||
|
<br><a name="SEC6" href="#TOC1">DISABLING THE USE OF \C</a><br>
|
||||||
<P>
|
<P>
|
||||||
The \C escape sequence, which matches a single code unit, even in a UTF mode,
|
The \C escape sequence, which matches a single code unit, even in a UTF mode,
|
||||||
can cause unpredictable behaviour because it may leave the current matching
|
can cause unpredictable behaviour because it may leave the current matching
|
||||||
point in the middle of a multi-code-unit character. It can be locked out by
|
point in the middle of a multi-code-unit character. The application can lock it
|
||||||
setting the PCRE2_NEVER_BACKSLASH_C option.
|
out by setting the PCRE2_NEVER_BACKSLASH_C option when calling
|
||||||
|
<b>pcre2_compile()</b>. There is also a build-time option
|
||||||
|
<pre>
|
||||||
|
--enable-never-backslash-C
|
||||||
|
</pre>
|
||||||
|
(note the upper case C) which locks out the use of \C entirely.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
|
<br><a name="SEC7" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
Just-in-time compiler support is included in the build by specifying
|
Just-in-time compiler support is included in the build by specifying
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -171,7 +178,7 @@ pcre2grep automatically makes use of it, unless you add
|
||||||
</pre>
|
</pre>
|
||||||
to the "configure" command.
|
to the "configure" command.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">NEWLINE RECOGNITION</a><br>
|
<br><a name="SEC8" href="#TOC1">NEWLINE RECOGNITION</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, PCRE2 interprets the linefeed (LF) character as indicating the end
|
By default, PCRE2 interprets the linefeed (LF) character as indicating the end
|
||||||
of a line. This is the normal newline character on Unix-like systems. You can
|
of a line. This is the normal newline character on Unix-like systems. You can
|
||||||
|
@ -208,7 +215,7 @@ Whatever default line ending convention is selected when PCRE2 is built can be
|
||||||
overridden by applications that use the library. At build time it is
|
overridden by applications that use the library. At build time it is
|
||||||
conventional to use the standard for your operating system.
|
conventional to use the standard for your operating system.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">WHAT \R MATCHES</a><br>
|
<br><a name="SEC9" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, the sequence \R in a pattern matches any Unicode newline sequence,
|
By default, the sequence \R in a pattern matches any Unicode newline sequence,
|
||||||
independently of what has been selected as the line ending sequence. If you
|
independently of what has been selected as the line ending sequence. If you
|
||||||
|
@ -220,7 +227,7 @@ the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
|
||||||
selected when PCRE2 is built can be overridden by applications that use the
|
selected when PCRE2 is built can be overridden by applications that use the
|
||||||
library.
|
library.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC9" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
|
<br><a name="SEC10" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
|
||||||
<P>
|
<P>
|
||||||
Within a compiled pattern, offset values are used to point from one part to
|
Within a compiled pattern, offset values are used to point from one part to
|
||||||
another (for example, from an opening parenthesis to an alternation
|
another (for example, from an opening parenthesis to an alternation
|
||||||
|
@ -239,7 +246,7 @@ longer offsets slows down the operation of PCRE2 because it has to load
|
||||||
additional data when handling them. For the 32-bit library the value is always
|
additional data when handling them. For the 32-bit library the value is always
|
||||||
4 and cannot be overridden; the value of --with-link-size is ignored.
|
4 and cannot be overridden; the value of --with-link-size is ignored.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
|
<br><a name="SEC11" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
|
||||||
<P>
|
<P>
|
||||||
When matching with the <b>pcre2_match()</b> function, PCRE2 implements
|
When matching with the <b>pcre2_match()</b> function, PCRE2 implements
|
||||||
backtracking by making recursive calls to an internal function called
|
backtracking by making recursive calls to an internal function called
|
||||||
|
@ -261,7 +268,7 @@ custom memory management functions can be called instead. PCRE2 runs noticeably
|
||||||
more slowly when built in this way. This option affects only the
|
more slowly when built in this way. This option affects only the
|
||||||
<b>pcre2_match()</b> function; it is not relevant for <b>pcre2_dfa_match()</b>.
|
<b>pcre2_match()</b> function; it is not relevant for <b>pcre2_dfa_match()</b>.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">LIMITING PCRE2 RESOURCE USAGE</a><br>
|
<br><a name="SEC12" href="#TOC1">LIMITING PCRE2 RESOURCE USAGE</a><br>
|
||||||
<P>
|
<P>
|
||||||
Internally, PCRE2 has a function called <b>match()</b>, which it calls
|
Internally, PCRE2 has a function called <b>match()</b>, which it calls
|
||||||
repeatedly (sometimes recursively) when matching a pattern with the
|
repeatedly (sometimes recursively) when matching a pattern with the
|
||||||
|
@ -290,7 +297,7 @@ constraints. However, you can set a lower limit by adding, for example,
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command. This value can also be overridden at run time.
|
to the <b>configure</b> command. This value can also be overridden at run time.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
<br><a name="SEC13" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE2 uses fixed tables for processing characters whose code points are less
|
PCRE2 uses fixed tables for processing characters whose code points are less
|
||||||
than 256. By default, PCRE2 is built with a set of tables that are distributed
|
than 256. By default, PCRE2 is built with a set of tables that are distributed
|
||||||
|
@ -307,7 +314,7 @@ compiling, because <b>dftables</b> is run on the local host. If you need to
|
||||||
create alternative tables when cross compiling, you will have to do so "by
|
create alternative tables when cross compiling, you will have to do so "by
|
||||||
hand".)
|
hand".)
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
|
<br><a name="SEC14" href="#TOC1">USING EBCDIC CODE</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE2 assumes by default that it will run in an environment where the character
|
PCRE2 assumes by default that it will run in an environment where the character
|
||||||
code is ASCII or Unicode, which is a superset of ASCII. This is the case for
|
code is ASCII or Unicode, which is a superset of ASCII. This is the case for
|
||||||
|
@ -342,7 +349,7 @@ The options that select newline behaviour, such as --enable-newline-is-cr,
|
||||||
and equivalent run-time options, refer to these character values in an EBCDIC
|
and equivalent run-time options, refer to these character values in an EBCDIC
|
||||||
environment.
|
environment.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC14" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
<br><a name="SEC15" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, <b>pcre2grep</b> reads all files as plain text. You can build it so
|
By default, <b>pcre2grep</b> reads all files as plain text. You can build it so
|
||||||
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
|
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
|
||||||
|
@ -355,7 +362,7 @@ to the <b>configure</b> command. These options naturally require that the
|
||||||
relevant libraries are installed on your system. Configuration will fail if
|
relevant libraries are installed on your system. Configuration will fail if
|
||||||
they are not.
|
they are not.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">PCRE2GREP BUFFER SIZE</a><br>
|
<br><a name="SEC16" href="#TOC1">PCRE2GREP BUFFER SIZE</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2grep</b> uses an internal buffer to hold a "window" on the file it is
|
<b>pcre2grep</b> uses an internal buffer to hold a "window" on the file it is
|
||||||
scanning, in order to be able to output "before" and "after" lines when it
|
scanning, in order to be able to output "before" and "after" lines when it
|
||||||
|
@ -370,7 +377,7 @@ parameter value by adding, for example,
|
||||||
to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override this
|
to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override this
|
||||||
value by using --buffer-size on the command line.
|
value by using --buffer-size on the command line.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC16" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
<br><a name="SEC17" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
If you add one of
|
If you add one of
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -404,7 +411,7 @@ automatically included, you may need to add something like
|
||||||
</pre>
|
</pre>
|
||||||
immediately before the <b>configure</b> command.
|
immediately before the <b>configure</b> command.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">INCLUDING DEBUGGING CODE</a><br>
|
<br><a name="SEC18" href="#TOC1">INCLUDING DEBUGGING CODE</a><br>
|
||||||
<P>
|
<P>
|
||||||
If you add
|
If you add
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -413,7 +420,7 @@ If you add
|
||||||
to the <b>configure</b> command, additional debugging code is included in the
|
to the <b>configure</b> command, additional debugging code is included in the
|
||||||
build. This feature is intended for use by the PCRE2 maintainers.
|
build. This feature is intended for use by the PCRE2 maintainers.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC18" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
|
<br><a name="SEC19" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
If you add
|
If you add
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -423,7 +430,7 @@ to the <b>configure</b> command, PCRE2 will use valgrind annotations to mark
|
||||||
certain memory regions as unaddressable. This allows it to detect invalid
|
certain memory regions as unaddressable. This allows it to detect invalid
|
||||||
memory accesses, and is mostly useful for debugging PCRE2 itself.
|
memory accesses, and is mostly useful for debugging PCRE2 itself.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC19" href="#TOC1">CODE COVERAGE REPORTING</a><br>
|
<br><a name="SEC20" href="#TOC1">CODE COVERAGE REPORTING</a><br>
|
||||||
<P>
|
<P>
|
||||||
If your C compiler is gcc, you can build a version of PCRE2 that can generate a
|
If your C compiler is gcc, you can build a version of PCRE2 that can generate a
|
||||||
code coverage report for its test suite. To enable this, you must install
|
code coverage report for its test suite. To enable this, you must install
|
||||||
|
@ -480,11 +487,11 @@ This cleans all coverage data including the generated coverage report. For more
|
||||||
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
|
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC20" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC21" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2api</b>(3), <b>pcre2-config</b>(3).
|
<b>pcre2api</b>(3), <b>pcre2-config</b>(3).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC22" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
|
@ -493,9 +500,9 @@ University Computing Service
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC23" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 24 April 2015
|
Last updated: 16 October 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -1236,14 +1236,21 @@ with \C in UTF-8 or UTF-16 mode means that the rest of the string may start
|
||||||
with a malformed UTF character. This has undefined results, because PCRE2
|
with a malformed UTF character. This has undefined results, because PCRE2
|
||||||
assumes that it is matching character by character in a valid UTF string (by
|
assumes that it is matching character by character in a valid UTF string (by
|
||||||
default it checks the subject string's validity at the start of processing
|
default it checks the subject string's validity at the start of processing
|
||||||
unless the PCRE2_NO_UTF_CHECK option is used). An application can lock out the
|
unless the PCRE2_NO_UTF_CHECK option is used).
|
||||||
use of \C by setting the PCRE2_NEVER_BACKSLASH_C option.
|
</P>
|
||||||
|
<P>
|
||||||
|
An application can lock out the use of \C by setting the
|
||||||
|
PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also possible to
|
||||||
|
build PCRE2 with the use of \C permanently disabled.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
PCRE2 does not allow \C to appear in lookbehind assertions
|
PCRE2 does not allow \C to appear in lookbehind assertions
|
||||||
<a href="#lookbehind">(described below)</a>
|
<a href="#lookbehind">(described below)</a>
|
||||||
in a UTF mode, because this would make it impossible to calculate the length of
|
in a UTF mode, because this would make it impossible to calculate the length of
|
||||||
the lookbehind.
|
the lookbehind. Neither the alternative matching function
|
||||||
|
<b>pcre2_dfa_match()</b> nor the JIT optimizer supports \C in a UTF mode. The
|
||||||
|
former gives a match-time error; the latter fails to optimize and so the match
|
||||||
|
is always run using the interpreter.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In general, the \C escape sequence is best avoided. However, one way of using
|
In general, the \C escape sequence is best avoided. However, one way of using
|
||||||
|
@ -3351,7 +3358,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 01 September 2015
|
Last updated: 16 October 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -12,17 +12,21 @@ This page is part of the PCRE2 HTML documentation. It was generated
|
||||||
automatically from the original man page. If there is any nonsense in it,
|
automatically from the original man page. If there is any nonsense in it,
|
||||||
please consult the man page, in case the conversion went wrong.
|
please consult the man page, in case the conversion went wrong.
|
||||||
<br>
|
<br>
|
||||||
<br><b>
|
<ul>
|
||||||
PCRE2 PERFORMANCE
|
<li><a name="TOC1" href="#SEC1">PCRE2 PERFORMANCE</a>
|
||||||
</b><br>
|
<li><a name="TOC2" href="#SEC2">COMPILED PATTERN MEMORY USAGE</a>
|
||||||
|
<li><a name="TOC3" href="#SEC3">STACK USAGE AT RUN TIME</a>
|
||||||
|
<li><a name="TOC4" href="#SEC4">PROCESSING TIME</a>
|
||||||
|
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
|
||||||
|
<li><a name="TOC6" href="#SEC6">REVISION</a>
|
||||||
|
</ul>
|
||||||
|
<br><a name="SEC1" href="#TOC1">PCRE2 PERFORMANCE</a><br>
|
||||||
<P>
|
<P>
|
||||||
Two aspects of performance are discussed below: memory usage and processing
|
Two aspects of performance are discussed below: memory usage and processing
|
||||||
time. The way you express your pattern as a regular expression can affect both
|
time. The way you express your pattern as a regular expression can affect both
|
||||||
of them.
|
of them.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><a name="SEC2" href="#TOC1">COMPILED PATTERN MEMORY USAGE</a><br>
|
||||||
COMPILED PATTERN MEMORY USAGE
|
|
||||||
</b><br>
|
|
||||||
<P>
|
<P>
|
||||||
Patterns are compiled by PCRE2 into a reasonably efficient interpretive code,
|
Patterns are compiled by PCRE2 into a reasonably efficient interpretive code,
|
||||||
so that most simple patterns do not use much memory. However, there is one case
|
so that most simple patterns do not use much memory. However, there is one case
|
||||||
|
@ -75,9 +79,7 @@ pattern. Nevertheless, if the atomic grouping is not a problem and the loss of
|
||||||
speed is acceptable, this kind of rewriting will allow you to process patterns
|
speed is acceptable, this kind of rewriting will allow you to process patterns
|
||||||
that PCRE2 cannot otherwise handle.
|
that PCRE2 cannot otherwise handle.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><a name="SEC3" href="#TOC1">STACK USAGE AT RUN TIME</a><br>
|
||||||
STACK USAGE AT RUN TIME
|
|
||||||
</b><br>
|
|
||||||
<P>
|
<P>
|
||||||
When <b>pcre2_match()</b> is used for matching, certain kinds of pattern can
|
When <b>pcre2_match()</b> is used for matching, certain kinds of pattern can
|
||||||
cause it to use large amounts of the process stack. In some environments the
|
cause it to use large amounts of the process stack. In some environments the
|
||||||
|
@ -86,9 +88,7 @@ SIGSEGV. Rewriting your pattern can often help. The
|
||||||
<a href="pcre2stack.html"><b>pcre2stack</b></a>
|
<a href="pcre2stack.html"><b>pcre2stack</b></a>
|
||||||
documentation discusses this issue in detail.
|
documentation discusses this issue in detail.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><a name="SEC4" href="#TOC1">PROCESSING TIME</a><br>
|
||||||
PROCESSING TIME
|
|
||||||
</b><br>
|
|
||||||
<P>
|
<P>
|
||||||
Certain items in regular expression patterns are processed more efficiently
|
Certain items in regular expression patterns are processed more efficiently
|
||||||
than others. It is more efficient to use a character class like [aeiou] than a
|
than others. It is more efficient to use a character class like [aeiou] than a
|
||||||
|
@ -177,9 +177,7 @@ appreciable time with strings longer than about 20 characters.
|
||||||
In many cases, the solution to this kind of performance issue is to use an
|
In many cases, the solution to this kind of performance issue is to use an
|
||||||
atomic group or a possessive quantifier.
|
atomic group or a possessive quantifier.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
||||||
AUTHOR
|
|
||||||
</b><br>
|
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
|
@ -188,9 +186,7 @@ University Computing Service
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||||
REVISION
|
|
||||||
</b><br>
|
|
||||||
<P>
|
<P>
|
||||||
Last updated: 02 January 2015
|
Last updated: 02 January 2015
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -111,9 +111,10 @@ it matches a literal "u".
|
||||||
\W a "non-word" character
|
\W a "non-word" character
|
||||||
\X a Unicode extended grapheme cluster
|
\X a Unicode extended grapheme cluster
|
||||||
</pre>
|
</pre>
|
||||||
The application can lock out the use of \C by setting the
|
\C is dangerous because it may leave the current matching point in the middle
|
||||||
PCRE2_NEVER_BACKSLASH_C option. It is dangerous because it may leave the
|
of a UTF-8 or UTF-16 character. The application can lock out the use of \C by
|
||||||
current matching point in the middle of a UTF-8 or UTF-16 character.
|
setting the PCRE2_NEVER_BACKSLASH_C option. It is also possible to build PCRE2
|
||||||
|
with the use of \C permanently disabled.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode
|
By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode
|
||||||
|
@ -588,7 +589,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 17 July 2015
|
Last updated: 16 October 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -155,6 +155,7 @@ following options output the value and set the exit code as indicated:
|
||||||
The following options output 1 for true or 0 for false, and set the exit code
|
The following options output 1 for true or 0 for false, and set the exit code
|
||||||
to the same value:
|
to the same value:
|
||||||
<pre>
|
<pre>
|
||||||
|
backslash-C \C is supported (not locked out)
|
||||||
ebcdic compiled for an EBCDIC environment
|
ebcdic compiled for an EBCDIC environment
|
||||||
jit just-in-time support is available
|
jit just-in-time support is available
|
||||||
pcre2-16 the 16-bit library was built
|
pcre2-16 the 16-bit library was built
|
||||||
|
@ -510,7 +511,7 @@ Setting compilation options
|
||||||
<P>
|
<P>
|
||||||
The following modifiers set options for <b>pcre2_compile()</b>. The most common
|
The following modifiers set options for <b>pcre2_compile()</b>. The most common
|
||||||
ones have single-letter abbreviations. See
|
ones have single-letter abbreviations. See
|
||||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
for a description of their effects.
|
for a description of their effects.
|
||||||
<pre>
|
<pre>
|
||||||
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
||||||
|
@ -537,6 +538,7 @@ for a description of their effects.
|
||||||
no_utf_check set PCRE2_NO_UTF_CHECK
|
no_utf_check set PCRE2_NO_UTF_CHECK
|
||||||
ucp set PCRE2_UCP
|
ucp set PCRE2_UCP
|
||||||
ungreedy set PCRE2_UNGREEDY
|
ungreedy set PCRE2_UNGREEDY
|
||||||
|
use_offset_limit set PCRE2_USE_OFFSET_LIMIT
|
||||||
utf set PCRE2_UTF
|
utf set PCRE2_UTF
|
||||||
</pre>
|
</pre>
|
||||||
As well as turning on the PCRE2_UTF option, the <b>utf</b> modifier causes all
|
As well as turning on the PCRE2_UTF option, the <b>utf</b> modifier causes all
|
||||||
|
@ -564,6 +566,7 @@ about the pattern:
|
||||||
locale=<name> use this locale
|
locale=<name> use this locale
|
||||||
memory show memory used
|
memory show memory used
|
||||||
newline=<type> set newline type
|
newline=<type> set newline type
|
||||||
|
null_context compile with a NULL context
|
||||||
parens_nest_limit=<n> set maximum parentheses depth
|
parens_nest_limit=<n> set maximum parentheses depth
|
||||||
posix use the POSIX API
|
posix use the POSIX API
|
||||||
push push compiled pattern onto the stack
|
push push compiled pattern onto the stack
|
||||||
|
@ -642,6 +645,15 @@ is requested. For each callout, either its number or string is given, followed
|
||||||
by the item that follows it in the pattern.
|
by the item that follows it in the pattern.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
Passing a NULL context
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
Normally, <b>pcre2test</b> passes a context block to <b>pcre2_compile()</b>. If
|
||||||
|
the <b>null_context</b> modifier is set, however, NULL is passed. This is for
|
||||||
|
testing that <b>pcre2_compile()</b> behaves correctly in this case (it uses
|
||||||
|
default values).
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
Specifying a pattern in hex
|
Specifying a pattern in hex
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -920,9 +932,11 @@ pattern.
|
||||||
/g global global matching
|
/g global global matching
|
||||||
jitstack=<n> set size of JIT stack
|
jitstack=<n> set size of JIT stack
|
||||||
mark show mark values
|
mark show mark values
|
||||||
match_limit=>n> set a match limit
|
match_limit=<n> set a match limit
|
||||||
memory show memory usage
|
memory show memory usage
|
||||||
|
null_context match with a NULL context
|
||||||
offset=<n> set starting offset
|
offset=<n> set starting offset
|
||||||
|
offset_limit=<n> set offset limit
|
||||||
ovector=<n> set size of output vector
|
ovector=<n> set size of output vector
|
||||||
recursion_limit=<n> set a recursion limit
|
recursion_limit=<n> set a recursion limit
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
|
@ -1170,6 +1184,16 @@ The <b>offset</b> modifier sets an offset in the subject string at which
|
||||||
matching starts. Its value is a number of code units, not characters.
|
matching starts. Its value is a number of code units, not characters.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
Setting an offset limit
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
The <b>offset_limit</b> modifier sets a limit for unanchored matches. If a match
|
||||||
|
cannot be found starting at or before this offset in the subject, a "no match"
|
||||||
|
return is given. The data value is a number of code units, not characters. When
|
||||||
|
this modifier is used, the <b>use_offset_limit</b> modifier must have been set
|
||||||
|
for the pattern; if not, an error is generated.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
Setting the size of the output vector
|
Setting the size of the output vector
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1201,6 +1225,17 @@ this modifier has no effect, as there is no facility for passing a length.)
|
||||||
When testing <b>pcre2_substitute()</b>, this modifier also has the effect of
|
When testing <b>pcre2_substitute()</b>, this modifier also has the effect of
|
||||||
passing the replacement string as zero-terminated.
|
passing the replacement string as zero-terminated.
|
||||||
</P>
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Passing a NULL context
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
|
||||||
|
<b>pcre2_dfa_match()</b> or <b>pcre2_jit_match()</b>. If the <b>null_context</b>
|
||||||
|
modifier is set, however, NULL is passed. This is for testing that the matching
|
||||||
|
functions behave correctly in this case (they use default values). This
|
||||||
|
modifier cannot be used with the <b>find_limits</b> modifier or when testing the
|
||||||
|
substitution function.
|
||||||
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, <b>pcre2test</b> uses the standard PCRE2 matching function,
|
By default, <b>pcre2test</b> uses the standard PCRE2 matching function,
|
||||||
|
@ -1539,7 +1574,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 14 September 2015
|
Last updated: 17 October 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -71,11 +71,12 @@ The escape sequence \C can be used to match a single code unit, in a UTF mode,
|
||||||
but its use can lead to some strange effects because it breaks up multi-unit
|
but its use can lead to some strange effects because it breaks up multi-unit
|
||||||
characters (see the description of \C in the
|
characters (see the description of \C in the
|
||||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||||
documentation). The use of \C is not supported in the alternative matching
|
documentation). The use of \C is not supported by the alternative matching
|
||||||
function <b>pcre2_dfa_match()</b>, nor is it supported in UTF mode by the JIT
|
function <b>pcre2_dfa_match()</b> when in UTF mode. Its use provokes a
|
||||||
optimization. If JIT optimization is requested for a UTF pattern that contains
|
match-time error. The JIT optimization also does not support \C in UTF mode.
|
||||||
\C, it will not succeed, and so the matching will be carried out by the normal
|
If JIT optimization is requested for a UTF pattern that contains \C, it will
|
||||||
interpretive function.
|
not succeed, and so the matching will be carried out by the normal interpretive
|
||||||
|
function.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
|
The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
|
||||||
|
@ -275,7 +276,7 @@ Cambridge, England.
|
||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 18 August 2015
|
Last updated: 16 October 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
10
doc/pcre2.3
10
doc/pcre2.3
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2 3 "13 April 2015" "PCRE2 10.20"
|
.TH PCRE2 3 "16 October 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH INTRODUCTION
|
.SH INTRODUCTION
|
||||||
|
@ -118,8 +118,10 @@ running redundant checks.
|
||||||
.P
|
.P
|
||||||
The use of the \eC escape sequence in a UTF-8 or UTF-16 pattern can lead to
|
The use of the \eC escape sequence in a UTF-8 or UTF-16 pattern can lead to
|
||||||
problems, because it may leave the current matching point in the middle of a
|
problems, because it may leave the current matching point in the middle of a
|
||||||
multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used to
|
multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used by an
|
||||||
lock out the use of \eC, causing a compile-time error if it is encountered.
|
application to lock out the use of \eC, causing a compile-time error if it is
|
||||||
|
encountered. It is also possible to build PCRE2 with the use of \eC permanently
|
||||||
|
disabled.
|
||||||
.P
|
.P
|
||||||
Another way that performance can be hit is by running a pattern that has a very
|
Another way that performance can be hit is by running a pattern that has a very
|
||||||
large search tree against a string that will never match. Nested unlimited
|
large search tree against a string that will never match. Nested unlimited
|
||||||
|
@ -187,6 +189,6 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 13 April 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
182
doc/pcre2.txt
182
doc/pcre2.txt
|
@ -104,8 +104,9 @@ SECURITY CONSIDERATIONS
|
||||||
The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead
|
The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead
|
||||||
to problems, because it may leave the current matching point in the
|
to problems, because it may leave the current matching point in the
|
||||||
middle of a multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C
|
middle of a multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C
|
||||||
option can be used to lock out the use of \C, causing a compile-time
|
option can be used by an application to lock out the use of \C, causing
|
||||||
error if it is encountered.
|
a compile-time error if it is encountered. It is also possible to build
|
||||||
|
PCRE2 with the use of \C permanently disabled.
|
||||||
|
|
||||||
Another way that performance can be hit is by running a pattern that
|
Another way that performance can be hit is by running a pattern that
|
||||||
has a very large search tree against a string that will never match.
|
has a very large search tree against a string that will never match.
|
||||||
|
@ -165,7 +166,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 13 April 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -1276,7 +1277,9 @@ COMPILING A PATTERN
|
||||||
piled. This escape can cause unpredictable behaviour in UTF-8 or
|
piled. This escape can cause unpredictable behaviour in UTF-8 or
|
||||||
UTF-16 modes, because it may leave the current matching point in the
|
UTF-16 modes, because it may leave the current matching point in the
|
||||||
middle of a multi-code-unit character. This option may be useful in
|
middle of a multi-code-unit character. This option may be useful in
|
||||||
applications that process patterns from external sources.
|
applications that process patterns from external sources. Note that
|
||||||
|
there is also a build-time option that permanently locks out the use of
|
||||||
|
\C.
|
||||||
|
|
||||||
PCRE2_NEVER_UCP
|
PCRE2_NEVER_UCP
|
||||||
|
|
||||||
|
@ -2571,19 +2574,36 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject,
|
int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject,
|
||||||
PCRE2_SIZE length, PCRE2_SIZE startoffset,
|
PCRE2_SIZE length, PCRE2_SIZE startoffset,
|
||||||
uint32_t options, pcre2_match_data *match_data,
|
uint32_t options, pcre2_match_data *match_data,
|
||||||
pcre2_match_context *mcontext, PCRE2_SPTR replacementzfP,
|
pcre2_match_context *mcontext, PCRE2_SPTR replacement,
|
||||||
PCRE2_SIZE rlength, PCRE2_UCHAR *outputbufferP,
|
PCRE2_SIZE rlength, PCRE2_UCHAR *outputbufferP,
|
||||||
PCRE2_SIZE *outlengthptr);
|
PCRE2_SIZE *outlengthptr);
|
||||||
|
|
||||||
This function calls pcre2_match() and then makes a copy of the subject
|
This function calls pcre2_match() and then makes a copy of the subject
|
||||||
string in outputbuffer, replacing the part that was matched with the
|
string in outputbuffer, replacing the part that was matched with the
|
||||||
replacement string, whose length is supplied in rlength. This can be
|
replacement string, whose length is supplied in rlength. This can be
|
||||||
given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||||
|
|
||||||
|
The first seven arguments of pcre2_substitute() are the same as for
|
||||||
|
pcre2_match(), except that the partial matching options are not permit-
|
||||||
|
ted, and match_data may be passed as NULL, in which case a match data
|
||||||
|
block is obtained and freed within this function, using memory manage-
|
||||||
|
ment functions from the match context, if provided, or else those that
|
||||||
|
were used to allocate memory for the compiled code.
|
||||||
|
|
||||||
|
The outlengthptr argument must point to a variable that contains the
|
||||||
|
length, in code units, of the output buffer. If the function is suc-
|
||||||
|
cessful, the value is updated to contain the length of the new string,
|
||||||
|
excluding the trailing zero that is automatically added. If the func-
|
||||||
|
tion is not successful, the value is set to PCRE2_UNSET for general
|
||||||
|
errors (such as output buffer too small). For syntax errors in the
|
||||||
|
replacement string, the value is set to the offset in the replacement
|
||||||
|
string where the error was detected.
|
||||||
|
|
||||||
In the replacement string, which is interpreted as a UTF string in UTF
|
In the replacement string, which is interpreted as a UTF string in UTF
|
||||||
mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK
|
mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK
|
||||||
option is set, a dollar character is an escape character that can spec-
|
option is set, a dollar character is an escape character that can spec-
|
||||||
ify the insertion of characters from capturing groups or (*MARK) items
|
ify the insertion of characters from capturing groups or (*MARK) items
|
||||||
in the pattern. The following forms are recognized:
|
in the pattern. The following forms are always recognized:
|
||||||
|
|
||||||
$$ insert a dollar character
|
$$ insert a dollar character
|
||||||
$<n> or ${<n>} insert the contents of group <n>
|
$<n> or ${<n>} insert the contents of group <n>
|
||||||
|
@ -2594,8 +2614,7 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
preted as part of the number or name. The number may be zero to include
|
preted as part of the number or name. The number may be zero to include
|
||||||
the entire matched string. For example, if the pattern a(b)c is
|
the entire matched string. For example, if the pattern a(b)c is
|
||||||
matched with "=abc=" and the replacement string "+$1$0$1+", the result
|
matched with "=abc=" and the replacement string "+$1$0$1+", the result
|
||||||
is "=+babcb+=". Group insertion is done by calling pcre2_copy_byname()
|
is "=+babcb+=".
|
||||||
or pcre2_copy_bynumber() as appropriate.
|
|
||||||
|
|
||||||
The facility for inserting a (*MARK) name can be used to perform simple
|
The facility for inserting a (*MARK) name can be used to perform simple
|
||||||
simultaneous substitutions, as this pcre2test example shows:
|
simultaneous substitutions, as this pcre2test example shows:
|
||||||
|
@ -2604,32 +2623,80 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
apple lemon
|
apple lemon
|
||||||
2: pear orange
|
2: pear orange
|
||||||
|
|
||||||
The first seven arguments of pcre2_substitute() are the same as for
|
There is an additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes
|
||||||
pcre2_match(), except that the partial matching options are not permit-
|
|
||||||
ted, and match_data may be passed as NULL, in which case a match data
|
|
||||||
block is obtained and freed within this function, using memory manage-
|
|
||||||
ment functions from the match context, if provided, or else those that
|
|
||||||
were used to allocate memory for the compiled code.
|
|
||||||
|
|
||||||
There is one additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes
|
|
||||||
the function to iterate over the subject string, replacing every match-
|
the function to iterate over the subject string, replacing every match-
|
||||||
ing substring. If this is not set, only the first matching substring is
|
ing substring. If this is not set, only the first matching substring is
|
||||||
replaced.
|
replaced.
|
||||||
|
|
||||||
The outlengthptr argument must point to a variable that contains the
|
A second additional option, PCRE2_SUBSTITUTE_EXTENDED, causes extra
|
||||||
length, in code units, of the output buffer. It is updated to contain
|
processing to be applied to the replacement string. Without this
|
||||||
the length of the new string, excluding the trailing zero that is auto-
|
option, only the dollar character is special, and only the group inser-
|
||||||
matically added.
|
tion forms listed above are valid. When PCRE2_SUBSTITUTE_EXTENDED is
|
||||||
|
set, two things change:
|
||||||
|
|
||||||
The function returns the number of replacements that were made. This
|
Firstly, backslash in a replacement string is interpreted as an escape
|
||||||
may be zero if no matches were found, and is never greater than 1
|
character. The usual forms such as \n or \x{ddd} can be used to specify
|
||||||
unless PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a neg-
|
particular character codes, and backslash followed by any non-alphanu-
|
||||||
ative error code is returned. Except for PCRE2_ERROR_NOMATCH (which is
|
meric character quotes that character. Extended quoting can be coded
|
||||||
never returned), any errors from pcre2_match() or the substring copying
|
using \Q...\E, exactly as in pattern strings.
|
||||||
functions are passed straight back. PCRE2_ERROR_BADREPLACEMENT is
|
|
||||||
returned for an invalid replacement string (unrecognized sequence fol-
|
There are also four escape sequences for forcing the case of inserted
|
||||||
lowing a dollar sign), and PCRE2_ERROR_NOMEMORY is returned if the out-
|
letters. The insertion mechanism has three states: no case forcing,
|
||||||
put buffer is not big enough.
|
force upper case, and force lower case. The escape sequences change the
|
||||||
|
current state: \U and \L change to upper or lower case forcing, respec-
|
||||||
|
tively, and \E (when not terminating a \Q quoted sequence) reverts to
|
||||||
|
no case forcing. The sequences \u and \l force the next character (if
|
||||||
|
it is a letter) to upper or lower case, respectively, and then the
|
||||||
|
state automatically reverts to no case forcing. Case forcing applies to
|
||||||
|
all inserted characters, including those from captured groups and let-
|
||||||
|
ters within \Q...\E quoted sequences.
|
||||||
|
|
||||||
|
Note that case forcing sequences such as \U...\E do not nest. For exam-
|
||||||
|
ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final
|
||||||
|
\E has no effect.
|
||||||
|
|
||||||
|
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
|
||||||
|
flexibility to group substitution. The syntax is similar to that used
|
||||||
|
by Bash:
|
||||||
|
|
||||||
|
${<n>:-<string>}
|
||||||
|
${<n>:+<string1>:<string2>}
|
||||||
|
|
||||||
|
As before, <n> may be a group number or a name. The first form speci-
|
||||||
|
fies a default value. If group <n> is set, its value is inserted; if
|
||||||
|
not, <string> is expanded and the result inserted. The second form
|
||||||
|
specifies strings that are expanded and inserted when group <n> is set
|
||||||
|
or unset, respectively. The first form is just a convenient shorthand
|
||||||
|
for
|
||||||
|
|
||||||
|
${<n>:+${<n>}:<string>}
|
||||||
|
|
||||||
|
Backslash can be used to escape colons and closing curly brackets in
|
||||||
|
the replacement strings. A change of the case forcing state within a
|
||||||
|
replacement string remains in force afterwards, as shown in this
|
||||||
|
pcre2test example:
|
||||||
|
|
||||||
|
/(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
|
||||||
|
body
|
||||||
|
1: hello
|
||||||
|
somebody
|
||||||
|
1: HELLO
|
||||||
|
|
||||||
|
If successful, the function returns the number of replacements that
|
||||||
|
were made. This may be zero if no matches were found, and is never
|
||||||
|
greater than 1 unless PCRE2_SUBSTITUTE_GLOBAL is set.
|
||||||
|
|
||||||
|
In the event of an error, a negative error code is returned. Except for
|
||||||
|
PCRE2_ERROR_NOMATCH (which is never returned), errors from
|
||||||
|
pcre2_match() are passed straight back. PCRE2_ERROR_NOMEMORY is
|
||||||
|
returned if the output buffer is not big enough.
|
||||||
|
PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in
|
||||||
|
the replacement string, with more particular errors being
|
||||||
|
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REP-
|
||||||
|
MISSING_BRACE (closing curly bracket not found), and PCRE2_ERROR_BADSUBSTITU-
|
||||||
|
TION (syntax error in extended group substitution). As for all PCRE2
|
||||||
|
errors, a text message that describes the error can be obtained by
|
||||||
|
calling pcre2_get_error_message().
|
||||||
|
|
||||||
|
|
||||||
DUPLICATE SUBPATTERN NAMES
|
DUPLICATE SUBPATTERN NAMES
|
||||||
|
@ -2845,8 +2912,8 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
PCRE2_ERROR_DFA_UITEM
|
PCRE2_ERROR_DFA_UITEM
|
||||||
|
|
||||||
This return is given if pcre2_dfa_match() encounters an item in the
|
This return is given if pcre2_dfa_match() encounters an item in the
|
||||||
pattern that it does not support, for instance, the use of \C or a back
|
pattern that it does not support, for instance, the use of \C in a UTF
|
||||||
reference.
|
mode or a back reference.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_UCOND
|
PCRE2_ERROR_DFA_UCOND
|
||||||
|
|
||||||
|
@ -2890,7 +2957,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 22 September 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -3010,10 +3077,18 @@ UNICODE AND UTF SUPPORT
|
||||||
PCRE2_UCP option. Unless the application has set PCRE2_NEVER_UCP, a
|
PCRE2_UCP option. Unless the application has set PCRE2_NEVER_UCP, a
|
||||||
pattern may also request this by starting with (*UCP).
|
pattern may also request this by starting with (*UCP).
|
||||||
|
|
||||||
|
|
||||||
|
DISABLING THE USE OF \C
|
||||||
|
|
||||||
The \C escape sequence, which matches a single code unit, even in a UTF
|
The \C escape sequence, which matches a single code unit, even in a UTF
|
||||||
mode, can cause unpredictable behaviour because it may leave the cur-
|
mode, can cause unpredictable behaviour because it may leave the cur-
|
||||||
rent matching point in the middle of a multi-code-unit character. It
|
rent matching point in the middle of a multi-code-unit character. The
|
||||||
can be locked out by setting the PCRE2_NEVER_BACKSLASH_C option.
|
application can lock it out by setting the PCRE2_NEVER_BACKSLASH_C
|
||||||
|
option when calling pcre2_compile(). There is also a build-time option
|
||||||
|
|
||||||
|
--enable-never-backslash-C
|
||||||
|
|
||||||
|
(note the upper case C) which locks out the use of \C entirely.
|
||||||
|
|
||||||
|
|
||||||
JUST-IN-TIME COMPILER SUPPORT
|
JUST-IN-TIME COMPILER SUPPORT
|
||||||
|
@ -3366,7 +3441,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 24 April 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -6028,12 +6103,18 @@ MATCHING A SINGLE CODE UNIT
|
||||||
results, because PCRE2 assumes that it is matching character by charac-
|
results, because PCRE2 assumes that it is matching character by charac-
|
||||||
ter in a valid UTF string (by default it checks the subject string's
|
ter in a valid UTF string (by default it checks the subject string's
|
||||||
validity at the start of processing unless the PCRE2_NO_UTF_CHECK
|
validity at the start of processing unless the PCRE2_NO_UTF_CHECK
|
||||||
option is used). An application can lock out the use of \C by setting
|
option is used).
|
||||||
the PCRE2_NEVER_BACKSLASH_C option.
|
|
||||||
|
An application can lock out the use of \C by setting the
|
||||||
|
PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also
|
||||||
|
possible to build PCRE2 with the use of \C permanently disabled.
|
||||||
|
|
||||||
PCRE2 does not allow \C to appear in lookbehind assertions (described
|
PCRE2 does not allow \C to appear in lookbehind assertions (described
|
||||||
below) in a UTF mode, because this would make it impossible to calcu-
|
below) in a UTF mode, because this would make it impossible to calcu-
|
||||||
late the length of the lookbehind.
|
late the length of the lookbehind. Neither the alternative matching
|
||||||
|
function pcre2_dfa_match() nor the JIT optimizer supports \C in a UTF
|
||||||
|
mode. The former gives a match-time error; the latter fails to optimize
|
||||||
|
and so the match is always run using the interpreter.
|
||||||
|
|
||||||
In general, the \C escape sequence is best avoided. However, one way of
|
In general, the \C escape sequence is best avoided. However, one way of
|
||||||
using it that avoids the problem of malformed UTF characters is to use
|
using it that avoids the problem of malformed UTF characters is to use
|
||||||
|
@ -8036,7 +8117,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 01 September 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -8966,10 +9047,10 @@ CHARACTER TYPES
|
||||||
\W a "non-word" character
|
\W a "non-word" character
|
||||||
\X a Unicode extended grapheme cluster
|
\X a Unicode extended grapheme cluster
|
||||||
|
|
||||||
The application can lock out the use of \C by setting the
|
\C is dangerous because it may leave the current matching point in the
|
||||||
PCRE2_NEVER_BACKSLASH_C option. It is dangerous because it may leave
|
middle of a UTF-8 or UTF-16 character. The application can lock out the
|
||||||
the current matching point in the middle of a UTF-8 or UTF-16 charac-
|
use of \C by setting the PCRE2_NEVER_BACKSLASH_C option. It is also
|
||||||
ter.
|
possible to build PCRE2 with the use of \C permanently disabled.
|
||||||
|
|
||||||
By default, \d, \s, and \w match only ASCII characters, even in UTF-8
|
By default, \d, \s, and \w match only ASCII characters, even in UTF-8
|
||||||
mode or in the 16-bit and 32-bit libraries. However, if locale-specific
|
mode or in the 16-bit and 32-bit libraries. However, if locale-specific
|
||||||
|
@ -9325,7 +9406,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 17 July 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -9384,11 +9465,12 @@ WIDE CHARACTERS AND UTF MODES
|
||||||
The escape sequence \C can be used to match a single code unit, in a
|
The escape sequence \C can be used to match a single code unit, in a
|
||||||
UTF mode, but its use can lead to some strange effects because it
|
UTF mode, but its use can lead to some strange effects because it
|
||||||
breaks up multi-unit characters (see the description of \C in the
|
breaks up multi-unit characters (see the description of \C in the
|
||||||
pcre2pattern documentation). The use of \C is not supported in the
|
pcre2pattern documentation). The use of \C is not supported by the
|
||||||
alternative matching function pcre2_dfa_match(), nor is it supported in
|
alternative matching function pcre2_dfa_match() when in UTF mode. Its
|
||||||
UTF mode by the JIT optimization. If JIT optimization is requested for
|
use provokes a match-time error. The JIT optimization also does not
|
||||||
a UTF pattern that contains \C, it will not succeed, and so the match-
|
support \C in UTF mode. If JIT optimization is requested for a UTF
|
||||||
ing will be carried out by the normal interpretive function.
|
pattern that contains \C, it will not succeed, and so the matching will
|
||||||
|
be carried out by the normal interpretive function.
|
||||||
|
|
||||||
The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
|
The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
|
||||||
characters of any code value, but, by default, the characters that
|
characters of any code value, but, by default, the characters that
|
||||||
|
@ -9563,7 +9645,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 18 August 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "07 October 2015" "PCRE2 10.21"
|
.TH PCRE2API 3 "16 October 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -1209,7 +1209,8 @@ This option locks out the use of \eC in the pattern that is being compiled.
|
||||||
This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because
|
This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because
|
||||||
it may leave the current matching point in the middle of a multi-code-unit
|
it may leave the current matching point in the middle of a multi-code-unit
|
||||||
character. This option may be useful in applications that process patterns from
|
character. This option may be useful in applications that process patterns from
|
||||||
external sources.
|
external sources. Note that there is also a build-time option that permanently
|
||||||
|
locks out the use of \eC.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_NEVER_UCP
|
PCRE2_NEVER_UCP
|
||||||
.sp
|
.sp
|
||||||
|
@ -3014,8 +3015,8 @@ There are in addition the following errors that are specific to
|
||||||
PCRE2_ERROR_DFA_UITEM
|
PCRE2_ERROR_DFA_UITEM
|
||||||
.sp
|
.sp
|
||||||
This return is given if \fBpcre2_dfa_match()\fP encounters an item in the
|
This return is given if \fBpcre2_dfa_match()\fP encounters an item in the
|
||||||
pattern that it does not support, for instance, the use of \eC or a back
|
pattern that it does not support, for instance, the use of \eC in a UTF mode or
|
||||||
reference.
|
a back reference.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_ERROR_DFA_UCOND
|
PCRE2_ERROR_DFA_UCOND
|
||||||
.sp
|
.sp
|
||||||
|
@ -3065,6 +3066,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 07 October 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2BUILD 3 "23 April 2015" "PCRE2 10.20"
|
.TH PCRE2BUILD 3 "16 October 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.
|
.
|
||||||
|
@ -132,11 +132,20 @@ Pattern escapes such as \ed and \ew do not by default make use of Unicode
|
||||||
properties. The application can request that they do by setting the PCRE2_UCP
|
properties. The application can request that they do by setting the PCRE2_UCP
|
||||||
option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also
|
option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also
|
||||||
request this by starting with (*UCP).
|
request this by starting with (*UCP).
|
||||||
.P
|
.
|
||||||
|
.
|
||||||
|
.SH "DISABLING THE USE OF \eC"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
The \eC escape sequence, which matches a single code unit, even in a UTF mode,
|
The \eC escape sequence, which matches a single code unit, even in a UTF mode,
|
||||||
can cause unpredictable behaviour because it may leave the current matching
|
can cause unpredictable behaviour because it may leave the current matching
|
||||||
point in the middle of a multi-code-unit character. It can be locked out by
|
point in the middle of a multi-code-unit character. The application can lock it
|
||||||
setting the PCRE2_NEVER_BACKSLASH_C option.
|
out by setting the PCRE2_NEVER_BACKSLASH_C option when calling
|
||||||
|
\fBpcre2_compile()\fP. There is also a build-time option
|
||||||
|
.sp
|
||||||
|
--enable-never-backslash-C
|
||||||
|
.sp
|
||||||
|
(note the upper case C) which locks out the use of \eC entirely.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "JUST-IN-TIME COMPILER SUPPORT"
|
.SH "JUST-IN-TIME COMPILER SUPPORT"
|
||||||
|
@ -510,6 +519,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 24 April 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2PATTERN 3 "01 September 2015" "PCRE2 10.21"
|
.TH PCRE2PATTERN 3 "16 October 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
||||||
|
@ -1233,8 +1233,11 @@ with \eC in UTF-8 or UTF-16 mode means that the rest of the string may start
|
||||||
with a malformed UTF character. This has undefined results, because PCRE2
|
with a malformed UTF character. This has undefined results, because PCRE2
|
||||||
assumes that it is matching character by character in a valid UTF string (by
|
assumes that it is matching character by character in a valid UTF string (by
|
||||||
default it checks the subject string's validity at the start of processing
|
default it checks the subject string's validity at the start of processing
|
||||||
unless the PCRE2_NO_UTF_CHECK option is used). An application can lock out the
|
unless the PCRE2_NO_UTF_CHECK option is used).
|
||||||
use of \eC by setting the PCRE2_NEVER_BACKSLASH_C option.
|
.P
|
||||||
|
An application can lock out the use of \eC by setting the
|
||||||
|
PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also possible to
|
||||||
|
build PCRE2 with the use of \eC permanently disabled.
|
||||||
.P
|
.P
|
||||||
PCRE2 does not allow \eC to appear in lookbehind assertions
|
PCRE2 does not allow \eC to appear in lookbehind assertions
|
||||||
.\" HTML <a href="#lookbehind">
|
.\" HTML <a href="#lookbehind">
|
||||||
|
@ -1242,7 +1245,10 @@ PCRE2 does not allow \eC to appear in lookbehind assertions
|
||||||
(described below)
|
(described below)
|
||||||
.\"
|
.\"
|
||||||
in a UTF mode, because this would make it impossible to calculate the length of
|
in a UTF mode, because this would make it impossible to calculate the length of
|
||||||
the lookbehind.
|
the lookbehind. Neither the alternative matching function
|
||||||
|
\fBpcre2_dfa_match()\fP nor the JIT optimizer support \eC in a UTF mode. The
|
||||||
|
former gives a match-time error; the latter fails to optimize and so the match
|
||||||
|
is always run using the interpreter.
|
||||||
.P
|
.P
|
||||||
In general, the \eC escape sequence is best avoided. However, one way of using
|
In general, the \eC escape sequence is best avoided. However, one way of using
|
||||||
it that avoids the problem of malformed UTF characters is to use a lookahead to
|
it that avoids the problem of malformed UTF characters is to use a lookahead to
|
||||||
|
@ -3386,6 +3392,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 01 September 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2SYNTAX 3 "17 July 2015" "PCRE2 10.21"
|
.TH PCRE2SYNTAX 3 "16 October 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY"
|
.SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY"
|
||||||
|
@ -81,9 +81,10 @@ it matches a literal "u".
|
||||||
\eW a "non-word" character
|
\eW a "non-word" character
|
||||||
\eX a Unicode extended grapheme cluster
|
\eX a Unicode extended grapheme cluster
|
||||||
.sp
|
.sp
|
||||||
The application can lock out the use of \eC by setting the
|
\eC is dangerous because it may leave the current matching point in the middle
|
||||||
PCRE2_NEVER_BACKSLASH_C option. It is dangerous because it may leave the
|
of a UTF-8 or UTF-16 character. The application can lock out the use of \eC by
|
||||||
current matching point in the middle of a UTF-8 or UTF-16 character.
|
setting the PCRE2_NEVER_BACKSLASH_C option. It is also possible to build PCRE2
|
||||||
|
with the use of \eC permanently disabled.
|
||||||
.P
|
.P
|
||||||
By default, \ed, \es, and \ew match only ASCII characters, even in UTF-8 mode
|
By default, \ed, \es, and \ew match only ASCII characters, even in UTF-8 mode
|
||||||
or in the 16-bit and 32-bit libraries. However, if locale-specific matching is
|
or in the 16-bit and 32-bit libraries. However, if locale-specific matching is
|
||||||
|
@ -576,6 +577,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 17 July 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2TEST 1 "23 September 2015" "PCRE 10.21"
|
.TH PCRE2TEST 1 "17 October 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -122,6 +122,7 @@ following options output the value and set the exit code as indicated:
|
||||||
The following options output 1 for true or 0 for false, and set the exit code
|
The following options output 1 for true or 0 for false, and set the exit code
|
||||||
to the same value:
|
to the same value:
|
||||||
.sp
|
.sp
|
||||||
|
backslash-C \eC is supported (not locked out)
|
||||||
ebcdic compiled for an EBCDIC environment
|
ebcdic compiled for an EBCDIC environment
|
||||||
jit just-in-time support is available
|
jit just-in-time support is available
|
||||||
pcre2-16 the 16-bit library was built
|
pcre2-16 the 16-bit library was built
|
||||||
|
@ -1559,6 +1560,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 23 September 2015
|
Last updated: 17 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -119,6 +119,7 @@ COMMAND LINE OPTIONS
|
||||||
The following options output 1 for true or 0 for false, and
|
The following options output 1 for true or 0 for false, and
|
||||||
set the exit code to the same value:
|
set the exit code to the same value:
|
||||||
|
|
||||||
|
backslash-C \C is supported (not locked out)
|
||||||
ebcdic compiled for an EBCDIC environment
|
ebcdic compiled for an EBCDIC environment
|
||||||
jit just-in-time support is available
|
jit just-in-time support is available
|
||||||
pcre2-16 the 16-bit library was built
|
pcre2-16 the 16-bit library was built
|
||||||
|
@ -457,7 +458,7 @@ PATTERN MODIFIERS
|
||||||
Setting compilation options
|
Setting compilation options
|
||||||
|
|
||||||
The following modifiers set options for pcre2_compile(). The most com-
|
The following modifiers set options for pcre2_compile(). The most com-
|
||||||
mon ones have single-letter abbreviations. See pcreapi for a descrip-
|
mon ones have single-letter abbreviations. See pcre2api for a descrip-
|
||||||
tion of their effects.
|
tion of their effects.
|
||||||
|
|
||||||
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
||||||
|
@ -484,6 +485,7 @@ PATTERN MODIFIERS
|
||||||
no_utf_check set PCRE2_NO_UTF_CHECK
|
no_utf_check set PCRE2_NO_UTF_CHECK
|
||||||
ucp set PCRE2_UCP
|
ucp set PCRE2_UCP
|
||||||
ungreedy set PCRE2_UNGREEDY
|
ungreedy set PCRE2_UNGREEDY
|
||||||
|
use_offset_limit set PCRE2_USE_OFFSET_LIMIT
|
||||||
utf set PCRE2_UTF
|
utf set PCRE2_UTF
|
||||||
|
|
||||||
As well as turning on the PCRE2_UTF option, the utf modifier causes all
|
As well as turning on the PCRE2_UTF option, the utf modifier causes all
|
||||||
|
@ -509,6 +511,7 @@ PATTERN MODIFIERS
|
||||||
locale=<name> use this locale
|
locale=<name> use this locale
|
||||||
memory show memory used
|
memory show memory used
|
||||||
newline=<type> set newline type
|
newline=<type> set newline type
|
||||||
|
null_context compile with a NULL context
|
||||||
parens_nest_limit=<n> set maximum parentheses depth
|
parens_nest_limit=<n> set maximum parentheses depth
|
||||||
posix use the POSIX API
|
posix use the POSIX API
|
||||||
push push compiled pattern onto the stack
|
push push compiled pattern onto the stack
|
||||||
|
@ -579,6 +582,13 @@ PATTERN MODIFIERS
|
||||||
mation that is requested. For each callout, either its number or string
|
mation that is requested. For each callout, either its number or string
|
||||||
is given, followed by the item that follows it in the pattern.
|
is given, followed by the item that follows it in the pattern.
|
||||||
|
|
||||||
|
Passing a NULL context
|
||||||
|
|
||||||
|
Normally, pcre2test passes a context block to pcre2_compile(). If the
|
||||||
|
null_context modifier is set, however, NULL is passed. This is for
|
||||||
|
testing that pcre2_compile() behaves correctly in this case (it uses
|
||||||
|
default values).
|
||||||
|
|
||||||
Specifying a pattern in hex
|
Specifying a pattern in hex
|
||||||
|
|
||||||
The hex modifier specifies that the characters of the pattern are to be
|
The hex modifier specifies that the characters of the pattern are to be
|
||||||
|
@ -823,9 +833,11 @@ SUBJECT MODIFIERS
|
||||||
/g global global matching
|
/g global global matching
|
||||||
jitstack=<n> set size of JIT stack
|
jitstack=<n> set size of JIT stack
|
||||||
mark show mark values
|
mark show mark values
|
||||||
match_limit=>n> set a match limit
|
match_limit=<n> set a match limit
|
||||||
memory show memory usage
|
memory show memory usage
|
||||||
|
null_context match with a NULL context
|
||||||
offset=<n> set starting offset
|
offset=<n> set starting offset
|
||||||
|
offset_limit=<n> set offset limit
|
||||||
ovector=<n> set size of output vector
|
ovector=<n> set size of output vector
|
||||||
recursion_limit=<n> set a recursion limit
|
recursion_limit=<n> set a recursion limit
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
|
@ -1046,6 +1058,14 @@ SUBJECT MODIFIERS
|
||||||
The offset modifier sets an offset in the subject string at which
|
The offset modifier sets an offset in the subject string at which
|
||||||
matching starts. Its value is a number of code units, not characters.
|
matching starts. Its value is a number of code units, not characters.
|
||||||
|
|
||||||
|
Setting an offset limit
|
||||||
|
|
||||||
|
The offset_limit modifier sets a limit for unanchored matches. If a
|
||||||
|
match cannot be found starting at or before this offset in the subject,
|
||||||
|
a "no match" return is given. The data value is a number of code units,
|
||||||
|
not characters. When this modifier is used, the use_offset_limit modi-
|
||||||
|
fier must have been set for the pattern; if not, an error is generated.
|
||||||
|
|
||||||
Setting the size of the output vector
|
Setting the size of the output vector
|
||||||
|
|
||||||
The ovector modifier applies only to the subject line in which it
|
The ovector modifier applies only to the subject line in which it
|
||||||
|
@ -1073,6 +1093,15 @@ SUBJECT MODIFIERS
|
||||||
When testing pcre2_substitute(), this modifier also has the effect of
|
When testing pcre2_substitute(), this modifier also has the effect of
|
||||||
passing the replacement string as zero-terminated.
|
passing the replacement string as zero-terminated.
|
||||||
|
|
||||||
|
Passing a NULL context
|
||||||
|
|
||||||
|
Normally, pcre2test passes a context block to pcre2_match(),
|
||||||
|
pcre2_dfa_match() or pcre2_jit_match(). If the null_context modifier is
|
||||||
|
set, however, NULL is passed. This is for testing that the matching
|
||||||
|
functions behave correctly in this case (they use default values). This
|
||||||
|
modifier cannot be used with the find_limits modifier or when testing
|
||||||
|
the substitution function.
|
||||||
|
|
||||||
|
|
||||||
THE ALTERNATIVE MATCHING FUNCTION
|
THE ALTERNATIVE MATCHING FUNCTION
|
||||||
|
|
||||||
|
@ -1398,5 +1427,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 14 September 2015
|
Last updated: 17 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2UNICODE 3 "18 August 2015" "PCRE2 10.21"
|
.TH PCRE2UNICODE 3 "16 October 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions (revised API)
|
PCRE - Perl-compatible regular expressions (revised API)
|
||||||
.SH "UNICODE AND UTF SUPPORT"
|
.SH "UNICODE AND UTF SUPPORT"
|
||||||
|
@ -63,11 +63,12 @@ characters (see the description of \eC in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2pattern\fP
|
\fBpcre2pattern\fP
|
||||||
.\"
|
.\"
|
||||||
documentation). The use of \eC is not supported in the alternative matching
|
documentation). The use of \eC is not supported by the alternative matching
|
||||||
function \fBpcre2_dfa_match()\fP, nor is it supported in UTF mode by the JIT
|
function \fBpcre2_dfa_match()\fP when in UTF mode. Its use provokes a
|
||||||
optimization. If JIT optimization is requested for a UTF pattern that contains
|
match-time error. The JIT optimization also does not support \eC in UTF mode.
|
||||||
\eC, it will not succeed, and so the matching will be carried out by the normal
|
If JIT optimization is requested for a UTF pattern that contains \eC, it will
|
||||||
interpretive function.
|
not succeed, and so the matching will be carried out by the normal interpretive
|
||||||
|
function.
|
||||||
.P
|
.P
|
||||||
The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly test
|
The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly test
|
||||||
characters of any code value, but, by default, the characters that PCRE2
|
characters of any code value, but, by default, the characters that PCRE2
|
||||||
|
@ -262,6 +263,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 18 August 2015
|
Last updated: 16 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -182,6 +182,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
#define MAX_NAME_SIZE 32
|
#define MAX_NAME_SIZE 32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||||
|
/* #undef NEVER_BACKSLASH_C */
|
||||||
|
|
||||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||||
sequence. PCRE2 client programs can override this by selecting other values
|
sequence. PCRE2 client programs can override this by selecting other values
|
||||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
||||||
|
|
|
@ -169,6 +169,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
overflow caused by enormously large patterns. */
|
overflow caused by enormously large patterns. */
|
||||||
#undef MAX_NAME_SIZE
|
#undef MAX_NAME_SIZE
|
||||||
|
|
||||||
|
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||||
|
#undef NEVER_BACKSLASH_C
|
||||||
|
|
||||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||||
sequence. PCRE2 client programs can override this by selecting other values
|
sequence. PCRE2 client programs can override this by selecting other values
|
||||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
||||||
|
|
|
@ -583,7 +583,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
||||||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
||||||
ERR81, ERR82, ERR83, ERR84 };
|
ERR81, ERR82, ERR83, ERR84, ERR85 };
|
||||||
|
|
||||||
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
||||||
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
||||||
|
@ -7053,11 +7053,19 @@ for (;; ptr++)
|
||||||
|
|
||||||
/* The use of \C can be locked out. */
|
/* The use of \C can be locked out. */
|
||||||
|
|
||||||
|
#ifdef NEVER_BACKSLASH_C
|
||||||
|
else if (escape == ESC_C)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR85;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
#else
|
||||||
else if (escape == ESC_C && (options & PCRE2_NEVER_BACKSLASH_C) != 0)
|
else if (escape == ESC_C && (options & PCRE2_NEVER_BACKSLASH_C) != 0)
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR83;
|
*errorcodeptr = ERR83;
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* For the rest (including \X when Unicode properties are supported), we
|
/* For the rest (including \X when Unicode properties are supported), we
|
||||||
can obtain the OP value by negating the escape value in the default
|
can obtain the OP value by negating the escape value in the default
|
||||||
|
|
|
@ -168,6 +168,8 @@ static const char compile_error_texts[] =
|
||||||
"unrecognized string delimiter follows (?C\0"
|
"unrecognized string delimiter follows (?C\0"
|
||||||
"using \\C is disabled by the application\0"
|
"using \\C is disabled by the application\0"
|
||||||
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
||||||
|
/* 85 */
|
||||||
|
"using \\C is disabled in this PCRE2 library\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
/* Match-time and UTF error texts are in the same format. */
|
/* Match-time and UTF error texts are in the same format. */
|
||||||
|
|
|
@ -106,7 +106,7 @@ static const int eint1[] = {
|
||||||
|
|
||||||
static const int eint2[] = {
|
static const int eint2[] = {
|
||||||
30, REG_ECTYPE, /* unknown POSIX class name */
|
30, REG_ECTYPE, /* unknown POSIX class name */
|
||||||
32, REG_INVARG, /* this version of PCRE does not have UTF or UCP support */
|
32, REG_INVARG, /* this version of PCRE2 does not have Unicode support */
|
||||||
37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */
|
37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */
|
||||||
56, REG_INVARG, /* internal error: unknown newline setting */
|
56, REG_INVARG, /* internal error: unknown newline setting */
|
||||||
};
|
};
|
||||||
|
|
|
@ -667,6 +667,12 @@ table itself easier to read. */
|
||||||
#define EBCDIC_NL 0
|
#define EBCDIC_NL 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef NEVER_BACKSLASH_C
|
||||||
|
#define BACKSLASH_C 0
|
||||||
|
#else
|
||||||
|
#define BACKSLASH_C 1
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct coptstruct {
|
typedef struct coptstruct {
|
||||||
const char *name;
|
const char *name;
|
||||||
uint32_t type;
|
uint32_t type;
|
||||||
|
@ -681,6 +687,7 @@ enum { CONF_BSR,
|
||||||
};
|
};
|
||||||
|
|
||||||
static coptstruct coptlist[] = {
|
static coptstruct coptlist[] = {
|
||||||
|
{ "backslash-C", CONF_FIX, BACKSLASH_C },
|
||||||
{ "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
|
{ "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
|
||||||
{ "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
|
{ "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
|
||||||
{ "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
|
{ "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
|
||||||
|
@ -6467,6 +6474,7 @@ printf(" -b set default pattern control 'fullbincode'\n");
|
||||||
printf(" -C show PCRE2 compile-time options and exit\n");
|
printf(" -C show PCRE2 compile-time options and exit\n");
|
||||||
printf(" -C arg show a specific compile-time option and exit with its\n");
|
printf(" -C arg show a specific compile-time option and exit with its\n");
|
||||||
printf(" value if numeric (else 0). The arg can be:\n");
|
printf(" value if numeric (else 0). The arg can be:\n");
|
||||||
|
printf(" backslash-C use of \\C is enabled [0, 1]\n");
|
||||||
printf(" bsr \\R type [ANYCRLF, ANY]\n");
|
printf(" bsr \\R type [ANYCRLF, ANY]\n");
|
||||||
printf(" ebcdic compiled for EBCDIC character code [0,1]\n");
|
printf(" ebcdic compiled for EBCDIC character code [0,1]\n");
|
||||||
printf(" ebcdic-nl NL code if compiled for EBCDIC\n");
|
printf(" ebcdic-nl NL code if compiled for EBCDIC\n");
|
||||||
|
@ -6618,6 +6626,11 @@ print_newline_config(optval, FALSE);
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
|
||||||
printf(" \\R matches %s\n", optval? "CR, LF, or CRLF only" :
|
printf(" \\R matches %s\n", optval? "CR, LF, or CRLF only" :
|
||||||
"all Unicode newlines");
|
"all Unicode newlines");
|
||||||
|
#ifdef NEVER_BACKSLASH_C
|
||||||
|
printf(" \\C is not supported\n");
|
||||||
|
#else
|
||||||
|
printf(" \\C is supported\n");
|
||||||
|
#endif
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
|
||||||
printf(" Internal link size = %d\n", optval);
|
printf(" Internal link size = %d\n", optval);
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
|
||||||
|
|
|
@ -1,46 +1,6 @@
|
||||||
# This set of tests is for UTF-8 support and Unicode property support, with
|
# This set of tests is for UTF-8 support and Unicode property support, with
|
||||||
# relevance only for the 8-bit library.
|
# relevance only for the 8-bit library.
|
||||||
|
|
||||||
/X(\C{3})/utf
|
|
||||||
X\x{1234}
|
|
||||||
|
|
||||||
/X(\C{4})/utf
|
|
||||||
X\x{1234}YZ
|
|
||||||
|
|
||||||
/X\C*/utf
|
|
||||||
XYZabcdce
|
|
||||||
|
|
||||||
/X\C*?/utf
|
|
||||||
XYZabcde
|
|
||||||
|
|
||||||
/X\C{3,5}/utf
|
|
||||||
Xabcdefg
|
|
||||||
X\x{1234}
|
|
||||||
X\x{1234}YZ
|
|
||||||
X\x{1234}\x{512}
|
|
||||||
X\x{1234}\x{512}YZ
|
|
||||||
|
|
||||||
/X\C{3,5}?/utf
|
|
||||||
Xabcdefg
|
|
||||||
X\x{1234}
|
|
||||||
X\x{1234}YZ
|
|
||||||
X\x{1234}\x{512}
|
|
||||||
|
|
||||||
/a\Cb/utf
|
|
||||||
aXb
|
|
||||||
a\nb
|
|
||||||
|
|
||||||
/a\C\Cb/utf
|
|
||||||
a\x{100}b
|
|
||||||
|
|
||||||
/ab\Cde/utf
|
|
||||||
abXde
|
|
||||||
|
|
||||||
/a\C\Cb/utf
|
|
||||||
a\x{100}b
|
|
||||||
\= Expect no match
|
|
||||||
a\x{12257}b
|
|
||||||
|
|
||||||
# The next 3 patterns have UTF-8 errors
|
# The next 3 patterns have UTF-8 errors
|
||||||
|
|
||||||
/[Ã]/utf
|
/[Ã]/utf
|
||||||
|
@ -212,21 +172,6 @@
|
||||||
|
|
||||||
/\x{212ab}/IB,utf
|
/\x{212ab}/IB,utf
|
||||||
|
|
||||||
# This one is here not because it's different to Perl, but because the way
|
|
||||||
# the captured single-byte is displayed. (In Perl it becomes a character, and you
|
|
||||||
# can't tell the difference.)
|
|
||||||
|
|
||||||
/X(\C)(.*)/utf
|
|
||||||
X\x{1234}
|
|
||||||
X\nabc
|
|
||||||
|
|
||||||
# This one is here because Perl gives out a grumbly error message (quite
|
|
||||||
# correctly, but that messes up comparisons).
|
|
||||||
|
|
||||||
/a\Cb/utf
|
|
||||||
\= Expect no match
|
|
||||||
a\x{100}b
|
|
||||||
|
|
||||||
/[^ab\xC0-\xF0]/IB,utf
|
/[^ab\xC0-\xF0]/IB,utf
|
||||||
\x{f1}
|
\x{f1}
|
||||||
\x{bf}
|
\x{bf}
|
||||||
|
|
|
@ -6,10 +6,6 @@
|
||||||
#forbid_utf
|
#forbid_utf
|
||||||
#newline_default LF ANY ANYCRLF
|
#newline_default LF ANY ANYCRLF
|
||||||
|
|
||||||
/a\Cb/
|
|
||||||
aXb
|
|
||||||
a\nb
|
|
||||||
|
|
||||||
/[^\x{c4}]/IB
|
/[^\x{c4}]/IB
|
||||||
|
|
||||||
/\x{100}/I
|
/\x{100}/I
|
||||||
|
@ -344,7 +340,7 @@
|
||||||
|
|
||||||
# Non-UTF characters
|
# Non-UTF characters
|
||||||
|
|
||||||
/\C{2,3}/
|
/.{2,3}/
|
||||||
\x{400000}\x{400001}\x{400002}\x{400003}
|
\x{400000}\x{400001}\x{400002}\x{400003}
|
||||||
|
|
||||||
/\x{400000}\x{800000}/IBi
|
/\x{400000}\x{800000}/IBi
|
||||||
|
|
|
@ -7,49 +7,6 @@
|
||||||
/abc/utf
|
/abc/utf
|
||||||
Ã]
|
Ã]
|
||||||
|
|
||||||
/X(\C{3})/utf
|
|
||||||
X\x{11234}Y
|
|
||||||
X\x{11234}YZ
|
|
||||||
|
|
||||||
/X(\C{4})/utf
|
|
||||||
X\x{11234}YZ
|
|
||||||
X\x{11234}YZW
|
|
||||||
|
|
||||||
/X\C*/utf
|
|
||||||
XYZabcdce
|
|
||||||
|
|
||||||
/X\C*?/utf
|
|
||||||
XYZabcde
|
|
||||||
|
|
||||||
/X\C{3,5}/utf
|
|
||||||
Xabcdefg
|
|
||||||
X\x{11234}Y
|
|
||||||
X\x{11234}YZ
|
|
||||||
X\x{11234}\x{512}
|
|
||||||
X\x{11234}\x{512}YZ
|
|
||||||
X\x{11234}\x{512}\x{11234}Z
|
|
||||||
|
|
||||||
/X\C{3,5}?/utf
|
|
||||||
Xabcdefg
|
|
||||||
X\x{11234}Y
|
|
||||||
X\x{11234}YZ
|
|
||||||
X\x{11234}\x{512}YZ
|
|
||||||
\= Expect no match
|
|
||||||
X\x{11234}
|
|
||||||
|
|
||||||
/a\Cb/utf
|
|
||||||
aXb
|
|
||||||
a\nb
|
|
||||||
|
|
||||||
/a\C\Cb/utf
|
|
||||||
a\x{12257}b
|
|
||||||
\= Expect no match
|
|
||||||
a\x{12257}\x{11234}b
|
|
||||||
a\x{100}b
|
|
||||||
|
|
||||||
/ab\Cde/utf
|
|
||||||
abXde
|
|
||||||
|
|
||||||
# Check maximum character size
|
# Check maximum character size
|
||||||
|
|
||||||
/\x{ffff}/IB,utf
|
/\x{ffff}/IB,utf
|
||||||
|
@ -90,16 +47,6 @@
|
||||||
|
|
||||||
/\x{212ab}/IB,utf
|
/\x{212ab}/IB,utf
|
||||||
|
|
||||||
# These two \C tests, copied from the UTF-8 input file, do not have any
|
|
||||||
# problems in 16 or 32 bits.
|
|
||||||
|
|
||||||
/X(\C)(.*)/utf
|
|
||||||
X\x{1234}
|
|
||||||
X\nabc
|
|
||||||
|
|
||||||
/a\Cb/utf
|
|
||||||
a\x{100}b
|
|
||||||
|
|
||||||
/[^ab\xC0-\xF0]/IB,utf
|
/[^ab\xC0-\xF0]/IB,utf
|
||||||
\x{f1}
|
\x{f1}
|
||||||
\x{bf}
|
\x{bf}
|
||||||
|
@ -336,9 +283,6 @@
|
||||||
|
|
||||||
/\o{4200000}/utf
|
/\o{4200000}/utf
|
||||||
|
|
||||||
/\C/utf
|
|
||||||
\x{110000}
|
|
||||||
|
|
||||||
/\x{100}*A/IB,utf
|
/\x{100}*A/IB,utf
|
||||||
A
|
A
|
||||||
|
|
||||||
|
@ -396,4 +340,7 @@
|
||||||
|
|
||||||
/\x{3a3}B/IBi,utf
|
/\x{3a3}B/IBi,utf
|
||||||
|
|
||||||
|
/./utf
|
||||||
|
\x{110000}
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -3739,41 +3739,40 @@
|
||||||
|
|
||||||
/[bcd]*a/B
|
/[bcd]*a/B
|
||||||
|
|
||||||
# A complete set of tests for auto-possessification of character types.
|
# A complete set of tests for auto-possessification of character types, but
|
||||||
|
# omitting \C because it might be disabled (it has its own tests).
|
||||||
|
|
||||||
/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\C \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx
|
/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx
|
||||||
|
|
||||||
/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\C \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx
|
/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx
|
||||||
|
|
||||||
/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\C \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx
|
/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx
|
||||||
|
|
||||||
/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\C \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx
|
/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx
|
||||||
|
|
||||||
/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\C \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx
|
/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx
|
||||||
|
|
||||||
/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\C \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx
|
/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx
|
||||||
|
|
||||||
/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\C \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx
|
/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx
|
||||||
|
|
||||||
/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\C \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx
|
/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx
|
||||||
|
|
||||||
/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\C \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx
|
/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx
|
||||||
|
|
||||||
/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\C \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx
|
/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx
|
||||||
|
|
||||||
/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\C \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx
|
/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx
|
||||||
|
|
||||||
/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\C \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx
|
/ a+\D a+\d a+\S a+\s a+\W a+\w a+. a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx
|
||||||
|
|
||||||
/ a+\D a+\d a+\S a+\s a+\W a+\w a+. a+\C a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx
|
/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx
|
||||||
|
|
||||||
/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\C \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx
|
/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx
|
||||||
|
|
||||||
/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\C .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx
|
/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx
|
||||||
|
|
||||||
/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\C .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx
|
/ \D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx
|
||||||
|
|
||||||
/\D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \C+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx
|
|
||||||
|
|
||||||
/(?=a+)a(a+)++a/B
|
/(?=a+)a(a+)++a/B
|
||||||
|
|
||||||
|
@ -4327,8 +4326,6 @@
|
||||||
|
|
||||||
/((?2){73}(?2))((?1))/info
|
/((?2){73}(?2))((?1))/info
|
||||||
|
|
||||||
/ab\Cde/never_backslash_c
|
|
||||||
|
|
||||||
/abc/
|
/abc/
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
\[9x!xxx(]{9999}
|
\[9x!xxx(]{9999}
|
||||||
|
@ -4446,12 +4443,6 @@
|
||||||
/\x0{ab}/
|
/\x0{ab}/
|
||||||
\0{ab}
|
\0{ab}
|
||||||
|
|
||||||
/ab\Cde/
|
|
||||||
abXde
|
|
||||||
|
|
||||||
/(?<=ab\Cde)X/
|
|
||||||
abZdeX
|
|
||||||
|
|
||||||
/^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-02}Z/
|
/^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-02}Z/
|
||||||
ababababbbabZXXXX
|
ababababbbabZXXXX
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
# These are tests of \C that do not involve UTF. They are not run when \C is
|
||||||
|
# disabled by compiling with --enable-never-backslash-C.
|
||||||
|
|
||||||
|
/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx
|
||||||
|
|
||||||
|
/\D+\C \d+\C \S+\C \s+\C \W+\C \w+\C .+\C \R+\C \H+\C \h+\C \V+\C \v+\C a+\C \n+\C \C+\C/Bx
|
||||||
|
|
||||||
|
/ab\Cde/never_backslash_c
|
||||||
|
|
||||||
|
/ab\Cde/
|
||||||
|
abXde
|
||||||
|
|
||||||
|
/(?<=ab\Cde)X/
|
||||||
|
abZdeX
|
||||||
|
|
||||||
|
# End of testinput21
|
|
@ -0,0 +1,95 @@
|
||||||
|
# Tests of \C when Unicode support is available. Note that \C is not supported
|
||||||
|
# for DFA matching in UTF mode, so this test is not run with -dfa. The output
|
||||||
|
# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match
|
||||||
|
# in some widths and not in others.
|
||||||
|
|
||||||
|
/ab\Cde/utf
|
||||||
|
abXde
|
||||||
|
|
||||||
|
# This should produce an error diagnostic (\C in UTF lookbehind)
|
||||||
|
|
||||||
|
/(?<=ab\Cde)X/utf
|
||||||
|
|
||||||
|
# Autopossessification tests
|
||||||
|
|
||||||
|
/\C+\X \X+\C/Bx
|
||||||
|
|
||||||
|
/\C+\X \X+\C/Bx,utf
|
||||||
|
|
||||||
|
/\C\X*TӅ;
|
||||||
|
{0,6}\v+
F
|
||||||
|
/utf
|
||||||
|
\= Expect no match
|
||||||
|
Ӆ\x0a
|
||||||
|
|
||||||
|
/\C(\W?ſ)'?{{/utf
|
||||||
|
\= Expect no match
|
||||||
|
\\C(\\W?ſ)'?{{
|
||||||
|
|
||||||
|
/X(\C{3})/utf
|
||||||
|
X\x{1234}
|
||||||
|
X\x{11234}Y
|
||||||
|
X\x{11234}YZ
|
||||||
|
|
||||||
|
/X(\C{4})/utf
|
||||||
|
X\x{1234}YZ
|
||||||
|
X\x{11234}YZ
|
||||||
|
X\x{11234}YZW
|
||||||
|
|
||||||
|
/X\C*/utf
|
||||||
|
XYZabcdce
|
||||||
|
|
||||||
|
/X\C*?/utf
|
||||||
|
XYZabcde
|
||||||
|
|
||||||
|
/X\C{3,5}/utf
|
||||||
|
Xabcdefg
|
||||||
|
X\x{1234}
|
||||||
|
X\x{1234}YZ
|
||||||
|
X\x{1234}\x{512}
|
||||||
|
X\x{1234}\x{512}YZ
|
||||||
|
X\x{11234}Y
|
||||||
|
X\x{11234}YZ
|
||||||
|
X\x{11234}\x{512}
|
||||||
|
X\x{11234}\x{512}YZ
|
||||||
|
X\x{11234}\x{512}\x{11234}Z
|
||||||
|
|
||||||
|
/X\C{3,5}?/utf
|
||||||
|
Xabcdefg
|
||||||
|
X\x{1234}
|
||||||
|
X\x{1234}YZ
|
||||||
|
X\x{1234}\x{512}
|
||||||
|
X\x{11234}Y
|
||||||
|
X\x{11234}YZ
|
||||||
|
X\x{11234}\x{512}YZ
|
||||||
|
X\x{11234}
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
aXb
|
||||||
|
a\nb
|
||||||
|
a\x{100}b
|
||||||
|
|
||||||
|
/a\C\Cb/utf
|
||||||
|
a\x{100}b
|
||||||
|
a\x{12257}b
|
||||||
|
a\x{12257}\x{11234}b
|
||||||
|
|
||||||
|
/ab\Cde/utf
|
||||||
|
abXde
|
||||||
|
|
||||||
|
# This one is here not because it's different to Perl, but because the way
|
||||||
|
# the captured single code unit is displayed. (In Perl it becomes a character,
|
||||||
|
# and you can't tell the difference.)
|
||||||
|
|
||||||
|
/X(\C)(.*)/utf
|
||||||
|
X\x{1234}
|
||||||
|
X\nabc
|
||||||
|
|
||||||
|
# This one is here because Perl gives out a grumbly error message (quite
|
||||||
|
# correctly, but that messes up comparisons).
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
\= Expect no match in 8-bit mode
|
||||||
|
a\x{100}b
|
||||||
|
|
||||||
|
# End of testinput22
|
|
@ -0,0 +1,7 @@
|
||||||
|
# This test is run when PCRE2 has been built with --enable-never-backslash-C,
|
||||||
|
# which disables the use of \C. All we can do is check that it gives the
|
||||||
|
# correct error message.
|
||||||
|
|
||||||
|
/a\Cb/
|
||||||
|
|
||||||
|
# End of testinput23
|
|
@ -111,9 +111,6 @@
|
||||||
/.{3,5}?/IB,utf
|
/.{3,5}?/IB,utf
|
||||||
\x{212ab}\x{212ab}\x{212ab}\x{861}
|
\x{212ab}\x{212ab}\x{212ab}\x{861}
|
||||||
|
|
||||||
/(?<=\C)X/utf
|
|
||||||
Should produce an error diagnostic
|
|
||||||
|
|
||||||
/^[ab]/IB,utf
|
/^[ab]/IB,utf
|
||||||
bar
|
bar
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
|
@ -1367,8 +1364,6 @@
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
aAz
|
aAz
|
||||||
|
|
||||||
/(?<=ab\Cde)X/utf
|
|
||||||
|
|
||||||
/\X/
|
/\X/
|
||||||
a\=ps
|
a\=ps
|
||||||
a\=ph
|
a\=ph
|
||||||
|
@ -1617,13 +1612,13 @@
|
||||||
|
|
||||||
/[\p{L}ab]{2,3}+/B,no_auto_possess
|
/[\p{L}ab]{2,3}+/B,no_auto_possess
|
||||||
|
|
||||||
/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
|
/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
|
||||||
|
|
||||||
/.+\X/Bsx
|
/.+\X/Bsx
|
||||||
|
|
||||||
/\X+$/Bmx
|
/\X+$/Bmx
|
||||||
|
|
||||||
/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
|
/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
|
||||||
|
|
||||||
/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
|
/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
|
||||||
|
|
||||||
|
@ -1665,16 +1660,6 @@
|
||||||
|
|
||||||
"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
|
"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
|
||||||
|
|
||||||
/\C\X*TӅ;
|
|
||||||
{0,6}\v+
F
|
|
||||||
/utf
|
|
||||||
\= Expect no match
|
|
||||||
Ӆ\x0a
|
|
||||||
|
|
||||||
/\C(\W?ſ)'?{{/utf
|
|
||||||
\= Expect no match
|
|
||||||
\\C(\\W?ſ)'?{{
|
|
||||||
|
|
||||||
/[\pS#moq]/
|
/[\pS#moq]/
|
||||||
=
|
=
|
||||||
|
|
||||||
|
|
|
@ -4645,12 +4645,6 @@
|
||||||
aaaa\=ovector=3
|
aaaa\=ovector=3
|
||||||
aaaa\=ovector=4
|
aaaa\=ovector=4
|
||||||
|
|
||||||
/ab\Cde/
|
|
||||||
abXde
|
|
||||||
|
|
||||||
/(?<=ab\Cde)X/
|
|
||||||
abZdeX
|
|
||||||
|
|
||||||
/^\R/
|
/^\R/
|
||||||
\r\=ps
|
\r\=ps
|
||||||
\r\=ph
|
\r\=ph
|
||||||
|
|
|
@ -671,11 +671,6 @@
|
||||||
the cat\=ps
|
the cat\=ps
|
||||||
the cat\=ph
|
the cat\=ph
|
||||||
|
|
||||||
/ab\Cde/utf
|
|
||||||
abXde
|
|
||||||
|
|
||||||
/(?<=ab\Cde)X/utf
|
|
||||||
|
|
||||||
/./newline=crlf,utf
|
/./newline=crlf,utf
|
||||||
\r\=ps
|
\r\=ps
|
||||||
\r\=ph
|
\r\=ph
|
||||||
|
|
|
@ -4,10 +4,8 @@
|
||||||
#forbid_utf
|
#forbid_utf
|
||||||
#newline_default lf any anycrlf
|
#newline_default lf any anycrlf
|
||||||
|
|
||||||
/a\Cb/
|
/ab/
|
||||||
aXb
|
\= Expect error message (too big char) and no match
|
||||||
a\nb
|
|
||||||
\= Expect no match and error message (too big char)
|
|
||||||
A\x{123}B
|
A\x{123}B
|
||||||
A\o{443}B
|
A\o{443}B
|
||||||
|
|
||||||
|
|
|
@ -1,67 +1,6 @@
|
||||||
# This set of tests is for UTF-8 support and Unicode property support, with
|
# This set of tests is for UTF-8 support and Unicode property support, with
|
||||||
# relevance only for the 8-bit library.
|
# relevance only for the 8-bit library.
|
||||||
|
|
||||||
/X(\C{3})/utf
|
|
||||||
X\x{1234}
|
|
||||||
0: X\x{1234}
|
|
||||||
1: \x{1234}
|
|
||||||
|
|
||||||
/X(\C{4})/utf
|
|
||||||
X\x{1234}YZ
|
|
||||||
0: X\x{1234}Y
|
|
||||||
1: \x{1234}Y
|
|
||||||
|
|
||||||
/X\C*/utf
|
|
||||||
XYZabcdce
|
|
||||||
0: XYZabcdce
|
|
||||||
|
|
||||||
/X\C*?/utf
|
|
||||||
XYZabcde
|
|
||||||
0: X
|
|
||||||
|
|
||||||
/X\C{3,5}/utf
|
|
||||||
Xabcdefg
|
|
||||||
0: Xabcde
|
|
||||||
X\x{1234}
|
|
||||||
0: X\x{1234}
|
|
||||||
X\x{1234}YZ
|
|
||||||
0: X\x{1234}YZ
|
|
||||||
X\x{1234}\x{512}
|
|
||||||
0: X\x{1234}\x{512}
|
|
||||||
X\x{1234}\x{512}YZ
|
|
||||||
0: X\x{1234}\x{512}
|
|
||||||
|
|
||||||
/X\C{3,5}?/utf
|
|
||||||
Xabcdefg
|
|
||||||
0: Xabc
|
|
||||||
X\x{1234}
|
|
||||||
0: X\x{1234}
|
|
||||||
X\x{1234}YZ
|
|
||||||
0: X\x{1234}
|
|
||||||
X\x{1234}\x{512}
|
|
||||||
0: X\x{1234}
|
|
||||||
|
|
||||||
/a\Cb/utf
|
|
||||||
aXb
|
|
||||||
0: aXb
|
|
||||||
a\nb
|
|
||||||
0: a\x{0a}b
|
|
||||||
|
|
||||||
/a\C\Cb/utf
|
|
||||||
a\x{100}b
|
|
||||||
0: a\x{100}b
|
|
||||||
|
|
||||||
/ab\Cde/utf
|
|
||||||
abXde
|
|
||||||
0: abXde
|
|
||||||
|
|
||||||
/a\C\Cb/utf
|
|
||||||
a\x{100}b
|
|
||||||
0: a\x{100}b
|
|
||||||
\= Expect no match
|
|
||||||
a\x{12257}b
|
|
||||||
No match
|
|
||||||
|
|
||||||
# The next 3 patterns have UTF-8 errors
|
# The next 3 patterns have UTF-8 errors
|
||||||
|
|
||||||
/[Ã]/utf
|
/[Ã]/utf
|
||||||
|
@ -511,28 +450,6 @@ First code unit = \xf0
|
||||||
Last code unit = \xab
|
Last code unit = \xab
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
# This one is here not because it's different to Perl, but because the way
|
|
||||||
# the captured single-byte is displayed. (In Perl it becomes a character, and you
|
|
||||||
# can't tell the difference.)
|
|
||||||
|
|
||||||
/X(\C)(.*)/utf
|
|
||||||
X\x{1234}
|
|
||||||
0: X\x{1234}
|
|
||||||
1: \x{e1}
|
|
||||||
2: \x{88}\x{b4}
|
|
||||||
X\nabc
|
|
||||||
0: X\x{0a}abc
|
|
||||||
1: \x{0a}
|
|
||||||
2: abc
|
|
||||||
|
|
||||||
# This one is here because Perl gives out a grumbly error message (quite
|
|
||||||
# correctly, but that messes up comparisons).
|
|
||||||
|
|
||||||
/a\Cb/utf
|
|
||||||
\= Expect no match
|
|
||||||
a\x{100}b
|
|
||||||
No match
|
|
||||||
|
|
||||||
/[^ab\xC0-\xF0]/IB,utf
|
/[^ab\xC0-\xF0]/IB,utf
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
|
|
|
@ -6,12 +6,6 @@
|
||||||
#forbid_utf
|
#forbid_utf
|
||||||
#newline_default LF ANY ANYCRLF
|
#newline_default LF ANY ANYCRLF
|
||||||
|
|
||||||
/a\Cb/
|
|
||||||
aXb
|
|
||||||
0: aXb
|
|
||||||
a\nb
|
|
||||||
0: a\x0ab
|
|
||||||
|
|
||||||
/[^\x{c4}]/IB
|
/[^\x{c4}]/IB
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
|
@ -582,7 +576,7 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
|
||||||
|
|
||||||
# Non-UTF characters
|
# Non-UTF characters
|
||||||
|
|
||||||
/\C{2,3}/
|
/.{2,3}/
|
||||||
\x{400000}\x{400001}\x{400002}\x{400003}
|
\x{400000}\x{400001}\x{400002}\x{400003}
|
||||||
** Character \x{400000} is greater than 0xffff and UTF-16 mode is not enabled.
|
** Character \x{400000} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||||
** Truncation will probably give the wrong result.
|
** Truncation will probably give the wrong result.
|
||||||
|
|
|
@ -6,12 +6,6 @@
|
||||||
#forbid_utf
|
#forbid_utf
|
||||||
#newline_default LF ANY ANYCRLF
|
#newline_default LF ANY ANYCRLF
|
||||||
|
|
||||||
/a\Cb/
|
|
||||||
aXb
|
|
||||||
0: aXb
|
|
||||||
a\nb
|
|
||||||
0: a\x0ab
|
|
||||||
|
|
||||||
/[^\x{c4}]/IB
|
/[^\x{c4}]/IB
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
|
@ -583,7 +577,7 @@ Subject length lower bound = 2
|
||||||
|
|
||||||
# Non-UTF characters
|
# Non-UTF characters
|
||||||
|
|
||||||
/\C{2,3}/
|
/.{2,3}/
|
||||||
\x{400000}\x{400001}\x{400002}\x{400003}
|
\x{400000}\x{400001}\x{400002}\x{400003}
|
||||||
0: \x{400000}\x{400001}\x{400002}
|
0: \x{400000}\x{400001}\x{400002}
|
||||||
|
|
||||||
|
|
|
@ -9,76 +9,6 @@
|
||||||
Ã]
|
Ã]
|
||||||
** Failed: invalid UTF-8 string cannot be used as input in UTF mode
|
** Failed: invalid UTF-8 string cannot be used as input in UTF mode
|
||||||
|
|
||||||
/X(\C{3})/utf
|
|
||||||
X\x{11234}Y
|
|
||||||
0: X\x{11234}Y
|
|
||||||
1: \x{11234}Y
|
|
||||||
X\x{11234}YZ
|
|
||||||
0: X\x{11234}Y
|
|
||||||
1: \x{11234}Y
|
|
||||||
|
|
||||||
/X(\C{4})/utf
|
|
||||||
X\x{11234}YZ
|
|
||||||
0: X\x{11234}YZ
|
|
||||||
1: \x{11234}YZ
|
|
||||||
X\x{11234}YZW
|
|
||||||
0: X\x{11234}YZ
|
|
||||||
1: \x{11234}YZ
|
|
||||||
|
|
||||||
/X\C*/utf
|
|
||||||
XYZabcdce
|
|
||||||
0: XYZabcdce
|
|
||||||
|
|
||||||
/X\C*?/utf
|
|
||||||
XYZabcde
|
|
||||||
0: X
|
|
||||||
|
|
||||||
/X\C{3,5}/utf
|
|
||||||
Xabcdefg
|
|
||||||
0: Xabcde
|
|
||||||
X\x{11234}Y
|
|
||||||
0: X\x{11234}Y
|
|
||||||
X\x{11234}YZ
|
|
||||||
0: X\x{11234}YZ
|
|
||||||
X\x{11234}\x{512}
|
|
||||||
0: X\x{11234}\x{512}
|
|
||||||
X\x{11234}\x{512}YZ
|
|
||||||
0: X\x{11234}\x{512}YZ
|
|
||||||
X\x{11234}\x{512}\x{11234}Z
|
|
||||||
0: X\x{11234}\x{512}\x{11234}
|
|
||||||
|
|
||||||
/X\C{3,5}?/utf
|
|
||||||
Xabcdefg
|
|
||||||
0: Xabc
|
|
||||||
X\x{11234}Y
|
|
||||||
0: X\x{11234}Y
|
|
||||||
X\x{11234}YZ
|
|
||||||
0: X\x{11234}Y
|
|
||||||
X\x{11234}\x{512}YZ
|
|
||||||
0: X\x{11234}\x{512}
|
|
||||||
\= Expect no match
|
|
||||||
X\x{11234}
|
|
||||||
No match
|
|
||||||
|
|
||||||
/a\Cb/utf
|
|
||||||
aXb
|
|
||||||
0: aXb
|
|
||||||
a\nb
|
|
||||||
0: a\x{0a}b
|
|
||||||
|
|
||||||
/a\C\Cb/utf
|
|
||||||
a\x{12257}b
|
|
||||||
0: a\x{12257}b
|
|
||||||
\= Expect no match
|
|
||||||
a\x{12257}\x{11234}b
|
|
||||||
No match
|
|
||||||
a\x{100}b
|
|
||||||
No match
|
|
||||||
|
|
||||||
/ab\Cde/utf
|
|
||||||
abXde
|
|
||||||
0: abXde
|
|
||||||
|
|
||||||
# Check maximum character size
|
# Check maximum character size
|
||||||
|
|
||||||
/\x{ffff}/IB,utf
|
/\x{ffff}/IB,utf
|
||||||
|
@ -308,23 +238,6 @@ First code unit = \x{d844}
|
||||||
Last code unit = \x{deab}
|
Last code unit = \x{deab}
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
# These two \C tests, copied from the UTF-8 input file, do not have any
|
|
||||||
# problems in 16 or 32 bits.
|
|
||||||
|
|
||||||
/X(\C)(.*)/utf
|
|
||||||
X\x{1234}
|
|
||||||
0: X\x{1234}
|
|
||||||
1: \x{1234}
|
|
||||||
2:
|
|
||||||
X\nabc
|
|
||||||
0: X\x{0a}abc
|
|
||||||
1: \x{0a}
|
|
||||||
2: abc
|
|
||||||
|
|
||||||
/a\Cb/utf
|
|
||||||
a\x{100}b
|
|
||||||
0: a\x{100}b
|
|
||||||
|
|
||||||
/[^ab\xC0-\xF0]/IB,utf
|
/[^ab\xC0-\xF0]/IB,utf
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
|
@ -1127,10 +1040,6 @@ Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too
|
||||||
/\o{4200000}/utf
|
/\o{4200000}/utf
|
||||||
Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
|
Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
/\C/utf
|
|
||||||
\x{110000}
|
|
||||||
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
|
|
||||||
|
|
||||||
/\x{100}*A/IB,utf
|
/\x{100}*A/IB,utf
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
|
@ -1454,4 +1363,8 @@ Starting code units: \xff
|
||||||
Last code unit = 'B' (caseless)
|
Last code unit = 'B' (caseless)
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 2
|
||||||
|
|
||||||
|
/./utf
|
||||||
|
\x{110000}
|
||||||
|
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -9,74 +9,6 @@
|
||||||
Ã]
|
Ã]
|
||||||
** Failed: invalid UTF-8 string cannot be used as input in UTF mode
|
** Failed: invalid UTF-8 string cannot be used as input in UTF mode
|
||||||
|
|
||||||
/X(\C{3})/utf
|
|
||||||
X\x{11234}Y
|
|
||||||
No match
|
|
||||||
X\x{11234}YZ
|
|
||||||
0: X\x{11234}YZ
|
|
||||||
1: \x{11234}YZ
|
|
||||||
|
|
||||||
/X(\C{4})/utf
|
|
||||||
X\x{11234}YZ
|
|
||||||
No match
|
|
||||||
X\x{11234}YZW
|
|
||||||
0: X\x{11234}YZW
|
|
||||||
1: \x{11234}YZW
|
|
||||||
|
|
||||||
/X\C*/utf
|
|
||||||
XYZabcdce
|
|
||||||
0: XYZabcdce
|
|
||||||
|
|
||||||
/X\C*?/utf
|
|
||||||
XYZabcde
|
|
||||||
0: X
|
|
||||||
|
|
||||||
/X\C{3,5}/utf
|
|
||||||
Xabcdefg
|
|
||||||
0: Xabcde
|
|
||||||
X\x{11234}Y
|
|
||||||
No match
|
|
||||||
X\x{11234}YZ
|
|
||||||
0: X\x{11234}YZ
|
|
||||||
X\x{11234}\x{512}
|
|
||||||
No match
|
|
||||||
X\x{11234}\x{512}YZ
|
|
||||||
0: X\x{11234}\x{512}YZ
|
|
||||||
X\x{11234}\x{512}\x{11234}Z
|
|
||||||
0: X\x{11234}\x{512}\x{11234}Z
|
|
||||||
|
|
||||||
/X\C{3,5}?/utf
|
|
||||||
Xabcdefg
|
|
||||||
0: Xabc
|
|
||||||
X\x{11234}Y
|
|
||||||
No match
|
|
||||||
X\x{11234}YZ
|
|
||||||
0: X\x{11234}YZ
|
|
||||||
X\x{11234}\x{512}YZ
|
|
||||||
0: X\x{11234}\x{512}Y
|
|
||||||
\= Expect no match
|
|
||||||
X\x{11234}
|
|
||||||
No match
|
|
||||||
|
|
||||||
/a\Cb/utf
|
|
||||||
aXb
|
|
||||||
0: aXb
|
|
||||||
a\nb
|
|
||||||
0: a\x{0a}b
|
|
||||||
|
|
||||||
/a\C\Cb/utf
|
|
||||||
a\x{12257}b
|
|
||||||
No match
|
|
||||||
\= Expect no match
|
|
||||||
a\x{12257}\x{11234}b
|
|
||||||
0: a\x{12257}\x{11234}b
|
|
||||||
a\x{100}b
|
|
||||||
No match
|
|
||||||
|
|
||||||
/ab\Cde/utf
|
|
||||||
abXde
|
|
||||||
0: abXde
|
|
||||||
|
|
||||||
# Check maximum character size
|
# Check maximum character size
|
||||||
|
|
||||||
/\x{ffff}/IB,utf
|
/\x{ffff}/IB,utf
|
||||||
|
@ -301,23 +233,6 @@ Options: utf
|
||||||
First code unit = \x{212ab}
|
First code unit = \x{212ab}
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
# These two \C tests, copied from the UTF-8 input file, do not have any
|
|
||||||
# problems in 16 or 32 bits.
|
|
||||||
|
|
||||||
/X(\C)(.*)/utf
|
|
||||||
X\x{1234}
|
|
||||||
0: X\x{1234}
|
|
||||||
1: \x{1234}
|
|
||||||
2:
|
|
||||||
X\nabc
|
|
||||||
0: X\x{0a}abc
|
|
||||||
1: \x{0a}
|
|
||||||
2: abc
|
|
||||||
|
|
||||||
/a\Cb/utf
|
|
||||||
a\x{100}b
|
|
||||||
0: a\x{100}b
|
|
||||||
|
|
||||||
/[^ab\xC0-\xF0]/IB,utf
|
/[^ab\xC0-\xF0]/IB,utf
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
|
@ -1119,10 +1034,6 @@ Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too
|
||||||
/\o{4200000}/utf
|
/\o{4200000}/utf
|
||||||
Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
|
Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
/\C/utf
|
|
||||||
\x{110000}
|
|
||||||
Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0
|
|
||||||
|
|
||||||
/\x{100}*A/IB,utf
|
/\x{100}*A/IB,utf
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
|
@ -1446,4 +1357,8 @@ Starting code units: \xff
|
||||||
Last code unit = 'B' (caseless)
|
Last code unit = 'B' (caseless)
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 2
|
||||||
|
|
||||||
|
/./utf
|
||||||
|
\x{110000}
|
||||||
|
Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -11948,9 +11948,10 @@ Subject length lower bound = 5
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
# A complete set of tests for auto-possessification of character types.
|
# A complete set of tests for auto-possessification of character types, but
|
||||||
|
# omitting \C because it might be disabled (it has its own tests).
|
||||||
|
|
||||||
/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\C \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx
|
/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
\D+
|
\D+
|
||||||
|
@ -11968,8 +11969,6 @@ Subject length lower bound = 5
|
||||||
\D+
|
\D+
|
||||||
Any
|
Any
|
||||||
\D+
|
\D+
|
||||||
AllAny
|
|
||||||
\D+
|
|
||||||
\R
|
\R
|
||||||
\D+
|
\D+
|
||||||
\H
|
\H
|
||||||
|
@ -11989,7 +11988,7 @@ Subject length lower bound = 5
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\C \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx
|
/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
\d++
|
\d++
|
||||||
|
@ -12006,8 +12005,6 @@ Subject length lower bound = 5
|
||||||
\w
|
\w
|
||||||
\d+
|
\d+
|
||||||
Any
|
Any
|
||||||
\d+
|
|
||||||
AllAny
|
|
||||||
\d++
|
\d++
|
||||||
\R
|
\R
|
||||||
\d+
|
\d+
|
||||||
|
@ -12028,7 +12025,7 @@ Subject length lower bound = 5
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\C \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx
|
/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
\S+
|
\S+
|
||||||
|
@ -12045,8 +12042,6 @@ Subject length lower bound = 5
|
||||||
\w
|
\w
|
||||||
\S+
|
\S+
|
||||||
Any
|
Any
|
||||||
\S+
|
|
||||||
AllAny
|
|
||||||
\S++
|
\S++
|
||||||
\R
|
\R
|
||||||
\S+
|
\S+
|
||||||
|
@ -12067,7 +12062,7 @@ Subject length lower bound = 5
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\C \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx
|
/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
\s+
|
\s+
|
||||||
|
@ -12085,8 +12080,6 @@ Subject length lower bound = 5
|
||||||
\s+
|
\s+
|
||||||
Any
|
Any
|
||||||
\s+
|
\s+
|
||||||
AllAny
|
|
||||||
\s+
|
|
||||||
\R
|
\R
|
||||||
\s+
|
\s+
|
||||||
\H
|
\H
|
||||||
|
@ -12106,7 +12099,7 @@ Subject length lower bound = 5
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\C \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx
|
/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
\W+
|
\W+
|
||||||
|
@ -12124,8 +12117,6 @@ Subject length lower bound = 5
|
||||||
\W+
|
\W+
|
||||||
Any
|
Any
|
||||||
\W+
|
\W+
|
||||||
AllAny
|
|
||||||
\W+
|
|
||||||
\R
|
\R
|
||||||
\W+
|
\W+
|
||||||
\H
|
\H
|
||||||
|
@ -12145,7 +12136,7 @@ Subject length lower bound = 5
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\C \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx
|
/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
\w+
|
\w+
|
||||||
|
@ -12162,8 +12153,6 @@ Subject length lower bound = 5
|
||||||
\w
|
\w
|
||||||
\w+
|
\w+
|
||||||
Any
|
Any
|
||||||
\w+
|
|
||||||
AllAny
|
|
||||||
\w++
|
\w++
|
||||||
\R
|
\R
|
||||||
\w+
|
\w+
|
||||||
|
@ -12184,7 +12173,303 @@ Subject length lower bound = 5
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\C \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx
|
/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
\R+
|
||||||
|
\D
|
||||||
|
\R++
|
||||||
|
\d
|
||||||
|
\R+
|
||||||
|
\S
|
||||||
|
\R++
|
||||||
|
\s
|
||||||
|
\R+
|
||||||
|
\W
|
||||||
|
\R++
|
||||||
|
\w
|
||||||
|
\R++
|
||||||
|
Any
|
||||||
|
\R+
|
||||||
|
\R
|
||||||
|
\R+
|
||||||
|
\H
|
||||||
|
\R++
|
||||||
|
\h
|
||||||
|
\R+
|
||||||
|
\V
|
||||||
|
\R+
|
||||||
|
\v
|
||||||
|
\R+
|
||||||
|
\Z
|
||||||
|
\R++
|
||||||
|
\z
|
||||||
|
\R+
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
\H+
|
||||||
|
\D
|
||||||
|
\H+
|
||||||
|
\d
|
||||||
|
\H+
|
||||||
|
\S
|
||||||
|
\H+
|
||||||
|
\s
|
||||||
|
\H+
|
||||||
|
\W
|
||||||
|
\H+
|
||||||
|
\w
|
||||||
|
\H+
|
||||||
|
Any
|
||||||
|
\H+
|
||||||
|
\R
|
||||||
|
\H+
|
||||||
|
\H
|
||||||
|
\H++
|
||||||
|
\h
|
||||||
|
\H+
|
||||||
|
\V
|
||||||
|
\H+
|
||||||
|
\v
|
||||||
|
\H+
|
||||||
|
\Z
|
||||||
|
\H++
|
||||||
|
\z
|
||||||
|
\H+
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
\h+
|
||||||
|
\D
|
||||||
|
\h++
|
||||||
|
\d
|
||||||
|
\h++
|
||||||
|
\S
|
||||||
|
\h+
|
||||||
|
\s
|
||||||
|
\h+
|
||||||
|
\W
|
||||||
|
\h++
|
||||||
|
\w
|
||||||
|
\h+
|
||||||
|
Any
|
||||||
|
\h++
|
||||||
|
\R
|
||||||
|
\h++
|
||||||
|
\H
|
||||||
|
\h+
|
||||||
|
\h
|
||||||
|
\h+
|
||||||
|
\V
|
||||||
|
\h++
|
||||||
|
\v
|
||||||
|
\h+
|
||||||
|
\Z
|
||||||
|
\h++
|
||||||
|
\z
|
||||||
|
\h+
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
\V+
|
||||||
|
\D
|
||||||
|
\V+
|
||||||
|
\d
|
||||||
|
\V+
|
||||||
|
\S
|
||||||
|
\V+
|
||||||
|
\s
|
||||||
|
\V+
|
||||||
|
\W
|
||||||
|
\V+
|
||||||
|
\w
|
||||||
|
\V+
|
||||||
|
Any
|
||||||
|
\V++
|
||||||
|
\R
|
||||||
|
\V+
|
||||||
|
\H
|
||||||
|
\V+
|
||||||
|
\h
|
||||||
|
\V+
|
||||||
|
\V
|
||||||
|
\V++
|
||||||
|
\v
|
||||||
|
\V+
|
||||||
|
\Z
|
||||||
|
\V++
|
||||||
|
\z
|
||||||
|
\V+
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
\v+
|
||||||
|
\D
|
||||||
|
\v++
|
||||||
|
\d
|
||||||
|
\v++
|
||||||
|
\S
|
||||||
|
\v+
|
||||||
|
\s
|
||||||
|
\v+
|
||||||
|
\W
|
||||||
|
\v++
|
||||||
|
\w
|
||||||
|
\v+
|
||||||
|
Any
|
||||||
|
\v+
|
||||||
|
\R
|
||||||
|
\v+
|
||||||
|
\H
|
||||||
|
\v++
|
||||||
|
\h
|
||||||
|
\v++
|
||||||
|
\V
|
||||||
|
\v+
|
||||||
|
\v
|
||||||
|
\v+
|
||||||
|
\Z
|
||||||
|
\v++
|
||||||
|
\z
|
||||||
|
\v+
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/ a+\D a+\d a+\S a+\s a+\W a+\w a+. a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
a+
|
||||||
|
\D
|
||||||
|
a++
|
||||||
|
\d
|
||||||
|
a+
|
||||||
|
\S
|
||||||
|
a++
|
||||||
|
\s
|
||||||
|
a++
|
||||||
|
\W
|
||||||
|
a+
|
||||||
|
\w
|
||||||
|
a+
|
||||||
|
Any
|
||||||
|
a++
|
||||||
|
\R
|
||||||
|
a+
|
||||||
|
\H
|
||||||
|
a++
|
||||||
|
\h
|
||||||
|
a+
|
||||||
|
\V
|
||||||
|
a++
|
||||||
|
\v
|
||||||
|
a++
|
||||||
|
\Z
|
||||||
|
a++
|
||||||
|
\z
|
||||||
|
a++
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
\x0a+
|
||||||
|
\D
|
||||||
|
\x0a++
|
||||||
|
\d
|
||||||
|
\x0a++
|
||||||
|
\S
|
||||||
|
\x0a+
|
||||||
|
\s
|
||||||
|
\x0a+
|
||||||
|
\W
|
||||||
|
\x0a++
|
||||||
|
\w
|
||||||
|
\x0a+
|
||||||
|
Any
|
||||||
|
\x0a+
|
||||||
|
\R
|
||||||
|
\x0a+
|
||||||
|
\H
|
||||||
|
\x0a++
|
||||||
|
\h
|
||||||
|
\x0a++
|
||||||
|
\V
|
||||||
|
\x0a+
|
||||||
|
\v
|
||||||
|
\x0a+
|
||||||
|
\Z
|
||||||
|
\x0a++
|
||||||
|
\z
|
||||||
|
\x0a+
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
Any+
|
||||||
|
\D
|
||||||
|
Any+
|
||||||
|
\d
|
||||||
|
Any+
|
||||||
|
\S
|
||||||
|
Any+
|
||||||
|
\s
|
||||||
|
Any+
|
||||||
|
\W
|
||||||
|
Any+
|
||||||
|
\w
|
||||||
|
Any+
|
||||||
|
Any
|
||||||
|
Any++
|
||||||
|
\R
|
||||||
|
Any+
|
||||||
|
\H
|
||||||
|
Any+
|
||||||
|
\h
|
||||||
|
Any+
|
||||||
|
\V
|
||||||
|
Any+
|
||||||
|
\v
|
||||||
|
Any+
|
||||||
|
\Z
|
||||||
|
Any++
|
||||||
|
\z
|
||||||
|
Any+
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
AllAny+
|
AllAny+
|
||||||
|
@ -12200,8 +12485,6 @@ Subject length lower bound = 5
|
||||||
AllAny+
|
AllAny+
|
||||||
\w
|
\w
|
||||||
AllAny+
|
AllAny+
|
||||||
Any
|
|
||||||
AllAny+
|
|
||||||
AllAny
|
AllAny
|
||||||
AllAny+
|
AllAny+
|
||||||
\R
|
\R
|
||||||
|
@ -12223,358 +12506,7 @@ Subject length lower bound = 5
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\C \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx
|
/ \D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx
|
||||||
------------------------------------------------------------------
|
|
||||||
Bra
|
|
||||||
\R+
|
|
||||||
\D
|
|
||||||
\R++
|
|
||||||
\d
|
|
||||||
\R+
|
|
||||||
\S
|
|
||||||
\R++
|
|
||||||
\s
|
|
||||||
\R+
|
|
||||||
\W
|
|
||||||
\R++
|
|
||||||
\w
|
|
||||||
\R++
|
|
||||||
Any
|
|
||||||
\R+
|
|
||||||
AllAny
|
|
||||||
\R+
|
|
||||||
\R
|
|
||||||
\R+
|
|
||||||
\H
|
|
||||||
\R++
|
|
||||||
\h
|
|
||||||
\R+
|
|
||||||
\V
|
|
||||||
\R+
|
|
||||||
\v
|
|
||||||
\R+
|
|
||||||
\Z
|
|
||||||
\R++
|
|
||||||
\z
|
|
||||||
\R+
|
|
||||||
$
|
|
||||||
Ket
|
|
||||||
End
|
|
||||||
------------------------------------------------------------------
|
|
||||||
|
|
||||||
/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\C \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx
|
|
||||||
------------------------------------------------------------------
|
|
||||||
Bra
|
|
||||||
\H+
|
|
||||||
\D
|
|
||||||
\H+
|
|
||||||
\d
|
|
||||||
\H+
|
|
||||||
\S
|
|
||||||
\H+
|
|
||||||
\s
|
|
||||||
\H+
|
|
||||||
\W
|
|
||||||
\H+
|
|
||||||
\w
|
|
||||||
\H+
|
|
||||||
Any
|
|
||||||
\H+
|
|
||||||
AllAny
|
|
||||||
\H+
|
|
||||||
\R
|
|
||||||
\H+
|
|
||||||
\H
|
|
||||||
\H++
|
|
||||||
\h
|
|
||||||
\H+
|
|
||||||
\V
|
|
||||||
\H+
|
|
||||||
\v
|
|
||||||
\H+
|
|
||||||
\Z
|
|
||||||
\H++
|
|
||||||
\z
|
|
||||||
\H+
|
|
||||||
$
|
|
||||||
Ket
|
|
||||||
End
|
|
||||||
------------------------------------------------------------------
|
|
||||||
|
|
||||||
/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\C \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx
|
|
||||||
------------------------------------------------------------------
|
|
||||||
Bra
|
|
||||||
\h+
|
|
||||||
\D
|
|
||||||
\h++
|
|
||||||
\d
|
|
||||||
\h++
|
|
||||||
\S
|
|
||||||
\h+
|
|
||||||
\s
|
|
||||||
\h+
|
|
||||||
\W
|
|
||||||
\h++
|
|
||||||
\w
|
|
||||||
\h+
|
|
||||||
Any
|
|
||||||
\h+
|
|
||||||
AllAny
|
|
||||||
\h++
|
|
||||||
\R
|
|
||||||
\h++
|
|
||||||
\H
|
|
||||||
\h+
|
|
||||||
\h
|
|
||||||
\h+
|
|
||||||
\V
|
|
||||||
\h++
|
|
||||||
\v
|
|
||||||
\h+
|
|
||||||
\Z
|
|
||||||
\h++
|
|
||||||
\z
|
|
||||||
\h+
|
|
||||||
$
|
|
||||||
Ket
|
|
||||||
End
|
|
||||||
------------------------------------------------------------------
|
|
||||||
|
|
||||||
/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\C \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx
|
|
||||||
------------------------------------------------------------------
|
|
||||||
Bra
|
|
||||||
\V+
|
|
||||||
\D
|
|
||||||
\V+
|
|
||||||
\d
|
|
||||||
\V+
|
|
||||||
\S
|
|
||||||
\V+
|
|
||||||
\s
|
|
||||||
\V+
|
|
||||||
\W
|
|
||||||
\V+
|
|
||||||
\w
|
|
||||||
\V+
|
|
||||||
Any
|
|
||||||
\V+
|
|
||||||
AllAny
|
|
||||||
\V++
|
|
||||||
\R
|
|
||||||
\V+
|
|
||||||
\H
|
|
||||||
\V+
|
|
||||||
\h
|
|
||||||
\V+
|
|
||||||
\V
|
|
||||||
\V++
|
|
||||||
\v
|
|
||||||
\V+
|
|
||||||
\Z
|
|
||||||
\V++
|
|
||||||
\z
|
|
||||||
\V+
|
|
||||||
$
|
|
||||||
Ket
|
|
||||||
End
|
|
||||||
------------------------------------------------------------------
|
|
||||||
|
|
||||||
/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\C \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx
|
|
||||||
------------------------------------------------------------------
|
|
||||||
Bra
|
|
||||||
\v+
|
|
||||||
\D
|
|
||||||
\v++
|
|
||||||
\d
|
|
||||||
\v++
|
|
||||||
\S
|
|
||||||
\v+
|
|
||||||
\s
|
|
||||||
\v+
|
|
||||||
\W
|
|
||||||
\v++
|
|
||||||
\w
|
|
||||||
\v+
|
|
||||||
Any
|
|
||||||
\v+
|
|
||||||
AllAny
|
|
||||||
\v+
|
|
||||||
\R
|
|
||||||
\v+
|
|
||||||
\H
|
|
||||||
\v++
|
|
||||||
\h
|
|
||||||
\v++
|
|
||||||
\V
|
|
||||||
\v+
|
|
||||||
\v
|
|
||||||
\v+
|
|
||||||
\Z
|
|
||||||
\v++
|
|
||||||
\z
|
|
||||||
\v+
|
|
||||||
$
|
|
||||||
Ket
|
|
||||||
End
|
|
||||||
------------------------------------------------------------------
|
|
||||||
|
|
||||||
/ a+\D a+\d a+\S a+\s a+\W a+\w a+. a+\C a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx
|
|
||||||
------------------------------------------------------------------
|
|
||||||
Bra
|
|
||||||
a+
|
|
||||||
\D
|
|
||||||
a++
|
|
||||||
\d
|
|
||||||
a+
|
|
||||||
\S
|
|
||||||
a++
|
|
||||||
\s
|
|
||||||
a++
|
|
||||||
\W
|
|
||||||
a+
|
|
||||||
\w
|
|
||||||
a+
|
|
||||||
Any
|
|
||||||
a+
|
|
||||||
AllAny
|
|
||||||
a++
|
|
||||||
\R
|
|
||||||
a+
|
|
||||||
\H
|
|
||||||
a++
|
|
||||||
\h
|
|
||||||
a+
|
|
||||||
\V
|
|
||||||
a++
|
|
||||||
\v
|
|
||||||
a++
|
|
||||||
\Z
|
|
||||||
a++
|
|
||||||
\z
|
|
||||||
a++
|
|
||||||
$
|
|
||||||
Ket
|
|
||||||
End
|
|
||||||
------------------------------------------------------------------
|
|
||||||
|
|
||||||
/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\C \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx
|
|
||||||
------------------------------------------------------------------
|
|
||||||
Bra
|
|
||||||
\x0a+
|
|
||||||
\D
|
|
||||||
\x0a++
|
|
||||||
\d
|
|
||||||
\x0a++
|
|
||||||
\S
|
|
||||||
\x0a+
|
|
||||||
\s
|
|
||||||
\x0a+
|
|
||||||
\W
|
|
||||||
\x0a++
|
|
||||||
\w
|
|
||||||
\x0a+
|
|
||||||
Any
|
|
||||||
\x0a+
|
|
||||||
AllAny
|
|
||||||
\x0a+
|
|
||||||
\R
|
|
||||||
\x0a+
|
|
||||||
\H
|
|
||||||
\x0a++
|
|
||||||
\h
|
|
||||||
\x0a++
|
|
||||||
\V
|
|
||||||
\x0a+
|
|
||||||
\v
|
|
||||||
\x0a+
|
|
||||||
\Z
|
|
||||||
\x0a++
|
|
||||||
\z
|
|
||||||
\x0a+
|
|
||||||
$
|
|
||||||
Ket
|
|
||||||
End
|
|
||||||
------------------------------------------------------------------
|
|
||||||
|
|
||||||
/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\C .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx
|
|
||||||
------------------------------------------------------------------
|
|
||||||
Bra
|
|
||||||
Any+
|
|
||||||
\D
|
|
||||||
Any+
|
|
||||||
\d
|
|
||||||
Any+
|
|
||||||
\S
|
|
||||||
Any+
|
|
||||||
\s
|
|
||||||
Any+
|
|
||||||
\W
|
|
||||||
Any+
|
|
||||||
\w
|
|
||||||
Any+
|
|
||||||
Any
|
|
||||||
Any+
|
|
||||||
AllAny
|
|
||||||
Any++
|
|
||||||
\R
|
|
||||||
Any+
|
|
||||||
\H
|
|
||||||
Any+
|
|
||||||
\h
|
|
||||||
Any+
|
|
||||||
\V
|
|
||||||
Any+
|
|
||||||
\v
|
|
||||||
Any+
|
|
||||||
\Z
|
|
||||||
Any++
|
|
||||||
\z
|
|
||||||
Any+
|
|
||||||
$
|
|
||||||
Ket
|
|
||||||
End
|
|
||||||
------------------------------------------------------------------
|
|
||||||
|
|
||||||
/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\C .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx
|
|
||||||
------------------------------------------------------------------
|
|
||||||
Bra
|
|
||||||
AllAny+
|
|
||||||
\D
|
|
||||||
AllAny+
|
|
||||||
\d
|
|
||||||
AllAny+
|
|
||||||
\S
|
|
||||||
AllAny+
|
|
||||||
\s
|
|
||||||
AllAny+
|
|
||||||
\W
|
|
||||||
AllAny+
|
|
||||||
\w
|
|
||||||
AllAny+
|
|
||||||
AllAny
|
|
||||||
AllAny+
|
|
||||||
AllAny
|
|
||||||
AllAny+
|
|
||||||
\R
|
|
||||||
AllAny+
|
|
||||||
\H
|
|
||||||
AllAny+
|
|
||||||
\h
|
|
||||||
AllAny+
|
|
||||||
\V
|
|
||||||
AllAny+
|
|
||||||
\v
|
|
||||||
AllAny+
|
|
||||||
\Z
|
|
||||||
AllAny++
|
|
||||||
\z
|
|
||||||
AllAny+
|
|
||||||
$
|
|
||||||
Ket
|
|
||||||
End
|
|
||||||
------------------------------------------------------------------
|
|
||||||
|
|
||||||
/\D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \C+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx
|
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
\D+
|
\D+
|
||||||
|
@ -12588,8 +12520,6 @@ Subject length lower bound = 5
|
||||||
\W+
|
\W+
|
||||||
/m $
|
/m $
|
||||||
\w++
|
\w++
|
||||||
/m $
|
|
||||||
AllAny+
|
|
||||||
/m $
|
/m $
|
||||||
\R+
|
\R+
|
||||||
/m $
|
/m $
|
||||||
|
@ -14210,9 +14140,6 @@ Capturing subpattern count = 2
|
||||||
May match empty string
|
May match empty string
|
||||||
Subject length lower bound = 0
|
Subject length lower bound = 0
|
||||||
|
|
||||||
/ab\Cde/never_backslash_c
|
|
||||||
Failed: error 183 at offset 3: using \C is disabled by the application
|
|
||||||
|
|
||||||
/abc/
|
/abc/
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
\[9x!xxx(]{9999}
|
\[9x!xxx(]{9999}
|
||||||
|
@ -14532,14 +14459,6 @@ Subject length lower bound = 0
|
||||||
\0{ab}
|
\0{ab}
|
||||||
0: \x00{ab}
|
0: \x00{ab}
|
||||||
|
|
||||||
/ab\Cde/
|
|
||||||
abXde
|
|
||||||
0: abXde
|
|
||||||
|
|
||||||
/(?<=ab\Cde)X/
|
|
||||||
abZdeX
|
|
||||||
0: X
|
|
||||||
|
|
||||||
/^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-02}Z/
|
/^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-02}Z/
|
||||||
ababababbbabZXXXX
|
ababababbbabZXXXX
|
||||||
0: ababababbbabZ
|
0: ababababbbabZ
|
||||||
|
|
|
@ -0,0 +1,89 @@
|
||||||
|
# These are tests of \C that do not involve UTF. They are not run when \C is
|
||||||
|
# disabled by compiling with --enable-never-backslash-C.
|
||||||
|
|
||||||
|
/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
AllAny+
|
||||||
|
\D
|
||||||
|
AllAny+
|
||||||
|
\d
|
||||||
|
AllAny+
|
||||||
|
\S
|
||||||
|
AllAny+
|
||||||
|
\s
|
||||||
|
AllAny+
|
||||||
|
\W
|
||||||
|
AllAny+
|
||||||
|
\w
|
||||||
|
AllAny+
|
||||||
|
Any
|
||||||
|
AllAny+
|
||||||
|
\R
|
||||||
|
AllAny+
|
||||||
|
\H
|
||||||
|
AllAny+
|
||||||
|
\h
|
||||||
|
AllAny+
|
||||||
|
\V
|
||||||
|
AllAny+
|
||||||
|
\v
|
||||||
|
AllAny+
|
||||||
|
\Z
|
||||||
|
AllAny++
|
||||||
|
\z
|
||||||
|
AllAny+
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\D+\C \d+\C \S+\C \s+\C \W+\C \w+\C .+\C \R+\C \H+\C \h+\C \V+\C \v+\C a+\C \n+\C \C+\C/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
\D+
|
||||||
|
AllAny
|
||||||
|
\d+
|
||||||
|
AllAny
|
||||||
|
\S+
|
||||||
|
AllAny
|
||||||
|
\s+
|
||||||
|
AllAny
|
||||||
|
\W+
|
||||||
|
AllAny
|
||||||
|
\w+
|
||||||
|
AllAny
|
||||||
|
Any+
|
||||||
|
AllAny
|
||||||
|
\R+
|
||||||
|
AllAny
|
||||||
|
\H+
|
||||||
|
AllAny
|
||||||
|
\h+
|
||||||
|
AllAny
|
||||||
|
\V+
|
||||||
|
AllAny
|
||||||
|
\v+
|
||||||
|
AllAny
|
||||||
|
a+
|
||||||
|
AllAny
|
||||||
|
\x0a+
|
||||||
|
AllAny
|
||||||
|
AllAny+
|
||||||
|
AllAny
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/ab\Cde/never_backslash_c
|
||||||
|
Failed: error 183 at offset 3: using \C is disabled by the application
|
||||||
|
|
||||||
|
/ab\Cde/
|
||||||
|
abXde
|
||||||
|
0: abXde
|
||||||
|
|
||||||
|
/(?<=ab\Cde)X/
|
||||||
|
abZdeX
|
||||||
|
0: X
|
||||||
|
|
||||||
|
# End of testinput21
|
|
@ -0,0 +1,161 @@
|
||||||
|
# Tests of \C when Unicode support is available. Note that \C is not supported
|
||||||
|
# for DFA matching in UTF mode, so this test is not run with -dfa. The output
|
||||||
|
# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match
|
||||||
|
# in some widths and not in others.
|
||||||
|
|
||||||
|
/ab\Cde/utf
|
||||||
|
abXde
|
||||||
|
0: abXde
|
||||||
|
|
||||||
|
# This should produce an error diagnostic (\C in UTF lookbehind)
|
||||||
|
|
||||||
|
/(?<=ab\Cde)X/utf
|
||||||
|
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
|
||||||
|
|
||||||
|
# Autopossessification tests
|
||||||
|
|
||||||
|
/\C+\X \X+\C/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
AllAny+
|
||||||
|
extuni
|
||||||
|
extuni+
|
||||||
|
AllAny
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\C+\X \X+\C/Bx,utf
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
Anybyte+
|
||||||
|
extuni
|
||||||
|
extuni+
|
||||||
|
Anybyte
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\C\X*TӅ;
|
||||||
|
{0,6}\v+
F
|
||||||
|
/utf
|
||||||
|
\= Expect no match
|
||||||
|
Ӆ\x0a
|
||||||
|
No match
|
||||||
|
|
||||||
|
/\C(\W?ſ)'?{{/utf
|
||||||
|
\= Expect no match
|
||||||
|
\\C(\\W?ſ)'?{{
|
||||||
|
No match
|
||||||
|
|
||||||
|
/X(\C{3})/utf
|
||||||
|
X\x{1234}
|
||||||
|
No match
|
||||||
|
X\x{11234}Y
|
||||||
|
0: X\x{11234}Y
|
||||||
|
1: \x{11234}Y
|
||||||
|
X\x{11234}YZ
|
||||||
|
0: X\x{11234}Y
|
||||||
|
1: \x{11234}Y
|
||||||
|
|
||||||
|
/X(\C{4})/utf
|
||||||
|
X\x{1234}YZ
|
||||||
|
No match
|
||||||
|
X\x{11234}YZ
|
||||||
|
0: X\x{11234}YZ
|
||||||
|
1: \x{11234}YZ
|
||||||
|
X\x{11234}YZW
|
||||||
|
0: X\x{11234}YZ
|
||||||
|
1: \x{11234}YZ
|
||||||
|
|
||||||
|
/X\C*/utf
|
||||||
|
XYZabcdce
|
||||||
|
0: XYZabcdce
|
||||||
|
|
||||||
|
/X\C*?/utf
|
||||||
|
XYZabcde
|
||||||
|
0: X
|
||||||
|
|
||||||
|
/X\C{3,5}/utf
|
||||||
|
Xabcdefg
|
||||||
|
0: Xabcde
|
||||||
|
X\x{1234}
|
||||||
|
No match
|
||||||
|
X\x{1234}YZ
|
||||||
|
0: X\x{1234}YZ
|
||||||
|
X\x{1234}\x{512}
|
||||||
|
No match
|
||||||
|
X\x{1234}\x{512}YZ
|
||||||
|
0: X\x{1234}\x{512}YZ
|
||||||
|
X\x{11234}Y
|
||||||
|
0: X\x{11234}Y
|
||||||
|
X\x{11234}YZ
|
||||||
|
0: X\x{11234}YZ
|
||||||
|
X\x{11234}\x{512}
|
||||||
|
0: X\x{11234}\x{512}
|
||||||
|
X\x{11234}\x{512}YZ
|
||||||
|
0: X\x{11234}\x{512}YZ
|
||||||
|
X\x{11234}\x{512}\x{11234}Z
|
||||||
|
0: X\x{11234}\x{512}\x{11234}
|
||||||
|
|
||||||
|
/X\C{3,5}?/utf
|
||||||
|
Xabcdefg
|
||||||
|
0: Xabc
|
||||||
|
X\x{1234}
|
||||||
|
No match
|
||||||
|
X\x{1234}YZ
|
||||||
|
0: X\x{1234}YZ
|
||||||
|
X\x{1234}\x{512}
|
||||||
|
No match
|
||||||
|
X\x{11234}Y
|
||||||
|
0: X\x{11234}Y
|
||||||
|
X\x{11234}YZ
|
||||||
|
0: X\x{11234}Y
|
||||||
|
X\x{11234}\x{512}YZ
|
||||||
|
0: X\x{11234}\x{512}
|
||||||
|
X\x{11234}
|
||||||
|
No match
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
aXb
|
||||||
|
0: aXb
|
||||||
|
a\nb
|
||||||
|
0: a\x{0a}b
|
||||||
|
a\x{100}b
|
||||||
|
0: a\x{100}b
|
||||||
|
|
||||||
|
/a\C\Cb/utf
|
||||||
|
a\x{100}b
|
||||||
|
No match
|
||||||
|
a\x{12257}b
|
||||||
|
0: a\x{12257}b
|
||||||
|
a\x{12257}\x{11234}b
|
||||||
|
No match
|
||||||
|
|
||||||
|
/ab\Cde/utf
|
||||||
|
abXde
|
||||||
|
0: abXde
|
||||||
|
|
||||||
|
# This one is here not because it's different to Perl, but because the way
|
||||||
|
# the captured single code unit is displayed. (In Perl it becomes a character,
|
||||||
|
# and you can't tell the difference.)
|
||||||
|
|
||||||
|
/X(\C)(.*)/utf
|
||||||
|
X\x{1234}
|
||||||
|
0: X\x{1234}
|
||||||
|
1: \x{1234}
|
||||||
|
2:
|
||||||
|
X\nabc
|
||||||
|
0: X\x{0a}abc
|
||||||
|
1: \x{0a}
|
||||||
|
2: abc
|
||||||
|
|
||||||
|
# This one is here because Perl gives out a grumbly error message (quite
|
||||||
|
# correctly, but that messes up comparisons).
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
\= Expect no match in 8-bit mode
|
||||||
|
a\x{100}b
|
||||||
|
0: a\x{100}b
|
||||||
|
|
||||||
|
# End of testinput22
|
|
@ -0,0 +1,159 @@
|
||||||
|
# Tests of \C when Unicode support is available. Note that \C is not supported
|
||||||
|
# for DFA matching in UTF mode, so this test is not run with -dfa. The output
|
||||||
|
# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match
|
||||||
|
# in some widths and not in others.
|
||||||
|
|
||||||
|
/ab\Cde/utf
|
||||||
|
abXde
|
||||||
|
0: abXde
|
||||||
|
|
||||||
|
# This should produce an error diagnostic (\C in UTF lookbehind)
|
||||||
|
|
||||||
|
/(?<=ab\Cde)X/utf
|
||||||
|
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
|
||||||
|
|
||||||
|
# Autopossessification tests
|
||||||
|
|
||||||
|
/\C+\X \X+\C/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
AllAny+
|
||||||
|
extuni
|
||||||
|
extuni+
|
||||||
|
AllAny
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\C+\X \X+\C/Bx,utf
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
Anybyte+
|
||||||
|
extuni
|
||||||
|
extuni+
|
||||||
|
Anybyte
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\C\X*TӅ;
|
||||||
|
{0,6}\v+
F
|
||||||
|
/utf
|
||||||
|
\= Expect no match
|
||||||
|
Ӆ\x0a
|
||||||
|
No match
|
||||||
|
|
||||||
|
/\C(\W?ſ)'?{{/utf
|
||||||
|
\= Expect no match
|
||||||
|
\\C(\\W?ſ)'?{{
|
||||||
|
No match
|
||||||
|
|
||||||
|
/X(\C{3})/utf
|
||||||
|
X\x{1234}
|
||||||
|
No match
|
||||||
|
X\x{11234}Y
|
||||||
|
No match
|
||||||
|
X\x{11234}YZ
|
||||||
|
0: X\x{11234}YZ
|
||||||
|
1: \x{11234}YZ
|
||||||
|
|
||||||
|
/X(\C{4})/utf
|
||||||
|
X\x{1234}YZ
|
||||||
|
No match
|
||||||
|
X\x{11234}YZ
|
||||||
|
No match
|
||||||
|
X\x{11234}YZW
|
||||||
|
0: X\x{11234}YZW
|
||||||
|
1: \x{11234}YZW
|
||||||
|
|
||||||
|
/X\C*/utf
|
||||||
|
XYZabcdce
|
||||||
|
0: XYZabcdce
|
||||||
|
|
||||||
|
/X\C*?/utf
|
||||||
|
XYZabcde
|
||||||
|
0: X
|
||||||
|
|
||||||
|
/X\C{3,5}/utf
|
||||||
|
Xabcdefg
|
||||||
|
0: Xabcde
|
||||||
|
X\x{1234}
|
||||||
|
No match
|
||||||
|
X\x{1234}YZ
|
||||||
|
0: X\x{1234}YZ
|
||||||
|
X\x{1234}\x{512}
|
||||||
|
No match
|
||||||
|
X\x{1234}\x{512}YZ
|
||||||
|
0: X\x{1234}\x{512}YZ
|
||||||
|
X\x{11234}Y
|
||||||
|
No match
|
||||||
|
X\x{11234}YZ
|
||||||
|
0: X\x{11234}YZ
|
||||||
|
X\x{11234}\x{512}
|
||||||
|
No match
|
||||||
|
X\x{11234}\x{512}YZ
|
||||||
|
0: X\x{11234}\x{512}YZ
|
||||||
|
X\x{11234}\x{512}\x{11234}Z
|
||||||
|
0: X\x{11234}\x{512}\x{11234}Z
|
||||||
|
|
||||||
|
/X\C{3,5}?/utf
|
||||||
|
Xabcdefg
|
||||||
|
0: Xabc
|
||||||
|
X\x{1234}
|
||||||
|
No match
|
||||||
|
X\x{1234}YZ
|
||||||
|
0: X\x{1234}YZ
|
||||||
|
X\x{1234}\x{512}
|
||||||
|
No match
|
||||||
|
X\x{11234}Y
|
||||||
|
No match
|
||||||
|
X\x{11234}YZ
|
||||||
|
0: X\x{11234}YZ
|
||||||
|
X\x{11234}\x{512}YZ
|
||||||
|
0: X\x{11234}\x{512}Y
|
||||||
|
X\x{11234}
|
||||||
|
No match
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
aXb
|
||||||
|
0: aXb
|
||||||
|
a\nb
|
||||||
|
0: a\x{0a}b
|
||||||
|
a\x{100}b
|
||||||
|
0: a\x{100}b
|
||||||
|
|
||||||
|
/a\C\Cb/utf
|
||||||
|
a\x{100}b
|
||||||
|
No match
|
||||||
|
a\x{12257}b
|
||||||
|
No match
|
||||||
|
a\x{12257}\x{11234}b
|
||||||
|
0: a\x{12257}\x{11234}b
|
||||||
|
|
||||||
|
/ab\Cde/utf
|
||||||
|
abXde
|
||||||
|
0: abXde
|
||||||
|
|
||||||
|
# This one is here not because it's different to Perl, but because the way
|
||||||
|
# the captured single code unit is displayed. (In Perl it becomes a character,
|
||||||
|
# and you can't tell the difference.)
|
||||||
|
|
||||||
|
/X(\C)(.*)/utf
|
||||||
|
X\x{1234}
|
||||||
|
0: X\x{1234}
|
||||||
|
1: \x{1234}
|
||||||
|
2:
|
||||||
|
X\nabc
|
||||||
|
0: X\x{0a}abc
|
||||||
|
1: \x{0a}
|
||||||
|
2: abc
|
||||||
|
|
||||||
|
# This one is here because Perl gives out a grumbly error message (quite
|
||||||
|
# correctly, but that messes up comparisons).
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
\= Expect no match in 8-bit mode
|
||||||
|
a\x{100}b
|
||||||
|
0: a\x{100}b
|
||||||
|
|
||||||
|
# End of testinput22
|
|
@ -0,0 +1,163 @@
|
||||||
|
# Tests of \C when Unicode support is available. Note that \C is not supported
|
||||||
|
# for DFA matching in UTF mode, so this test is not run with -dfa. The output
|
||||||
|
# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match
|
||||||
|
# in some widths and not in others.
|
||||||
|
|
||||||
|
/ab\Cde/utf
|
||||||
|
abXde
|
||||||
|
0: abXde
|
||||||
|
|
||||||
|
# This should produce an error diagnostic (\C in UTF lookbehind)
|
||||||
|
|
||||||
|
/(?<=ab\Cde)X/utf
|
||||||
|
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
|
||||||
|
|
||||||
|
# Autopossessification tests
|
||||||
|
|
||||||
|
/\C+\X \X+\C/Bx
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
AllAny+
|
||||||
|
extuni
|
||||||
|
extuni+
|
||||||
|
AllAny
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\C+\X \X+\C/Bx,utf
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
Anybyte+
|
||||||
|
extuni
|
||||||
|
extuni+
|
||||||
|
Anybyte
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\C\X*TӅ;
|
||||||
|
{0,6}\v+
F
|
||||||
|
/utf
|
||||||
|
\= Expect no match
|
||||||
|
Ӆ\x0a
|
||||||
|
No match
|
||||||
|
|
||||||
|
/\C(\W?ſ)'?{{/utf
|
||||||
|
\= Expect no match
|
||||||
|
\\C(\\W?ſ)'?{{
|
||||||
|
No match
|
||||||
|
|
||||||
|
/X(\C{3})/utf
|
||||||
|
X\x{1234}
|
||||||
|
0: X\x{1234}
|
||||||
|
1: \x{1234}
|
||||||
|
X\x{11234}Y
|
||||||
|
0: X\x{f0}\x{91}\x{88}
|
||||||
|
1: \x{f0}\x{91}\x{88}
|
||||||
|
X\x{11234}YZ
|
||||||
|
0: X\x{f0}\x{91}\x{88}
|
||||||
|
1: \x{f0}\x{91}\x{88}
|
||||||
|
|
||||||
|
/X(\C{4})/utf
|
||||||
|
X\x{1234}YZ
|
||||||
|
0: X\x{1234}Y
|
||||||
|
1: \x{1234}Y
|
||||||
|
X\x{11234}YZ
|
||||||
|
0: X\x{11234}
|
||||||
|
1: \x{11234}
|
||||||
|
X\x{11234}YZW
|
||||||
|
0: X\x{11234}
|
||||||
|
1: \x{11234}
|
||||||
|
|
||||||
|
/X\C*/utf
|
||||||
|
XYZabcdce
|
||||||
|
0: XYZabcdce
|
||||||
|
|
||||||
|
/X\C*?/utf
|
||||||
|
XYZabcde
|
||||||
|
0: X
|
||||||
|
|
||||||
|
/X\C{3,5}/utf
|
||||||
|
Xabcdefg
|
||||||
|
0: Xabcde
|
||||||
|
X\x{1234}
|
||||||
|
0: X\x{1234}
|
||||||
|
X\x{1234}YZ
|
||||||
|
0: X\x{1234}YZ
|
||||||
|
X\x{1234}\x{512}
|
||||||
|
0: X\x{1234}\x{512}
|
||||||
|
X\x{1234}\x{512}YZ
|
||||||
|
0: X\x{1234}\x{512}
|
||||||
|
X\x{11234}Y
|
||||||
|
0: X\x{11234}Y
|
||||||
|
X\x{11234}YZ
|
||||||
|
0: X\x{11234}Y
|
||||||
|
X\x{11234}\x{512}
|
||||||
|
0: X\x{11234}\x{d4}
|
||||||
|
X\x{11234}\x{512}YZ
|
||||||
|
0: X\x{11234}\x{d4}
|
||||||
|
X\x{11234}\x{512}\x{11234}Z
|
||||||
|
0: X\x{11234}\x{d4}
|
||||||
|
|
||||||
|
/X\C{3,5}?/utf
|
||||||
|
Xabcdefg
|
||||||
|
0: Xabc
|
||||||
|
X\x{1234}
|
||||||
|
0: X\x{1234}
|
||||||
|
X\x{1234}YZ
|
||||||
|
0: X\x{1234}
|
||||||
|
X\x{1234}\x{512}
|
||||||
|
0: X\x{1234}
|
||||||
|
X\x{11234}Y
|
||||||
|
0: X\x{f0}\x{91}\x{88}
|
||||||
|
X\x{11234}YZ
|
||||||
|
0: X\x{f0}\x{91}\x{88}
|
||||||
|
X\x{11234}\x{512}YZ
|
||||||
|
0: X\x{f0}\x{91}\x{88}
|
||||||
|
X\x{11234}
|
||||||
|
0: X\x{f0}\x{91}\x{88}
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
aXb
|
||||||
|
0: aXb
|
||||||
|
a\nb
|
||||||
|
0: a\x{0a}b
|
||||||
|
a\x{100}b
|
||||||
|
No match
|
||||||
|
|
||||||
|
/a\C\Cb/utf
|
||||||
|
a\x{100}b
|
||||||
|
0: a\x{100}b
|
||||||
|
a\x{12257}b
|
||||||
|
No match
|
||||||
|
a\x{12257}\x{11234}b
|
||||||
|
No match
|
||||||
|
|
||||||
|
/ab\Cde/utf
|
||||||
|
abXde
|
||||||
|
0: abXde
|
||||||
|
|
||||||
|
# This one is here not because it's different to Perl, but because the way
|
||||||
|
# the captured single code unit is displayed. (In Perl it becomes a character,
|
||||||
|
# and you can't tell the difference.)
|
||||||
|
|
||||||
|
/X(\C)(.*)/utf
|
||||||
|
X\x{1234}
|
||||||
|
0: X\x{1234}
|
||||||
|
1: \x{e1}
|
||||||
|
2: \x{88}\x{b4}
|
||||||
|
X\nabc
|
||||||
|
0: X\x{0a}abc
|
||||||
|
1: \x{0a}
|
||||||
|
2: abc
|
||||||
|
|
||||||
|
# This one is here because Perl gives out a grumbly error message (quite
|
||||||
|
# correctly, but that messes up comparisons).
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
\= Expect no match in 8-bit mode
|
||||||
|
a\x{100}b
|
||||||
|
No match
|
||||||
|
|
||||||
|
# End of testinput22
|
|
@ -0,0 +1,8 @@
|
||||||
|
# This test is run when PCRE2 has been built with --enable-never-backslash-C,
|
||||||
|
# which disables the use of \C. All we can do is check that it gives the
|
||||||
|
# correct error message.
|
||||||
|
|
||||||
|
/a\Cb/
|
||||||
|
Failed: error 185 at offset 2: using \C is disabled in this PCRE2 library
|
||||||
|
|
||||||
|
# End of testinput23
|
|
@ -181,10 +181,6 @@ Subject length lower bound = 3
|
||||||
\x{212ab}\x{212ab}\x{212ab}\x{861}
|
\x{212ab}\x{212ab}\x{212ab}\x{861}
|
||||||
0: \x{212ab}\x{212ab}\x{212ab}
|
0: \x{212ab}\x{212ab}\x{212ab}
|
||||||
|
|
||||||
/(?<=\C)X/utf
|
|
||||||
Failed: error 136 at offset 6: \C is not allowed in a lookbehind assertion
|
|
||||||
Should produce an error diagnostic
|
|
||||||
|
|
||||||
/^[ab]/IB,utf
|
/^[ab]/IB,utf
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
|
@ -2905,9 +2901,6 @@ No match
|
||||||
aAz
|
aAz
|
||||||
No match
|
No match
|
||||||
|
|
||||||
/(?<=ab\Cde)X/utf
|
|
||||||
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
|
|
||||||
|
|
||||||
/\X/
|
/\X/
|
||||||
a\=ps
|
a\=ps
|
||||||
0: a
|
0: a
|
||||||
|
@ -3803,7 +3796,7 @@ No match
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
|
/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
\D+
|
\D+
|
||||||
|
@ -3818,8 +3811,6 @@ No match
|
||||||
extuni
|
extuni
|
||||||
\w+
|
\w+
|
||||||
extuni
|
extuni
|
||||||
AllAny+
|
|
||||||
extuni
|
|
||||||
\R+
|
\R+
|
||||||
extuni
|
extuni
|
||||||
\H+
|
\H+
|
||||||
|
@ -3858,7 +3849,7 @@ No match
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
|
/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
extuni+
|
extuni+
|
||||||
|
@ -3876,8 +3867,6 @@ No match
|
||||||
extuni+
|
extuni+
|
||||||
Any
|
Any
|
||||||
extuni+
|
extuni+
|
||||||
AllAny
|
|
||||||
extuni+
|
|
||||||
\R
|
\R
|
||||||
extuni+
|
extuni+
|
||||||
\H
|
\H
|
||||||
|
@ -4010,18 +3999,6 @@ Failed: error 122 at offset 1227: unmatched closing parenthesis
|
||||||
"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
|
"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
|
||||||
Failed: error 124 at offset 113: letter or underscore expected after (?< or (?'
|
Failed: error 124 at offset 113: letter or underscore expected after (?< or (?'
|
||||||
|
|
||||||
/\C\X*TӅ;
|
|
||||||
{0,6}\v+
F
|
|
||||||
/utf
|
|
||||||
\= Expect no match
|
|
||||||
Ӆ\x0a
|
|
||||||
No match
|
|
||||||
|
|
||||||
/\C(\W?ſ)'?{{/utf
|
|
||||||
\= Expect no match
|
|
||||||
\\C(\\W?ſ)'?{{
|
|
||||||
No match
|
|
||||||
|
|
||||||
/[\pS#moq]/
|
/[\pS#moq]/
|
||||||
=
|
=
|
||||||
0: =
|
0: =
|
||||||
|
|
|
@ -7174,14 +7174,6 @@ Matched, but offsets vector is too small to show all matches
|
||||||
2: aa
|
2: aa
|
||||||
3: a
|
3: a
|
||||||
|
|
||||||
/ab\Cde/
|
|
||||||
abXde
|
|
||||||
0: abXde
|
|
||||||
|
|
||||||
/(?<=ab\Cde)X/
|
|
||||||
abZdeX
|
|
||||||
0: X
|
|
||||||
|
|
||||||
/^\R/
|
/^\R/
|
||||||
\r\=ps
|
\r\=ps
|
||||||
0: \x0d
|
0: \x0d
|
||||||
|
|
|
@ -1141,13 +1141,6 @@ Partial match: abcde
|
||||||
the cat\=ph
|
the cat\=ph
|
||||||
Partial match: the cat
|
Partial match: the cat
|
||||||
|
|
||||||
/ab\Cde/utf
|
|
||||||
abXde
|
|
||||||
Failed: error -42: pattern contains an item that is not supported for DFA matching
|
|
||||||
|
|
||||||
/(?<=ab\Cde)X/utf
|
|
||||||
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
|
|
||||||
|
|
||||||
/./newline=crlf,utf
|
/./newline=crlf,utf
|
||||||
\r\=ps
|
\r\=ps
|
||||||
0: \x{0d}
|
0: \x{0d}
|
||||||
|
|
|
@ -4,12 +4,8 @@
|
||||||
#forbid_utf
|
#forbid_utf
|
||||||
#newline_default lf any anycrlf
|
#newline_default lf any anycrlf
|
||||||
|
|
||||||
/a\Cb/
|
/ab/
|
||||||
aXb
|
\= Expect error message (too big char) and no match
|
||||||
0: aXb
|
|
||||||
a\nb
|
|
||||||
0: a\x0ab
|
|
||||||
\= Expect no match and error message (too big char)
|
|
||||||
A\x{123}B
|
A\x{123}B
|
||||||
** Character \x{123} is greater than 255 and UTF-8 mode is not enabled.
|
** Character \x{123} is greater than 255 and UTF-8 mode is not enabled.
|
||||||
** Truncation will probably give the wrong result.
|
** Truncation will probably give the wrong result.
|
||||||
|
|
Loading…
Reference in New Issue