Got pcre2test as far as it can go without real compile and match functions.
This commit is contained in:
parent
d455d11b62
commit
1b4bcb79ae
|
@ -190,7 +190,10 @@ the \fB-help\fP option states whether or not \fBreadline()\fP will be used.
|
|||
The program handles any number of tests, each of which consists of a set of
|
||||
input lines. Each set starts with a regular expression pattern, followed by any
|
||||
number of subject lines to be matched against that pattern. In between sets of
|
||||
test data, command lines that begin with a hash (#) character may appear.
|
||||
test data, command lines that begin with a hash (#) character may appear. This
|
||||
file format, with some restrictions, can also be processed by the
|
||||
\fBperltest.pl\fP script that is distributed with PCRE2 as a means of checking
|
||||
that the behaviour of PCRE2 and Perl is the same.
|
||||
.P
|
||||
Each subject line is matched separately and independently. If you want to do
|
||||
multi-line matches, you have to use the \en escape sequence (or \er or \er\en,
|
||||
|
@ -223,6 +226,16 @@ pattern.
|
|||
.sp
|
||||
This command sets a default modifier list that applies to all subsequent
|
||||
patterns. Modifiers on a pattern can change these settings.
|
||||
.sp
|
||||
#perltest
|
||||
.sp
|
||||
The appearance of this line causes all subsequent modifier settings to be
|
||||
checked for compatibility with the \fBperltest.pl\fP script, which is used to
|
||||
confirm that Perl gives the same results as PCRE2. Also, apart from comment
|
||||
lines, none of the other command lines are permitted, because they and many
|
||||
of the modifiers are specific to \fBpcre2test\fP, and should not be used in
|
||||
test files that are also processed by \fBperltest.pl\fP. The \fB#perltest\fP
|
||||
command helps detect tests that are accidentally put in the wrong file.
|
||||
.sp
|
||||
#subject <modifier-list>
|
||||
.sp
|
||||
|
@ -416,7 +429,7 @@ about the pattern:
|
|||
posix use the POSIX API
|
||||
save=<file name> save compiled pattern
|
||||
stackguard=<number> test the stackguard feature
|
||||
tables=[0|1] select internal tables
|
||||
tables=[0|1|2] select internal tables
|
||||
.sp
|
||||
The effects of these modifiers are described in the following sections.
|
||||
FIXME: Give more examples.
|
||||
|
@ -480,11 +493,12 @@ to 7:
|
|||
3 normal match and soft partial match
|
||||
4 hard partial match only
|
||||
6 soft and hard partial match
|
||||
7 all three modes (default)
|
||||
7 all three modes
|
||||
.sp
|
||||
If JIT compilation is successful, the compiled JIT code will automatically be
|
||||
used when \fBpcre2_match()\fP is run, except when incompatible run-time
|
||||
options are specified. For more details, see the
|
||||
If no number is given, 7 is assumed. If JIT compilation is successful, the
|
||||
compiled JIT code will automatically be used when \fBpcre2_match()\fP is run,
|
||||
except when incompatible run-time options are specified. For more details, see
|
||||
the
|
||||
.\" HREF
|
||||
\fBpcre2jit\fP
|
||||
.\"
|
||||
|
@ -503,13 +517,13 @@ The \fB/locale\fP modifier must specify the name of a locale, for example:
|
|||
.sp
|
||||
/pattern/locale=fr_FR
|
||||
.sp
|
||||
The given locale is set,
|
||||
\fBpcre2_maketables()\fP is called to build a set of character tables for
|
||||
the locale, and this is then passed to \fBpcre2_compile()\fP when compiling the
|
||||
regular expression. The same tables are used when matching the following
|
||||
subject lines. The \fB/locale\fP modifier applies only to the pattern on which
|
||||
it appears, but can be given in a \fB#pattern\fP command if a default is
|
||||
needed.
|
||||
The given locale is set, \fBpcre2_maketables()\fP is called to build a set of
|
||||
character tables for the locale, and this is then passed to
|
||||
\fBpcre2_compile()\fP when compiling the regular expression. The same tables
|
||||
are used when matching the following subject lines. The \fB/locale\fP modifier
|
||||
applies only to the pattern on which it appears, but can be given in a
|
||||
\fB#pattern\fP command if a default is needed. Setting a locale and setting alternate
|
||||
character tables are mutually exclusive.
|
||||
.
|
||||
.
|
||||
.SS "Showing pattern memory"
|
||||
|
@ -571,12 +585,14 @@ specific set of built-in character tables to be passed to
|
|||
\fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour with
|
||||
different character tables. The digit specifies the tables as follows:
|
||||
.sp
|
||||
0 the default ASCII tables, as distributed in
|
||||
0 do not pass any special character tables
|
||||
1 the default ASCII tables, as distributed in
|
||||
pcre2_chartables.c.dist
|
||||
1 a set of tables defining ISO 8859 characters
|
||||
2 a set of tables defining ISO 8859 characters
|
||||
.sp
|
||||
In table 1, some characters whose codes are greater than 128 are identified as
|
||||
letters, digits, spaces, etc.
|
||||
In table 2, some characters whose codes are greater than 128 are identified as
|
||||
letters, digits, spaces, etc. Setting alternate character tables and setting a locale
|
||||
are mutually exclusive.
|
||||
.
|
||||
.
|
||||
.SS "Locking out certain modifiers"
|
||||
|
@ -627,7 +643,8 @@ not affect the compilation process.
|
|||
jitverify verify JIT usage
|
||||
mark show mark values
|
||||
.sp
|
||||
These modifiers may not appear in a \fB#pattern\fP command.
|
||||
These modifiers may not appear in a \fB#pattern\fP command. If you want them as
|
||||
defaults, set them in a \fB#subject\fP command.
|
||||
.
|
||||
.
|
||||
.SH "SUBJECT MODIFIERS"
|
||||
|
@ -730,7 +747,7 @@ set, the current captured groups are output when a callout occurs.
|
|||
The \fBcallout_fail\fP modifier can be given one or two numbers. If there is
|
||||
only one number, 1 is returned instead of 0 when a callout of that number is
|
||||
reached. If two numbers are given, 1 is returned when callout <n> is reached
|
||||
for the <m>th time. FIXME: this needs checking.
|
||||
for the <m>th time. FIXME: this needs checking. Disable by m=0?
|
||||
.
|
||||
.
|
||||
.SS "Testing substring extraction functions"
|
||||
|
@ -738,7 +755,15 @@ for the <m>th time. FIXME: this needs checking.
|
|||
.sp
|
||||
The \fBcopy\fP and \fBget\fP modifiers can be used to test the
|
||||
\fBpcre2_substring_copy_xxx()\fP and \fBpcre2_substring_get_xxx()\fP functions.
|
||||
They can be given more than once, and each can specify a group name or number.
|
||||
They can be given more than once, and each can specify a group name or number,
|
||||
for example:
|
||||
.sp
|
||||
abcd\=copy=1,copy=3,get=G1
|
||||
.sp
|
||||
If the \fB#subject\fP command is used to set default copy and get lists, these
|
||||
can be unset by specifying a negative number for numbered groups and an empty
|
||||
name for named groups.
|
||||
.P
|
||||
The \fBgetall\fP modifier tests \fBpcre2_substring_list_get()\fP, which
|
||||
extracts all captured substrings.
|
||||
.P
|
||||
|
|
31
src/pcre2.h
31
src/pcre2.h
|
@ -160,22 +160,21 @@ D is inspected during pcre2_dfa_exec() execution
|
|||
#define PCRE2_ERROR_BADMODE (-6)
|
||||
#define PCRE2_ERROR_BADOFFSET (-7)
|
||||
#define PCRE2_ERROR_BADOPTION (-8)
|
||||
#define PCRE2_ERROR_BADPARTIAL (-9)
|
||||
#define PCRE2_ERROR_BADUTF (-10)
|
||||
#define PCRE2_ERROR_BADUTF_OFFSET (-11)
|
||||
#define PCRE2_ERROR_CALLOUT (-12) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_INTERNAL (-13)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-14)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-15)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-16)
|
||||
#define PCRE2_ERROR_NOMEMORY (-17)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-18)
|
||||
#define PCRE2_ERROR_NULL (-19)
|
||||
#define PCRE2_ERROR_PARTIAL (-20)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-21)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-22)
|
||||
#define PCRE2_ERROR_UNKNOWN_OPCODE (-23)
|
||||
#define PCRE2_ERROR_UNSET (-24)
|
||||
#define PCRE2_ERROR_BADUTF (-9)
|
||||
#define PCRE2_ERROR_BADUTF_OFFSET (-10)
|
||||
#define PCRE2_ERROR_CALLOUT (-11) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_INTERNAL (-12)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-13)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-14)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-15)
|
||||
#define PCRE2_ERROR_NOMEMORY (-16)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-17)
|
||||
#define PCRE2_ERROR_NULL (-18)
|
||||
#define PCRE2_ERROR_PARTIAL (-19)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-20)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE2_ERROR_UNKNOWN_OPCODE (-22)
|
||||
#define PCRE2_ERROR_UNSET (-23)
|
||||
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-30)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-31)
|
||||
|
|
|
@ -160,22 +160,21 @@ D is inspected during pcre2_dfa_exec() execution
|
|||
#define PCRE2_ERROR_BADMODE (-6)
|
||||
#define PCRE2_ERROR_BADOFFSET (-7)
|
||||
#define PCRE2_ERROR_BADOPTION (-8)
|
||||
#define PCRE2_ERROR_BADPARTIAL (-9)
|
||||
#define PCRE2_ERROR_BADUTF (-10)
|
||||
#define PCRE2_ERROR_BADUTF_OFFSET (-11)
|
||||
#define PCRE2_ERROR_CALLOUT (-12) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_INTERNAL (-13)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-14)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-15)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-16)
|
||||
#define PCRE2_ERROR_NOMEMORY (-17)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-18)
|
||||
#define PCRE2_ERROR_NULL (-19)
|
||||
#define PCRE2_ERROR_PARTIAL (-20)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-21)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-22)
|
||||
#define PCRE2_ERROR_UNKNOWN_OPCODE (-23)
|
||||
#define PCRE2_ERROR_UNSET (-24)
|
||||
#define PCRE2_ERROR_BADUTF (-9)
|
||||
#define PCRE2_ERROR_BADUTF_OFFSET (-10)
|
||||
#define PCRE2_ERROR_CALLOUT (-11) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_INTERNAL (-12)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-13)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-14)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-15)
|
||||
#define PCRE2_ERROR_NOMEMORY (-16)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-17)
|
||||
#define PCRE2_ERROR_NULL (-18)
|
||||
#define PCRE2_ERROR_PARTIAL (-19)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-20)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE2_ERROR_UNKNOWN_OPCODE (-22)
|
||||
#define PCRE2_ERROR_UNSET (-23)
|
||||
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-30)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-31)
|
||||
|
|
|
@ -46,7 +46,185 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/* FIXME: this is currently a placeholder function */
|
||||
/* FIXME: check out the errors */
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* The texts of compile-time error messages. Do not ever re-use any error
|
||||
number, because they are documented. Always add a new error instead. Messages
|
||||
marked DEAD below are no longer used. This used to be a table of strings, but
|
||||
in order to reduce the number of relocations needed when a shared library is
|
||||
loaded dynamically, it is now one long string. We cannot use a table of
|
||||
offsets, because the lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not
|
||||
known. Instead, pcre2_get_error_message() counts through to the one it wants -
|
||||
this isn't a performance issue because these strings are used only when there
|
||||
is an error.
|
||||
|
||||
Each substring ends with \0 to insert a null character. This includes the final
|
||||
substring, so that the whole string ends with \0\0, which can be detected when
|
||||
counting through. */
|
||||
|
||||
static const char compile_error_texts[] =
|
||||
"no error\0"
|
||||
"\\ at end of pattern\0"
|
||||
"\\c at end of pattern\0"
|
||||
"unrecognized character follows \\\0"
|
||||
"numbers out of order in {} quantifier\0"
|
||||
/* 5 */
|
||||
"number too big in {} quantifier\0"
|
||||
"missing terminating ] for character class\0"
|
||||
"invalid escape sequence in character class\0"
|
||||
"range out of order in character class\0"
|
||||
"nothing to repeat\0"
|
||||
/* 10 */
|
||||
"operand of unlimited repeat could match the empty string\0" /** DEAD **/
|
||||
"internal error: unexpected repeat\0"
|
||||
"unrecognized character after (? or (?-\0"
|
||||
"POSIX named classes are supported only within a class\0"
|
||||
"missing )\0"
|
||||
/* 15 */
|
||||
"reference to non-existent subpattern\0"
|
||||
"erroffset passed as NULL\0"
|
||||
"unknown option bit(s) set\0"
|
||||
"missing ) after comment\0"
|
||||
"parentheses nested too deeply\0" /** DEAD **/
|
||||
/* 20 */
|
||||
"regular expression is too large\0"
|
||||
"failed to get memory\0"
|
||||
"unmatched parentheses\0"
|
||||
"internal error: code overflow\0"
|
||||
"unrecognized character after (?<\0"
|
||||
/* 25 */
|
||||
"lookbehind assertion is not fixed length\0"
|
||||
"malformed number or name after (?(\0"
|
||||
"conditional group contains more than two branches\0"
|
||||
"assertion expected after (?(\0"
|
||||
"(?R or (?[+-]digits must be followed by )\0"
|
||||
/* 30 */
|
||||
"unknown POSIX class name\0"
|
||||
"POSIX collating elements are not supported\0"
|
||||
"this version of PCRE is compiled without UTF support\0"
|
||||
"spare error\0" /** DEAD **/
|
||||
"character value in \\x{} or \\o{} is too large\0"
|
||||
/* 35 */
|
||||
"invalid condition (?(0)\0"
|
||||
"\\C not allowed in lookbehind assertion\0"
|
||||
"PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
|
||||
"number after (?C is > 255\0"
|
||||
"closing ) for (?C expected\0"
|
||||
/* 40 */
|
||||
"recursive call could loop indefinitely\0"
|
||||
"unrecognized character after (?P\0"
|
||||
"syntax error in subpattern name (missing terminator)\0"
|
||||
"two named subpatterns have the same name\0"
|
||||
"invalid UTF-8 string\0"
|
||||
/* 45 */
|
||||
"support for \\P, \\p, and \\X has not been compiled\0"
|
||||
"malformed \\P or \\p sequence\0"
|
||||
"unknown property name after \\P or \\p\0"
|
||||
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
|
||||
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
|
||||
/* 50 */
|
||||
"repeated subpattern is too long\0" /** DEAD **/
|
||||
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
|
||||
"internal error: overran compiling workspace\0"
|
||||
"internal error: previously-checked referenced subpattern not found\0"
|
||||
"DEFINE group contains more than one branch\0"
|
||||
/* 55 */
|
||||
"repeating a DEFINE group is not allowed\0" /** DEAD **/
|
||||
"inconsistent NEWLINE options\0"
|
||||
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
|
||||
"a numbered reference must not be zero\0"
|
||||
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
|
||||
/* 60 */
|
||||
"(*VERB) not recognized or malformed\0"
|
||||
"number is too big\0"
|
||||
"subpattern name expected\0"
|
||||
"digit expected after (?+\0"
|
||||
"] is an invalid data character in JavaScript compatibility mode\0"
|
||||
/* 65 */
|
||||
"different names for subpatterns of the same number are not allowed\0"
|
||||
"(*MARK) must have an argument\0"
|
||||
"this version of PCRE is not compiled with Unicode property support\0"
|
||||
"\\c must be followed by an ASCII character\0"
|
||||
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
|
||||
/* 70 */
|
||||
"internal error: unknown opcode in find_fixedlength()\0"
|
||||
"\\N is not supported in a class\0"
|
||||
"too many forward references\0"
|
||||
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
|
||||
"invalid UTF-16 string\0"
|
||||
/* 75 */
|
||||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||
"character value in \\u.... sequence is too large\0"
|
||||
"invalid UTF-32 string\0"
|
||||
"setting UTF is disabled by the application\0"
|
||||
"non-hex character in \\x{} (closing brace missing?)\0"
|
||||
/* 80 */
|
||||
"non-octal character in \\o{} (closing brace missing?)\0"
|
||||
"missing opening brace after \\o\0"
|
||||
"parentheses are too deeply nested\0"
|
||||
"invalid range in character class\0"
|
||||
"group name must start with a non-digit\0"
|
||||
/* 85 */
|
||||
"parentheses are too deeply nested (stack check)\0"
|
||||
;
|
||||
|
||||
/* Match-time error texts are in the same format. */
|
||||
|
||||
static const char match_error_texts[] =
|
||||
"no error\0"
|
||||
"no match\0"
|
||||
"bad count value\0"
|
||||
"pattern compiled with other endianness\0"
|
||||
"bad length\0"
|
||||
/* -5 */
|
||||
"magic number missing\0"
|
||||
"pattern compiled in wrong mode: 8/16/32-bit error\0"
|
||||
"bad offset value\0"
|
||||
"bad option value\0"
|
||||
"bad UTF string\0"
|
||||
/* -10 */
|
||||
"bad offset into UTF string\0"
|
||||
"callout error code\0" /* Never returned by PCRE2 itself */
|
||||
"internal error - pattern overwritten?\0"
|
||||
"bad JIT option\0"
|
||||
"JIT stack limit reached\0"
|
||||
/* -15 */
|
||||
"match limit exceeded\0"
|
||||
"no more memory\0"
|
||||
"unknown substring\0"
|
||||
"NULL argument passed\0"
|
||||
"partial match\0"
|
||||
/* -20 */
|
||||
"nested recursion at the same subject position\0"
|
||||
"recursion limit exceeded\0"
|
||||
"unknown opcode - pattern overwritten?\0"
|
||||
"value unset\0" /* Used by pcre2_pattern_info() */
|
||||
"spare -24\0"
|
||||
/* -25 */
|
||||
"spare -25\0"
|
||||
"spare -26\0"
|
||||
"spare -27\0"
|
||||
"spare -28\0"
|
||||
"spare -29\0"
|
||||
/* -30 */
|
||||
"invalid data in workspace for DFA restart\0"
|
||||
"too much recursion for DFA matching\0"
|
||||
"backreference condition or recursion test not supported for DFA matching\0"
|
||||
"item unsupported for DFA matching\0"
|
||||
"match limit not supported for DFA matching\0"
|
||||
/* -35 */
|
||||
"workspace size exceeded in DFA matching\0"
|
||||
"spare -36\0"
|
||||
"spare -37\0"
|
||||
"spare -38\0"
|
||||
"spare -39\0"
|
||||
/* -40 */
|
||||
"spare -39\0"
|
||||
;
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return error message *
|
||||
|
@ -62,26 +240,51 @@ Arguments:
|
|||
size size of the buffer
|
||||
|
||||
Returns: length of message if all is well
|
||||
-1 on error
|
||||
negative on error
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size)
|
||||
{
|
||||
char xbuff[128];
|
||||
const char *message;
|
||||
size_t i;
|
||||
const char *message = "Dummy error message";
|
||||
enumber=enumber;
|
||||
uint32_t n;
|
||||
|
||||
if (size == 0) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
if (enumber > 0) /* Compile-time error */
|
||||
{
|
||||
message = compile_error_texts;
|
||||
n = enumber;
|
||||
}
|
||||
|
||||
else /* Match-time error */
|
||||
{
|
||||
message = match_error_texts;
|
||||
n = -enumber;
|
||||
}
|
||||
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
while (*message++ != CHAR_NULL) {};
|
||||
if (*message == CHAR_NULL)
|
||||
{
|
||||
sprintf(xbuff, "Internal error: no text for error %d", enumber);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (size == 0) return -1;
|
||||
for (i = 0; *message != 0; i++)
|
||||
{
|
||||
if (i >= size - 1)
|
||||
{
|
||||
buffer[i] = 0; /* Terminate partial message */
|
||||
return -1;
|
||||
return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
buffer[i] = *message++;
|
||||
}
|
||||
|
||||
buffer[i] = 0;
|
||||
return i;
|
||||
}
|
||||
|
|
|
@ -59,7 +59,7 @@ Arguments:
|
|||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
/* FIXME: this is currently incomplete */
|
||||
/* FIXME: this is currently incomplete. Also, check int vs uint32_t */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
|
||||
|
|
759
src/pcre2test.c
759
src/pcre2test.c
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue