Allow \R and newline handling to be specified at match time (as for PCRE1).
This commit is contained in:
parent
200ca3f1b4
commit
4739ccde40
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "13 May 2014" "PCRE 9.00"
|
||||
.TH PCRE2TEST 1 "24 June 2014" "PCRE 10.00"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -18,13 +18,6 @@ options, see the
|
|||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
,
|
||||
.\" HREF
|
||||
\fBpcre16\fP
|
||||
and
|
||||
.\" HREF
|
||||
\fBpcre32\fP
|
||||
.\"
|
||||
documentation.
|
||||
.P
|
||||
The input for \fBpcre2test\fP is a sequence of regular expression patterns and
|
||||
|
@ -170,6 +163,9 @@ compile phase.
|
|||
\fB-T\fP \fB-TM\fP
|
||||
These behave like \fB-t\fP and \fB-tm\fP, but in addition, at the end of a run,
|
||||
the total times for all compiles and matches are output.
|
||||
.TP 10
|
||||
\fB-version\fP
|
||||
Output the PCRE2 version number and then exit.
|
||||
.
|
||||
.
|
||||
.SH "DESCRIPTION"
|
||||
|
@ -446,8 +442,11 @@ set to "anycrlf", \eR matches CR, LF, or CRLF only. If it is set to "unicode",
|
|||
is built, with the default default being Unicode.
|
||||
.P
|
||||
The \fBnewline\fP modifier specifies which characters are to be interpreted as
|
||||
newlines, both in the pattern and in subject lines. The type must be one of
|
||||
CR, LF, CRLF, ANYCRLF, or ANY.
|
||||
newlines, both in the pattern and (by default) in subject lines. The type must
|
||||
be one of CR, LF, CRLF, ANYCRLF, or ANY.
|
||||
.P
|
||||
Both the \eR and newline settings can be changed at match time, but if this is
|
||||
done, JIT matching is disabled.
|
||||
.
|
||||
.
|
||||
.SS "Information about a pattern"
|
||||
|
@ -685,6 +684,7 @@ pattern.
|
|||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
/gg altglobal alternative global matching
|
||||
bsr=[anycrlf|unicode] specify \eR handling
|
||||
callout_capture show captures at callout time
|
||||
callout_fail=<n>[,<m>] control callout failure
|
||||
callout_none do not supply a callout function
|
||||
|
@ -699,6 +699,7 @@ pattern.
|
|||
mark show mark values
|
||||
match_limit=<n> set a match limit
|
||||
memory show memory usage
|
||||
newline=<type> set newline type
|
||||
offset=<n> set starting offset
|
||||
ovector=<n> set size of output vector
|
||||
recursion_limit=<n> set a recursion limit
|
||||
|
@ -707,6 +708,14 @@ The effects of these modifiers are described in the following sections.
|
|||
FIXME: Give more examples.
|
||||
.
|
||||
.
|
||||
.SS "Newline and \eR handling"
|
||||
.rs
|
||||
.sp
|
||||
The \fBbsr\fP and \fBnewline\fP modifiers set the newline and \eR processing conventions for the subject
|
||||
line, overriding any values that were set at compile time (as described above).
|
||||
JIT matching is disabled if these settings are changed at match time.
|
||||
.
|
||||
.
|
||||
.SS "Showing more text"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -1191,6 +1200,6 @@ Cambridge CB2 3QH, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 08 June 2014
|
||||
Last updated: 24 June 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
.fi
|
||||
|
|
18
src/pcre2.h
18
src/pcre2.h
|
@ -136,7 +136,9 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_DFA_RESTART 0x00000040
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080
|
||||
|
||||
/* Newline and \R settings, for use in the compile context. */
|
||||
/* Newline and \R settings, for use in the compile and match contexts. The
|
||||
newline values must be kept in step with values set in config.h and both sets
|
||||
must all be greater than zero. */
|
||||
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
|
@ -361,11 +363,11 @@ PCRE2_EXP_DECL \
|
|||
PCRE2_EXP_DECL \
|
||||
pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_bsr_convention(pcre2_compile_context *, \
|
||||
PCRE2_EXP_DECL int pcre2_set_bsr_compile(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline_convention(pcre2_compile_context *, \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline_compile(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
|
@ -378,10 +380,14 @@ PCRE2_EXP_DECL \
|
|||
PCRE2_EXP_DECL \
|
||||
pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *)); \
|
||||
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
||||
|
@ -556,12 +562,14 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||
#define pcre2_pattern_to_host_byte_order PCRE2_SUFFIX(pcre2_pattern_to_host_byte_order_)
|
||||
#define pcre2_set_bsr_convention PCRE2_SUFFIX(pcre2_set_bsr_convention_)
|
||||
#define pcre2_set_bsr_compile PCRE2_SUFFIX(pcre2_set_bsr_compile_)
|
||||
#define pcre2_set_bsr_match PCRE2_SUFFIX(pcre2_set_bsr_match_)
|
||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_newline_convention PCRE2_SUFFIX(pcre2_set_newline_convention_)
|
||||
#define pcre2_set_newline_compile PCRE2_SUFFIX(pcre2_set_newline_compile_)
|
||||
#define pcre2_set_newline_match PCRE2_SUFFIX(pcre2_set_newline_match_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
|
|
|
@ -136,7 +136,9 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_DFA_RESTART 0x00000040
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080
|
||||
|
||||
/* Newline and \R settings, for use in the compile context. */
|
||||
/* Newline and \R settings, for use in the compile and match contexts. The
|
||||
newline values must be kept in step with values set in config.h and both sets
|
||||
must all be greater than zero. */
|
||||
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
|
@ -361,11 +363,11 @@ PCRE2_EXP_DECL \
|
|||
PCRE2_EXP_DECL \
|
||||
pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_bsr_convention(pcre2_compile_context *, \
|
||||
PCRE2_EXP_DECL int pcre2_set_bsr_compile(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline_convention(pcre2_compile_context *, \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline_compile(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
|
@ -378,10 +380,14 @@ PCRE2_EXP_DECL \
|
|||
PCRE2_EXP_DECL \
|
||||
pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *)); \
|
||||
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
||||
|
@ -556,12 +562,14 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||
#define pcre2_pattern_to_host_byte_order PCRE2_SUFFIX(pcre2_pattern_to_host_byte_order_)
|
||||
#define pcre2_set_bsr_convention PCRE2_SUFFIX(pcre2_set_bsr_convention_)
|
||||
#define pcre2_set_bsr_compile PCRE2_SUFFIX(pcre2_set_bsr_compile_)
|
||||
#define pcre2_set_bsr_match PCRE2_SUFFIX(pcre2_set_bsr_match_)
|
||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_newline_convention PCRE2_SUFFIX(pcre2_set_newline_convention_)
|
||||
#define pcre2_set_newline_compile PCRE2_SUFFIX(pcre2_set_newline_compile_)
|
||||
#define pcre2_set_newline_match PCRE2_SUFFIX(pcre2_set_newline_match_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
|
|
|
@ -172,6 +172,8 @@ mcontext->stack_malloc = mcontext->malloc;
|
|||
mcontext->stack_free = mcontext->free;
|
||||
#endif
|
||||
mcontext->callout = NULL;
|
||||
mcontext->newline_convention = 0;
|
||||
mcontext->bsr_convention = 0;
|
||||
mcontext->match_limit = MATCH_LIMIT;
|
||||
mcontext->recursion_limit = MATCH_LIMIT_RECURSION;
|
||||
}
|
||||
|
@ -269,8 +271,19 @@ if (mcontext != NULL)
|
|||
/* All these functions return 1 for success or 0 if invalid data is given. Only
|
||||
some of the functions are able to test the validity of the data. */
|
||||
|
||||
|
||||
/* ------------ Compile contexts ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_bsr_convention(pcre2_compile_context *ccontext, uint32_t value)
|
||||
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||
const unsigned char *tables)
|
||||
{
|
||||
ccontext->tables = tables;
|
||||
return 1;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_bsr_compile(pcre2_compile_context *ccontext, uint32_t value)
|
||||
{
|
||||
switch(value)
|
||||
{
|
||||
|
@ -284,18 +297,8 @@ switch(value)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||
const unsigned char *tables)
|
||||
{
|
||||
ccontext->tables = tables;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_newline_convention(pcre2_compile_context *ccontext, uint32_t newline)
|
||||
pcre2_set_newline_compile(pcre2_compile_context *ccontext, uint32_t newline)
|
||||
{
|
||||
switch(newline)
|
||||
{
|
||||
|
@ -312,7 +315,6 @@ switch(newline)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
|
||||
{
|
||||
|
@ -320,7 +322,6 @@ ccontext->parens_nest_limit = limit;
|
|||
return 1;
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||
int (*guard)(uint32_t))
|
||||
|
@ -330,6 +331,41 @@ return 1;
|
|||
}
|
||||
|
||||
|
||||
/* ------------ Match contexts ------------ */
|
||||
|
||||
/* Set the \R convention held in a match context.

Arguments:
  mcontext   the match context whose bsr_convention field is updated
  value      the requested convention; only PCRE2_BSR_ANYCRLF and
             PCRE2_BSR_UNICODE are accepted

Returns:     1 if the value was accepted and stored
             0 if the value is not one of the accepted constants, in which
               case the context is left untouched
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_bsr_match(pcre2_match_context *mcontext, uint32_t value)
|
||||
{
|
||||
switch(value)
|
||||
{
|
||||
case PCRE2_BSR_ANYCRLF:
|
||||
case PCRE2_BSR_UNICODE:
|
||||
mcontext->bsr_convention = value;
|
||||
return 1;
|
||||
|
||||
default:   /* Unrecognized value: reject without modifying the context */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Set the newline convention held in a match context.

Arguments:
  mcontext   the match context whose newline_convention field is updated
  newline    the requested convention; one of PCRE2_NEWLINE_CR,
             PCRE2_NEWLINE_LF, PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANY or
             PCRE2_NEWLINE_ANYCRLF

Returns:     1 if the value was accepted and stored
             0 if the value is not one of the accepted constants, in which
               case the context is left untouched
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_newline_match(pcre2_match_context *mcontext, uint32_t newline)
|
||||
{
|
||||
switch(newline)
|
||||
{
|
||||
case PCRE2_NEWLINE_CR:
|
||||
case PCRE2_NEWLINE_LF:
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
mcontext->newline_convention = newline;
|
||||
return 1;
|
||||
|
||||
default:   /* Unrecognized value: reject without modifying the context */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_callout(pcre2_match_context *mcontext,
|
||||
int (*callout)(pcre2_callout_block *, void *))
|
||||
|
@ -338,7 +374,6 @@ mcontext->callout = callout;
|
|||
return 1;
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
|
@ -353,7 +388,6 @@ mcontext->recursion_limit = limit;
|
|||
return 1;
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
||||
void *(*mymalloc)(size_t, void *),
|
||||
|
@ -370,5 +404,4 @@ mcontext->stack_free = myfree;
|
|||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* End of pcre2_context.c */
|
||||
|
|
|
@ -558,6 +558,8 @@ typedef struct pcre2_real_match_context {
|
|||
void (*stack_free)(void *, void *);
|
||||
#endif
|
||||
int (*callout)(pcre2_callout_block *, void *);
|
||||
uint16_t bsr_convention;
|
||||
uint16_t newline_convention;
|
||||
uint32_t match_limit;
|
||||
uint32_t recursion_limit;
|
||||
} pcre2_real_match_context;
|
||||
|
|
|
@ -81,7 +81,6 @@ int rc = PCRE2_ERROR_NOMATCH;
|
|||
mcontext=mcontext;length=length;
|
||||
options=options;
|
||||
|
||||
|
||||
/* Fudges for testing pcre2test */
|
||||
|
||||
if (subject[0] == 'Y')
|
||||
|
|
|
@ -304,7 +304,8 @@ static const char *newlines[] = {
|
|||
|
||||
/* Modifier types and applicability */
|
||||
|
||||
enum { MOD_CTC, /* Applies to a compile context */
|
||||
enum { MOD_CTB, /* Applies to a compile or a match context */
|
||||
MOD_CTC, /* Applies to a compile context */
|
||||
MOD_CTM, /* Applies to a match context */
|
||||
MOD_PAT, /* Applies to a pattern */
|
||||
MOD_PATP, /* Ditto, OK for Perl test */
|
||||
|
@ -421,7 +422,7 @@ static modstruct modlist[] = {
|
|||
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
|
||||
{ "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
|
||||
{ "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
|
||||
{ "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
|
||||
{ "bsr", MOD_CTB, MOD_BSR, MO(bsr_convention), CO(bsr_convention) },
|
||||
{ "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
|
||||
{ "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
|
||||
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
|
||||
|
@ -455,7 +456,7 @@ static modstruct modlist[] = {
|
|||
{ "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
|
||||
{ "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
|
||||
{ "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
|
||||
{ "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
|
||||
{ "newline", MOD_CTB, MOD_NL, MO(newline_convention), CO(newline_convention) },
|
||||
{ "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
|
||||
{ "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
|
||||
{ "no_start_optimize", MOD_PDP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PD(options) },
|
||||
|
@ -2270,6 +2271,7 @@ static void *
|
|||
check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
|
||||
{
|
||||
void *field = NULL;
|
||||
size_t offset = m->offset;
|
||||
|
||||
if (restrict_for_perl_test) switch(m->which)
|
||||
{
|
||||
|
@ -2286,10 +2288,16 @@ if (restrict_for_perl_test) switch(m->which)
|
|||
|
||||
switch (m->which)
|
||||
{
|
||||
case MOD_CTB: /* Compile or match context modifier */
|
||||
case MOD_CTC: /* Compile context modifier */
|
||||
if (ctx == CTX_DEFPAT || ctx == CTX_DEFANY) field = PTR(default_pat_context);
|
||||
else if (ctx == CTX_PAT) field = PTR(pat_context);
|
||||
break;
|
||||
if (field != NULL || m->which == MOD_CTC) break;
|
||||
|
||||
/* Fall through for something that can also be in a match context. In this
|
||||
case the offset is taken from the other field. */
|
||||
|
||||
offset = (size_t)(m->value);
|
||||
|
||||
case MOD_CTM: /* Match context modifier */
|
||||
if (ctx == CTX_DEFDAT || ctx == CTX_DEFANY) field = PTR(default_dat_context);
|
||||
|
@ -2324,7 +2332,7 @@ if (field == NULL)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
return (char *)field + m->offset;
|
||||
return (char *)field + offset;
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue