Allow \R and newline handling to be specified at match time (as for PCRE1).

This commit is contained in:
Philip.Hazel 2014-06-24 09:51:58 +00:00
parent 200ca3f1b4
commit 4739ccde40
7 changed files with 111 additions and 44 deletions

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "13 May 2014" "PCRE 9.00"
.TH PCRE2TEST 1 "24 June 2014" "PCRE 10.00"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@ -18,13 +18,6 @@ options, see the
.\" HREF
\fBpcre2api\fP
.\"
,
.\" HREF
\fBpcre16\fP
and
.\" HREF
\fBpcre32\fP
.\"
documentation.
.P
The input for \fBpcre2test\fP is a sequence of regular expression patterns and
@ -170,6 +163,9 @@ compile phase.
\fB-T\fP \fB-TM\fP
These behave like \fB-t\fP and \fB-tm\fP, but in addition, at the end of a run,
the total times for all compiles and matches are output.
.TP 10
\fB-version\fP
Output the PCRE2 version number and then exit.
.
.
.SH "DESCRIPTION"
@ -446,8 +442,11 @@ set to "anycrlf", \eR matches CR, LF, or CRLF only. If it is set to "unicode",
is built, with the default for that build setting being Unicode.
.P
The \fBnewline\fP modifier specifies which characters are to be interpreted as
newlines, both in the pattern and in subject lines. The type must be one of
CR, LF, CRLF, ANYCRLF, or ANY.
newlines, both in the pattern and (by default) in subject lines. The type must
be one of CR, LF, CRLF, ANYCRLF, or ANY.
.P
Both the \eR and newline settings can be changed at match time, but if this is
done, JIT matching is disabled.
.
.
.SS "Information about a pattern"
@ -685,6 +684,7 @@ pattern.
allaftertext show text after captures
allcaptures show all captures
/gg altglobal alternative global matching
bsr=[anycrlf|unicode] specify \eR handling
callout_capture show captures at callout time
callout_fail=<n>[,<m>] control callout failure
callout_none do not supply a callout function
@ -699,6 +699,7 @@ pattern.
mark show mark values
match_limit=<n> set a match limit
memory show memory usage
newline=<type> set newline type
offset=<n> set starting offset
ovector=<n> set size of output vector
recursion_limit=<n> set a recursion limit
@ -707,6 +708,14 @@ The effects of these modifiers are described in the following sections.
FIXME: Give more examples.
.
.
.SS "Newline and \eR handling"
.rs
.sp
These modifiers set the newline and \eR processing conventions for the subject
line, overriding any values that were set at compile time (as described above).
JIT matching is disabled if these settings are changed at match time.
.
.
.SS "Showing more text"
.rs
.sp
@ -1191,6 +1200,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
Last updated: 08 June 2014
Last updated: 24 June 2014
Copyright (c) 1997-2014 University of Cambridge.
.fi

View File

@ -136,7 +136,9 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_DFA_RESTART 0x00000040
#define PCRE2_DFA_SHORTEST 0x00000080
/* Newline and \R settings, for use in the compile context. */
/* Newline and \R settings, for use in the compile and match contexts. The
newline values must be kept in step with the values set in config.h, and every
value in both sets must be greater than zero. */
#define PCRE2_NEWLINE_CR 1
#define PCRE2_NEWLINE_LF 2
@ -361,11 +363,11 @@ PCRE2_EXP_DECL \
PCRE2_EXP_DECL \
pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\
PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
PCRE2_EXP_DECL int pcre2_set_bsr_convention(pcre2_compile_context *, \
PCRE2_EXP_DECL int pcre2_set_bsr_compile(pcre2_compile_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
const unsigned char *); \
PCRE2_EXP_DECL int pcre2_set_newline_convention(pcre2_compile_context *, \
PCRE2_EXP_DECL int pcre2_set_newline_compile(pcre2_compile_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
uint32_t); \
@ -378,10 +380,14 @@ PCRE2_EXP_DECL \
PCRE2_EXP_DECL \
pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *)); \
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
@ -556,12 +562,14 @@ pcre2_compile are called by application code. */
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
#define pcre2_pattern_to_host_byte_order PCRE2_SUFFIX(pcre2_pattern_to_host_byte_order_)
#define pcre2_set_bsr_convention PCRE2_SUFFIX(pcre2_set_bsr_convention_)
#define pcre2_set_bsr_compile PCRE2_SUFFIX(pcre2_set_bsr_compile_)
#define pcre2_set_bsr_match PCRE2_SUFFIX(pcre2_set_bsr_match_)
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_newline_convention PCRE2_SUFFIX(pcre2_set_newline_convention_)
#define pcre2_set_newline_compile PCRE2_SUFFIX(pcre2_set_newline_compile_)
#define pcre2_set_newline_match PCRE2_SUFFIX(pcre2_set_newline_match_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)

View File

@ -136,7 +136,9 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_DFA_RESTART 0x00000040
#define PCRE2_DFA_SHORTEST 0x00000080
/* Newline and \R settings, for use in the compile context. */
/* Newline and \R settings, for use in the compile and match contexts. The
newline values must be kept in step with the values set in config.h, and every
value in both sets must be greater than zero. */
#define PCRE2_NEWLINE_CR 1
#define PCRE2_NEWLINE_LF 2
@ -361,11 +363,11 @@ PCRE2_EXP_DECL \
PCRE2_EXP_DECL \
pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\
PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
PCRE2_EXP_DECL int pcre2_set_bsr_convention(pcre2_compile_context *, \
PCRE2_EXP_DECL int pcre2_set_bsr_compile(pcre2_compile_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
const unsigned char *); \
PCRE2_EXP_DECL int pcre2_set_newline_convention(pcre2_compile_context *, \
PCRE2_EXP_DECL int pcre2_set_newline_compile(pcre2_compile_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
uint32_t); \
@ -378,10 +380,14 @@ PCRE2_EXP_DECL \
PCRE2_EXP_DECL \
pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *)); \
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
@ -556,12 +562,14 @@ pcre2_compile are called by application code. */
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
#define pcre2_pattern_to_host_byte_order PCRE2_SUFFIX(pcre2_pattern_to_host_byte_order_)
#define pcre2_set_bsr_convention PCRE2_SUFFIX(pcre2_set_bsr_convention_)
#define pcre2_set_bsr_compile PCRE2_SUFFIX(pcre2_set_bsr_compile_)
#define pcre2_set_bsr_match PCRE2_SUFFIX(pcre2_set_bsr_match_)
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_newline_convention PCRE2_SUFFIX(pcre2_set_newline_convention_)
#define pcre2_set_newline_compile PCRE2_SUFFIX(pcre2_set_newline_compile_)
#define pcre2_set_newline_match PCRE2_SUFFIX(pcre2_set_newline_match_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)

View File

@ -172,6 +172,8 @@ mcontext->stack_malloc = mcontext->malloc;
mcontext->stack_free = mcontext->free;
#endif
mcontext->callout = NULL;
mcontext->newline_convention = 0;
mcontext->bsr_convention = 0;
mcontext->match_limit = MATCH_LIMIT;
mcontext->recursion_limit = MATCH_LIMIT_RECURSION;
}
@ -269,8 +271,19 @@ if (mcontext != NULL)
/* All these functions return 1 for success or 0 if invalid data is given. Only
some of the functions are able to test the validity of the data. */
/* ------------ Compile contexts ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_bsr_convention(pcre2_compile_context *ccontext, uint32_t value)
pcre2_set_character_tables(pcre2_compile_context *ccontext,
const unsigned char *tables)
{
ccontext->tables = tables;
return 1;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_bsr_compile(pcre2_compile_context *ccontext, uint32_t value)
{
switch(value)
{
@ -284,18 +297,8 @@ switch(value)
}
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_character_tables(pcre2_compile_context *ccontext,
const unsigned char *tables)
{
ccontext->tables = tables;
return 1;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_newline_convention(pcre2_compile_context *ccontext, uint32_t newline)
pcre2_set_newline_compile(pcre2_compile_context *ccontext, uint32_t newline)
{
switch(newline)
{
@ -312,7 +315,6 @@ switch(newline)
}
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
{
@ -320,7 +322,6 @@ ccontext->parens_nest_limit = limit;
return 1;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
int (*guard)(uint32_t))
@ -330,6 +331,41 @@ return 1;
}
/* ------------ Match contexts ------------ */
/* Record a \R convention in a match context. Only PCRE2_BSR_ANYCRLF and
PCRE2_BSR_UNICODE are accepted; for any other value the context is left
untouched. Returns 1 when the value was stored, 0 when it was rejected. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_bsr_match(pcre2_match_context *mcontext, uint32_t value)
{
/* Reject anything that is not one of the two known settings. */
if (value != PCRE2_BSR_ANYCRLF && value != PCRE2_BSR_UNICODE) return 0;

mcontext->bsr_convention = value;
return 1;
}
/* Record a newline convention in a match context. The value must be one of
PCRE2_NEWLINE_CR, PCRE2_NEWLINE_LF, PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANY or
PCRE2_NEWLINE_ANYCRLF; any other value leaves the context untouched. Returns 1
when the value was stored, 0 when it was rejected. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_newline_match(pcre2_match_context *mcontext, uint32_t newline)
{
int recognised =
  newline == PCRE2_NEWLINE_CR ||
  newline == PCRE2_NEWLINE_LF ||
  newline == PCRE2_NEWLINE_CRLF ||
  newline == PCRE2_NEWLINE_ANY ||
  newline == PCRE2_NEWLINE_ANYCRLF;

if (!recognised) return 0;

mcontext->newline_convention = newline;
return 1;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_callout(pcre2_match_context *mcontext,
int (*callout)(pcre2_callout_block *, void *))
@ -338,7 +374,6 @@ mcontext->callout = callout;
return 1;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
{
@ -353,7 +388,6 @@ mcontext->recursion_limit = limit;
return 1;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
void *(*mymalloc)(size_t, void *),
@ -370,5 +404,4 @@ mcontext->stack_free = myfree;
return 1;
}
/* End of pcre2_context.c */

View File

@ -558,6 +558,8 @@ typedef struct pcre2_real_match_context {
void (*stack_free)(void *, void *);
#endif
int (*callout)(pcre2_callout_block *, void *);
uint16_t bsr_convention;
uint16_t newline_convention;
uint32_t match_limit;
uint32_t recursion_limit;
} pcre2_real_match_context;

View File

@ -81,7 +81,6 @@ int rc = PCRE2_ERROR_NOMATCH;
mcontext=mcontext;length=length;
options=options;
/* Fudges for testing pcre2test */
if (subject[0] == 'Y')

View File

@ -304,7 +304,8 @@ static const char *newlines[] = {
/* Modifier types and applicability */
enum { MOD_CTC, /* Applies to a compile context */
enum { MOD_CTB, /* Applies to a compile or a match context */
MOD_CTC, /* Applies to a compile context */
MOD_CTM, /* Applies to a match context */
MOD_PAT, /* Applies to a pattern */
MOD_PATP, /* Ditto, OK for Perl test */
@ -421,7 +422,7 @@ static modstruct modlist[] = {
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
{ "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
{ "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
{ "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
{ "bsr", MOD_CTB, MOD_BSR, MO(bsr_convention), CO(bsr_convention) },
{ "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
{ "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
@ -455,7 +456,7 @@ static modstruct modlist[] = {
{ "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
{ "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
{ "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
{ "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
{ "newline", MOD_CTB, MOD_NL, MO(newline_convention), CO(newline_convention) },
{ "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
{ "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
{ "no_start_optimize", MOD_PDP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PD(options) },
@ -2270,6 +2271,7 @@ static void *
check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
{
void *field = NULL;
size_t offset = m->offset;
if (restrict_for_perl_test) switch(m->which)
{
@ -2286,10 +2288,16 @@ if (restrict_for_perl_test) switch(m->which)
switch (m->which)
{
case MOD_CTB: /* Compile or match context modifier */
case MOD_CTC: /* Compile context modifier */
if (ctx == CTX_DEFPAT || ctx == CTX_DEFANY) field = PTR(default_pat_context);
else if (ctx == CTX_PAT) field = PTR(pat_context);
break;
if (field != NULL || m->which == MOD_CTC) break;
/* Fall through for something that can also be in a match context. In this
case the offset is taken from the other field. */
offset = (size_t)(m->value);
case MOD_CTM: /* Match context modifier */
if (ctx == CTX_DEFDAT || ctx == CTX_DEFANY) field = PTR(default_dat_context);
@ -2324,7 +2332,7 @@ if (field == NULL)
return NULL;
}
return (char *)field + m->offset;
return (char *)field + offset;
}