From 4739ccde40152656e73e0932733f93d0342d1ab6 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Tue, 24 Jun 2014 09:51:58 +0000 Subject: [PATCH] Allow \R and newline handling to be specified at match time (as for PCRE1). --- doc/pcre2test.1 | 31 ++++++++++++------- src/pcre2.h | 18 ++++++++---- src/pcre2.h.in | 18 ++++++++---- src/pcre2_context.c | 67 +++++++++++++++++++++++++++++++----------- src/pcre2_intmodedep.h | 2 ++ src/pcre2_match.c | 1 - src/pcre2test.c | 18 ++++++++---- 7 files changed, 111 insertions(+), 44 deletions(-) diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index fc4e202..6f038d1 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "13 May 2014" "PCRE 9.00" +.TH PCRE2TEST 1 "24 June 2014" "PCRE 10.00" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -18,13 +18,6 @@ options, see the .\" HREF \fBpcre2api\fP .\" -, -.\" HREF -\fBpcre16\fP -and -.\" HREF -\fBpcre32\fP -.\" documentation. .P The input for \fBpcre2test\fP is a sequence of regular expression patterns and @@ -170,6 +163,9 @@ compile phase. \fB-T\fP \fB-TM\fP These behave like \fB-t\fP and \fB-tm\fP, but in addition, at the end of a run, the total times for all compiles and matches are output. +.TP 10 +\fB-version\fP +Output the PCRE2 version number and then exit. . . .SH "DESCRIPTION" @@ -446,8 +442,11 @@ set to "anycrlf", \eR matches CR, LF, or CRLF only. If it is set to "unicode", is built, with the default default being Unicode. .P The \fBnewline\fP modifier specifies which characters are to be interpreted as -newlines, both in the pattern and in subject lines. The type must be one of -CR, LF, CRLF, ANYCRLF, or ANY. +newlines, both in the pattern and (by default) in subject lines. The type must +be one of CR, LF, CRLF, ANYCRLF, or ANY. +.P +Both the \eR and newline settings can be changed at match time, but if this is +done, JIT matching is disabled. . . 
.SS "Information about a pattern" @@ -685,6 +684,7 @@ pattern. allaftertext show text after captures allcaptures show all captures /gg altglobal alternative global matching + bsr=[anycrlf|unicode] specify \eR handling callout_capture show captures at callout time callout_fail=<n>[,<m>] control callout failure callout_none do not supply a callout function @@ -699,6 +699,7 @@ pattern. mark show mark values match_limit=>n> set a match limit memory show memory usage + newline=<type> set newline type offset=<n> set starting offset ovector=<n> set size of output vector recursion_limit=<n> set a recursion limit @@ -707,6 +708,14 @@ The effects of these modifiers are described in the following sections. FIXME: Give more examples. . . +.SS "Newline and \eR handling" +.rs +.sp +These modifiers set the newline and \eR processing conventions for the subject +line, overriding any values that were set at compile time (as described above). +JIT matching is disabled if these settings are changed at match time. +. +. .SS "Showing more text" .rs .sp @@ -1191,6 +1200,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 08 June 2014 +Last updated: 24 June 2014 Copyright (c) 1997-2014 University of Cambridge. .fi diff --git a/src/pcre2.h b/src/pcre2.h index 468802a..2e17c24 100644 --- a/src/pcre2.h +++ b/src/pcre2.h @@ -136,7 +136,9 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_DFA_RESTART 0x00000040 #define PCRE2_DFA_SHORTEST 0x00000080 -/* Newline and \R settings, for use in the compile context. */ +/* Newline and \R settings, for use in the compile and match contexts. The +newline values must be kept in step with values set in config.h and both sets +must all be greater than zero. 
*/ #define PCRE2_NEWLINE_CR 1 #define PCRE2_NEWLINE_LF 2 @@ -361,11 +363,11 @@ PCRE2_EXP_DECL \ PCRE2_EXP_DECL \ pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\ PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \ -PCRE2_EXP_DECL int pcre2_set_bsr_convention(pcre2_compile_context *, \ +PCRE2_EXP_DECL int pcre2_set_bsr_compile(pcre2_compile_context *, \ uint32_t); \ PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \ const unsigned char *); \ -PCRE2_EXP_DECL int pcre2_set_newline_convention(pcre2_compile_context *, \ +PCRE2_EXP_DECL int pcre2_set_newline_compile(pcre2_compile_context *, \ uint32_t); \ PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \ uint32_t); \ @@ -378,10 +380,14 @@ PCRE2_EXP_DECL \ PCRE2_EXP_DECL \ pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \ PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \ +PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \ + uint32_t); \ PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \ int (*)(pcre2_callout_block *, void *)); \ PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ uint32_t); \ +PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \ + uint32_t); \ PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \ uint32_t); \ PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \ @@ -556,12 +562,14 @@ pcre2_compile are called by application code. 
*/ #define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) #define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_) #define pcre2_pattern_to_host_byte_order PCRE2_SUFFIX(pcre2_pattern_to_host_byte_order_) -#define pcre2_set_bsr_convention PCRE2_SUFFIX(pcre2_set_bsr_convention_) +#define pcre2_set_bsr_compile PCRE2_SUFFIX(pcre2_set_bsr_compile_) +#define pcre2_set_bsr_match PCRE2_SUFFIX(pcre2_set_bsr_match_) #define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_) #define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) -#define pcre2_set_newline_convention PCRE2_SUFFIX(pcre2_set_newline_convention_) +#define pcre2_set_newline_compile PCRE2_SUFFIX(pcre2_set_newline_compile_) +#define pcre2_set_newline_match PCRE2_SUFFIX(pcre2_set_newline_match_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) #define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) diff --git a/src/pcre2.h.in b/src/pcre2.h.in index fa21b3f..d027ba3 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -136,7 +136,9 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_DFA_RESTART 0x00000040 #define PCRE2_DFA_SHORTEST 0x00000080 -/* Newline and \R settings, for use in the compile context. */ +/* Newline and \R settings, for use in the compile and match contexts. The +newline values must be kept in step with values set in config.h and both sets +must all be greater than zero. 
*/ #define PCRE2_NEWLINE_CR 1 #define PCRE2_NEWLINE_LF 2 @@ -361,11 +363,11 @@ PCRE2_EXP_DECL \ PCRE2_EXP_DECL \ pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\ PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \ -PCRE2_EXP_DECL int pcre2_set_bsr_convention(pcre2_compile_context *, \ +PCRE2_EXP_DECL int pcre2_set_bsr_compile(pcre2_compile_context *, \ uint32_t); \ PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \ const unsigned char *); \ -PCRE2_EXP_DECL int pcre2_set_newline_convention(pcre2_compile_context *, \ +PCRE2_EXP_DECL int pcre2_set_newline_compile(pcre2_compile_context *, \ uint32_t); \ PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \ uint32_t); \ @@ -378,10 +380,14 @@ PCRE2_EXP_DECL \ PCRE2_EXP_DECL \ pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \ PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \ +PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \ + uint32_t); \ PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \ int (*)(pcre2_callout_block *, void *)); \ PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ uint32_t); \ +PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \ + uint32_t); \ PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \ uint32_t); \ PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \ @@ -556,12 +562,14 @@ pcre2_compile are called by application code. 
*/ #define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) #define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_) #define pcre2_pattern_to_host_byte_order PCRE2_SUFFIX(pcre2_pattern_to_host_byte_order_) -#define pcre2_set_bsr_convention PCRE2_SUFFIX(pcre2_set_bsr_convention_) +#define pcre2_set_bsr_compile PCRE2_SUFFIX(pcre2_set_bsr_compile_) +#define pcre2_set_bsr_match PCRE2_SUFFIX(pcre2_set_bsr_match_) #define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_) #define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) -#define pcre2_set_newline_convention PCRE2_SUFFIX(pcre2_set_newline_convention_) +#define pcre2_set_newline_compile PCRE2_SUFFIX(pcre2_set_newline_compile_) +#define pcre2_set_newline_match PCRE2_SUFFIX(pcre2_set_newline_match_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) #define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) diff --git a/src/pcre2_context.c b/src/pcre2_context.c index 376a807..724a152 100644 --- a/src/pcre2_context.c +++ b/src/pcre2_context.c @@ -172,6 +172,8 @@ mcontext->stack_malloc = mcontext->malloc; mcontext->stack_free = mcontext->free; #endif mcontext->callout = NULL; +mcontext->newline_convention = 0; +mcontext->bsr_convention = 0; mcontext->match_limit = MATCH_LIMIT; mcontext->recursion_limit = MATCH_LIMIT_RECURSION; } @@ -269,8 +271,19 @@ if (mcontext != NULL) /* All these functions return 1 for success or 0 if invalid data is given. Only some of the functions are able to test the validity of the data. 
*/ + +/* ------------ Compile contexts ------------ */ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_character_tables(pcre2_compile_context *ccontext, + const unsigned char *tables) +{ +ccontext->tables = tables; +return 1; +} + PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_bsr_convention(pcre2_compile_context *ccontext, uint32_t value) +pcre2_set_bsr_compile(pcre2_compile_context *ccontext, uint32_t value) { switch(value) { @@ -284,18 +297,8 @@ switch(value) } } - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_character_tables(pcre2_compile_context *ccontext, - const unsigned char *tables) -{ -ccontext->tables = tables; -return 1; -} - - PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_newline_convention(pcre2_compile_context *ccontext, uint32_t newline) +pcre2_set_newline_compile(pcre2_compile_context *ccontext, uint32_t newline) { switch(newline) { @@ -312,7 +315,6 @@ switch(newline) } } - PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit) { @@ -320,7 +322,6 @@ ccontext->parens_nest_limit = limit; return 1; } - PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, int (*guard)(uint32_t)) @@ -330,6 +331,41 @@ return 1; } +/* ------------ Match contexts ------------ */ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_bsr_match(pcre2_match_context *mcontext, uint32_t value) +{ +switch(value) + { + case PCRE2_BSR_ANYCRLF: + case PCRE2_BSR_UNICODE: + mcontext->bsr_convention = value; + return 1; + + default: + return 0; + } +} + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_newline_match(pcre2_match_context *mcontext, uint32_t newline) +{ +switch(newline) + { + case PCRE2_NEWLINE_CR: + case PCRE2_NEWLINE_LF: + case PCRE2_NEWLINE_CRLF: + case PCRE2_NEWLINE_ANY: + case PCRE2_NEWLINE_ANYCRLF: + mcontext->newline_convention = newline; + return 1; + + default: + return 0; + } +} + PCRE2_EXP_DEFN int 
PCRE2_CALL_CONVENTION pcre2_set_callout(pcre2_match_context *mcontext, int (*callout)(pcre2_callout_block *, void *)) @@ -338,7 +374,6 @@ mcontext->callout = callout; return 1; } - PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit) { @@ -353,7 +388,6 @@ mcontext->recursion_limit = limit; return 1; } - PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_recursion_memory_management(pcre2_match_context *mcontext, void *(*mymalloc)(size_t, void *), @@ -370,5 +404,4 @@ mcontext->stack_free = myfree; return 1; } - /* End of pcre2_context.c */ diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index bc8e2bf..a02c6ac 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -558,6 +558,8 @@ typedef struct pcre2_real_match_context { void (*stack_free)(void *, void *); #endif int (*callout)(pcre2_callout_block *, void *); + uint16_t bsr_convention; + uint16_t newline_convention; uint32_t match_limit; uint32_t recursion_limit; } pcre2_real_match_context; diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 20c6ee0..67f8f9e 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -81,7 +81,6 @@ int rc = PCRE2_ERROR_NOMATCH; mcontext=mcontext;length=length; options=options; - /* Fudges for testing pcre2test */ if (subject[0] == 'Y') diff --git a/src/pcre2test.c b/src/pcre2test.c index 8f1f085..3dde9b2 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -304,7 +304,8 @@ static const char *newlines[] = { /* Modifier types and applicability */ -enum { MOD_CTC, /* Applies to a compile context */ +enum { MOD_CTB, /* Applies to a compile or a match context */ + MOD_CTC, /* Applies to a compile context */ MOD_CTM, /* Applies to a match context */ MOD_PAT, /* Applies to a pattern */ MOD_PATP, /* Ditto, OK for Perl test */ @@ -421,7 +422,7 @@ static modstruct modlist[] = { { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) }, { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, 
{ "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) }, - { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) }, + { "bsr", MOD_CTB, MOD_BSR, MO(bsr_convention), CO(bsr_convention) }, { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) }, { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) }, { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, @@ -455,7 +456,7 @@ static modstruct modlist[] = { { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) }, { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) }, { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) }, - { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) }, + { "newline", MOD_CTB, MOD_NL, MO(newline_convention), CO(newline_convention) }, { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, { "no_start_optimize", MOD_PDP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PD(options) }, @@ -2270,6 +2271,7 @@ static void * check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c) { void *field = NULL; +size_t offset = m->offset; if (restrict_for_perl_test) switch(m->which) { @@ -2286,10 +2288,16 @@ if (restrict_for_perl_test) switch(m->which) switch (m->which) { + case MOD_CTB: /* Compile or match context modifier */ case MOD_CTC: /* Compile context modifier */ if (ctx == CTX_DEFPAT || ctx == CTX_DEFANY) field = PTR(default_pat_context); else if (ctx == CTX_PAT) field = PTR(pat_context); - break; + if (field != NULL || m->which == MOD_CTC) break; + + /* Fall through for something that can also be in a match context. In this + case the offset is taken from the other field. 
*/ + + offset = (size_t)(m->value); case MOD_CTM: /* Match context modifier */ if (ctx == CTX_DEFDAT || ctx == CTX_DEFANY) field = PTR(default_dat_context); @@ -2324,7 +2332,7 @@ if (field == NULL) return NULL; } -return (char *)field + m->offset; +return (char *)field + offset; }