From e036c5fba0df38905d79eadfac2471507e348fbf Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Wed, 1 Oct 2014 17:02:33 +0000 Subject: [PATCH] Remove the ability to change newlines and \R at match time. --- doc/pcre2api.3 | 47 ++-------- doc/pcre2test.1 | 13 --- src/pcre2.h.in | 22 ++--- src/pcre2_context.c | 39 +------- src/pcre2_dfa_match.c | 14 +-- src/pcre2_intmodedep.h | 2 - src/pcre2_match.c | 17 +--- src/pcre2grep.c | 2 +- src/pcre2test.c | 15 +--- testdata/testinput2 | 98 ++++++++------------ testdata/testinput5 | 5 -- testdata/testinput6 | 112 +++++++++++------------ testdata/testinput7 | 6 -- testdata/testoutput2 | 183 ++++++++++++++----------------------- testdata/testoutput5 | 10 --- testdata/testoutput6 | 199 ++++++++++++++++++++++------------------- testdata/testoutput7 | 12 --- 17 files changed, 288 insertions(+), 508 deletions(-) diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 8e2d047..7a359b6 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "16 September 2014" "PCRE2 10.00" +.TH PCRE2API 3 "01 October 2014" "PCRE2 10.00" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -88,13 +88,13 @@ document for an overview of all the PCRE2 documentation. .sp .B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP); .sp -.B int pcre2_set_bsr_compile(pcre2_compile_context *\fIccontext\fP, +.B int pcre2_set_bsr(pcre2_compile_context *\fIccontext\fP, .B " uint32_t \fIvalue\fP);" .sp .B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP, .B " const unsigned char *\fItables\fP);" .sp -.B int pcre2_set_newline_compile(pcre2_compile_context *\fIccontext\fP, +.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP, .B " uint32_t \fIvalue\fP);" .sp .B int pcre2_set_parens_nest_limit(pcre2_compile_context *\fIccontext\fP, @@ -117,9 +117,6 @@ document for an overview of all the PCRE2 documentation. .sp .B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP); .sp -.B int pcre2_set_bsr_match(pcre2_match_context *\fImcontext\fP, -.B " uint32_t \fIvalue\fP);" -.sp .B int pcre2_set_callout(pcre2_match_context *\fImcontext\fP, .B " int (*\fIcallout_function\fP)(pcre2_callout_block *)," .B " void *\fIcallout_data\fP);" @@ -127,9 +124,6 @@ document for an overview of all the PCRE2 documentation. .B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP, .B " uint32_t \fIvalue\fP);" .sp -.B int pcre2_set_newline_match(pcre2_match_context *\fImcontext\fP, -.B " uint32_t \fIvalue\fP);" -.sp .B int pcre2_set_recursion_limit(pcre2_match_context *\fImcontext\fP, .B " uint32_t \fIvalue\fP);" .sp @@ -549,7 +543,7 @@ be changed by calling the following functions, which return 0 on success, or PCRE2_ERROR_BADDATA if invalid data is detected. .sp .nf -.B int pcre2_set_bsr_compile(pcre2_compile_context *\fIccontext\fP, +.B int pcre2_set_bsr(pcre2_compile_context *\fIccontext\fP, .B " uint32_t \fIvalue\fP);" .fi .sp @@ -558,8 +552,7 @@ or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line ending sequence. The value of this parameter does not affect what is compiled; it is just saved with the compiled pattern. The value is used by the JIT compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and -\fIpcre2_dfa_match()\fP. You can change the value when calling these functions, -but doing so disables the use of JIT. +\fIpcre2_dfa_match()\fP. .sp .nf .B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP, @@ -571,7 +564,7 @@ argument is a general context. This function builds a set of character tables in the current locale. .sp .nf -.B int pcre2_set_newline_compile(pcre2_compile_context *\fIccontext\fP, +.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP, .B " uint32_t \fIvalue\fP);" .fi .sp @@ -585,8 +578,7 @@ When a pattern is compiled with the PCRE2_EXTENDED option, the value of this parameter affects the recognition of white space and the end of internal comments starting with #. The value is saved with the compiled pattern for subsequent use by the JIT compiler and by the two interpreted matching -functions, \fIpcre2_match()\fP and \fIpcre2_dfa_match()\fP. You can change the -value when calling these functions, but doing so disables the use of JIT. +functions, \fIpcre2_match()\fP and \fIpcre2_dfa_match()\fP. .sp .nf .B int pcre2_set_parens_nest_limit(pcre2_compile_context *\fIccontext\fP, @@ -647,16 +639,6 @@ be changed by calling the following functions, which return 0 on success, or PCRE2_ERROR_BADDATA if invalid data is detected. .sp .nf -.B int pcre2_set_bsr_match(pcre2_match_context *\fImcontext\fP, -.B " uint32_t \fIvalue\fP);" -.fi -.sp -The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF, -or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line -ending sequence. If you want to make use of JIT matching, you should not use -this function, but instead set the value in a compile context. -.sp -.nf .B int pcre2_set_callout(pcre2_match_context *\fImcontext\fP, .B " int (*\fIcallout_function\fP)(pcre2_callout_block *)," .B " void *\fIcallout_data\fP);" @@ -736,19 +718,6 @@ less than the limit set by the caller of \fBpcre2_match()\fP or, if no such limit is set, less than the default. .sp .nf -.B int pcre2_set_newline_match(pcre2_match_context *\fImcontext\fP, -.B " uint32_t \fIvalue\fP);" -.fi -.sp -This specifies which characters or character sequences are to be recognized as -newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only), -PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character -sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or -PCRE2_NEWLINE_ANY (any Unicode newline sequence). If you want to make use of -JIT matching, you should not use this function, but instead set the value in a -compile context. -.sp -.nf .B int pcre2_set_recursion_memory_management( .B " pcre2_match_context *\fImcontext\fP," .B " void *(*\fIprivate_malloc\fP)(PCRE2_SIZE, void *)," @@ -2683,6 +2652,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 16 September 2014 +Last updated: 01 October 2014 Copyright (c) 1997-2014 University of Cambridge. .fi diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index bb71fbc..dcd0930 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -458,9 +458,6 @@ is built, with the default default being Unicode. The \fBnewline\fP modifier specifies which characters are to be interpreted as newlines, both in the pattern and (by default) in subject lines. The type must be one of CR, LF, CRLF, ANYCRLF, or ANY. -.P -Both the \eR and newline settings can be changed at match time, but if this is -done, JIT matching is disabled. . . .SS "Information about a pattern" @@ -693,7 +690,6 @@ pattern. allcaptures show all captures allusedtext show all consulted text altglobal alternative global matching - bsr=[anycrlf|unicode] specify \eR handling callout_capture show captures at callout time callout_data= set a value to pass via callouts callout_fail=[:] control callout failure @@ -709,7 +705,6 @@ pattern. mark show mark values match_limit=>n> set a match limit memory show memory usage - newline= set newline type offset= set starting offset ovector= set size of output vector recursion_limit= set a recursion limit @@ -718,14 +713,6 @@ The effects of these modifiers are described in the following sections. FIXME: Give more examples. . . -.SS "Newline and \eR handling" -.rs -.sp -These modifiers set the newline and \eR processing conventions for the subject -line, overriding any values that were set at compile time (as described above). -JIT matching is disabled if these settings are changed at match time. -. -. .SS "Showing more text" .rs .sp diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 8ee1776..266d889 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -140,9 +140,9 @@ functions, so take care not to define synonyms by mistake. */ #define PCRE2_DFA_RESTART 0x00000040u #define PCRE2_DFA_SHORTEST 0x00000080u -/* Newline and \R settings, for use in the compile and match contexts. The -newline values must be kept in step with values set in config.h and both sets -must all be greater than zero. */ +/* Newline and \R settings, for use in compile contexts. The newline values +must be kept in step with values set in config.h and both sets must all be +greater than zero. */ #define PCRE2_NEWLINE_CR 1 #define PCRE2_NEWLINE_LF 2 @@ -356,12 +356,10 @@ PCRE2_EXP_DECL \ PCRE2_EXP_DECL \ pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\ PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \ -PCRE2_EXP_DECL int pcre2_set_bsr_compile(pcre2_compile_context *, \ - uint32_t); \ +PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \ const unsigned char *); \ -PCRE2_EXP_DECL int pcre2_set_newline_compile(pcre2_compile_context *, \ - uint32_t); \ +PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \ uint32_t); \ PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\ @@ -373,14 +371,10 @@ PCRE2_EXP_DECL \ PCRE2_EXP_DECL \ pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \ PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \ -PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \ - uint32_t); \ PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \ int (*)(pcre2_callout_block *), void *); \ PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \ - uint32_t); \ PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \ uint32_t); \ PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \ @@ -552,14 +546,12 @@ pcre2_compile are called by application code. */ #define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_) #define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) #define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_) -#define pcre2_set_bsr_compile PCRE2_SUFFIX(pcre2_set_bsr_compile_) -#define pcre2_set_bsr_match PCRE2_SUFFIX(pcre2_set_bsr_match_) +#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_) #define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_) #define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) -#define pcre2_set_newline_compile PCRE2_SUFFIX(pcre2_set_newline_compile_) -#define pcre2_set_newline_match PCRE2_SUFFIX(pcre2_set_newline_match_) +#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) #define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) diff --git a/src/pcre2_context.c b/src/pcre2_context.c index f4e592a..6856f2d 100644 --- a/src/pcre2_context.c +++ b/src/pcre2_context.c @@ -169,8 +169,6 @@ mcontext->stack_memctl = mcontext->memctl; #endif mcontext->callout = NULL; mcontext->callout_data = NULL; -mcontext->newline_convention = 0; -mcontext->bsr_convention = 0; mcontext->match_limit = MATCH_LIMIT; mcontext->recursion_limit = MATCH_LIMIT_RECURSION; } @@ -279,7 +277,7 @@ return 0; } PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_bsr_compile(pcre2_compile_context *ccontext, uint32_t value) +pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value) { switch(value) { @@ -294,7 +292,7 @@ switch(value) } PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_newline_compile(pcre2_compile_context *ccontext, uint32_t newline) +pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline) { switch(newline) { @@ -329,39 +327,6 @@ return 0; /* ------------ Match contexts ------------ */ -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_bsr_match(pcre2_match_context *mcontext, uint32_t value) -{ -switch(value) - { - case PCRE2_BSR_ANYCRLF: - case PCRE2_BSR_UNICODE: - mcontext->bsr_convention = value; - return 0; - - default: - return PCRE2_ERROR_BADDATA; - } -} - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_newline_match(pcre2_match_context *mcontext, uint32_t newline) -{ -switch(newline) - { - case PCRE2_NEWLINE_CR: - case PCRE2_NEWLINE_LF: - case PCRE2_NEWLINE_CRLF: - case PCRE2_NEWLINE_ANY: - case PCRE2_NEWLINE_ANYCRLF: - mcontext->newline_convention = newline; - return 0; - - default: - return PCRE2_ERROR_BADDATA; - } -} - PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_callout(pcre2_match_context *mcontext, int (*callout)(pcre2_callout_block *), void *callout_data) diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index 3b3c3ba..d7168b0 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -3069,7 +3069,6 @@ pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, { const pcre2_real_code *re = (const pcre2_real_code *)code; pcre2_match_context default_context; /* For use if no context given */ -int newline; PCRE2_SPTR start_match; PCRE2_SPTR end_subject; @@ -3203,18 +3202,11 @@ mb->start_offset = start_offset; mb->moptions = options; mb->poptions = re->overall_options; -/* The match context /R convention, if set, overrides. */ - -mb->bsr_convention = (mcontext->bsr_convention != 0)? - mcontext->bsr_convention : re->bsr_convention; - -/* Process the newline setting. */ - -newline = (mcontext->newline_convention == 0)? - re->newline_convention : mcontext->newline_convention; +/* Process the \R and newline settings. */ +mb->bsr_convention = re->bsr_convention; mb->nltype = NLTYPE_FIXED; -switch(newline) +switch(re->newline_convention) { case PCRE2_NEWLINE_CR: mb->nllen = 1; diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index f0ee6f7..a881132 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -570,8 +570,6 @@ typedef struct pcre2_real_match_context { #endif int (*callout)(pcre2_callout_block *); void *callout_data; - uint16_t bsr_convention; - uint16_t newline_convention; uint32_t match_limit; uint32_t recursion_limit; } pcre2_real_match_context; diff --git a/src/pcre2_match.c b/src/pcre2_match.c index e0feb53..07b5855 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -6346,7 +6346,6 @@ pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, pcre2_match_context *mcontext) { int rc; -int newline; int ocount; const uint8_t *start_bits = NULL; @@ -6482,8 +6481,7 @@ an unsupported option is set or if JIT returns BADOPTION (which means that the selected normal or partial matching mode was not compiled). */ #ifdef SUPPORT_JIT -if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0 && - mcontext->bsr_convention == 0 && mcontext->newline_convention == 0) +if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0) { rc = pcre2_jit_match(code, subject, length, start_offset, options, match_data, mcontext, NULL); @@ -6553,18 +6551,11 @@ mb->lcc = re->tables + lcc_offset; mb->fcc = re->tables + fcc_offset; mb->ctypes = re->tables + ctypes_offset; -/* The match context /R convention, if set, overrides. */ - -mb->bsr_convention = (mcontext->bsr_convention != 0)? - mcontext->bsr_convention : re->bsr_convention; - -/* Process the newline setting. */ - -newline = (mcontext->newline_convention == 0)? - re->newline_convention : mcontext->newline_convention; +/* Process the \R and newline settings. */ +mb->bsr_convention = re->bsr_convention; mb->nltype = NLTYPE_FIXED; -switch(newline) +switch(re->newline_convention) { case PCRE2_NEWLINE_CR: mb->nllen = 1; diff --git a/src/pcre2grep.c b/src/pcre2grep.c index 71022ac..003d8fb 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -3013,7 +3013,7 @@ if (newline_arg != NULL) if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break; } if (endlinetype < (int)(sizeof(newlines)/sizeof(char *))) - pcre2_set_newline_compile(compile_context, endlinetype); + pcre2_set_newline(compile_context, endlinetype); else { fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n", diff --git a/src/pcre2test.c b/src/pcre2test.c index cd1d1b5..7c1f2ae 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -296,8 +296,7 @@ static const char *newlines[] = { /* Modifier types and applicability */ -enum { MOD_CTB, /* Applies to a compile or a match context */ - MOD_CTC, /* Applies to a compile context */ +enum { MOD_CTC, /* Applies to a compile context */ MOD_CTM, /* Applies to a match context */ MOD_PAT, /* Applies to a pattern */ MOD_PATP, /* Ditto, OK for Perl test */ @@ -424,7 +423,7 @@ static modstruct modlist[] = { { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) }, { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) }, - { "bsr", MOD_CTB, MOD_BSR, MO(bsr_convention), CO(bsr_convention) }, + { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) }, { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) }, { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) }, { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) }, @@ -458,7 +457,7 @@ static modstruct modlist[] = { { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) }, { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) }, { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) }, - { "newline", MOD_CTB, MOD_NL, MO(newline_convention), CO(newline_convention) }, + { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) }, { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) }, @@ -2529,16 +2528,10 @@ if (restrict_for_perl_test) switch(m->which) switch (m->which) { - case MOD_CTB: /* Compile or match context modifier */ case MOD_CTC: /* Compile context modifier */ if (ctx == CTX_DEFPAT) field = PTR(default_pat_context); else if (ctx == CTX_PAT) field = PTR(pat_context); - if (field != NULL || m->which == MOD_CTC) break; - - /* Fall through for something that can also be in a match context. In this - case the offset is taken from the other field. */ - - offset = (PCRE2_SIZE)(m->value); + break; case MOD_CTM: /* Match context modifier */ if (ctx == CTX_DEFDAT) field = PTR(default_dat_context); diff --git a/testdata/testinput2 b/testdata/testinput2 index d986f3d..632abe2 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -1540,67 +1540,44 @@ a random value. /Ix \x0b,\x0b \x0c,\x0d -/^abc/Im +/^abc/Im,newline=lf xyz\nabc - xyz\nabc>\=newline=lf - xyz\r\nabc>\=newline=lf - xyz\rabc>\=newline=cr - xyz\r\nabclf>\=newline=crlf - ** Failers - xyz\nabc>\=newline=cr - xyz\r\nabc>\=newline=cr - xyz\nabclf>\=newline=crlf - xyz\rabclf>\=newline=crlf - xyz\rabc>\=newline=lf - -/abc$/Im,newline=lf - xyzabc - xyzabc\n - xyzabc\npqr - xyzabc\r>\=newline=cr - xyzabc\rpqr>\=newline=cr - xyzabc\r\nlf>\=newline=crlf - xyzabc\r\npqrlf>\=newline=crlf + xyz\r\nabc ** Failers + xyz\rabc xyzabc\r xyzabc\rpqr xyzabc\r\n xyzabc\r\npqr -/^abc/Im,newline=cr - xyz\rabcdef - xyz\nabcdef>\=newline=lf - ** Failers - xyz\nabcdef - -/^abc/Im,newline=lf - xyz\nabcdef - xyz\rabcdef>\=newline=cr - ** Failers - xyz\rabcdef - /^abc/Im,newline=crlf - xyz\r\nabcdef - xyz\rabcdef>\=newline=cr + xyz\r\nabclf> ** Failers - xyz\rabcdef + xyz\nabclf + xyz\rabclf + +/^abc/Im,newline=cr + xyz\rabc + ** Failers + xyz\nabc + xyz\r\nabc /^abc/Im,newline=bad -/abc/I - xyz\rabc\=newline=bad - abc - /.*/I,newline=lf abc\ndef abc\rdef abc\r\ndef - abc\ndef\=newline=cr - abc\rdef\=newline=cr - abc\r\ndef\=newline=cr - abc\ndef\=newline=crlf - abc\rdef\=newline=crlf - abc\r\ndef\=newline=crlf + +/.*/I,newline=cr + abc\ndef + abc\rdef + abc\r\ndef + +/.*/I,newline=crlf + abc\ndef + abc\rdef + abc\r\ndef /\w+(.)(.)?def/Is abc\ndef @@ -1920,15 +1897,24 @@ a random value. /Ix /^a.b/newline=lf a\rb - a\nb>\=newline=cr - a\x85b>\=newline=anycrlf ** Failers a\nb - a\nb\=newline=any - a\rb>\=newline=cr - a\rb\=newline=any - a\x85b\=newline=any - a\rb>\=newline=anycrlf + +/^a.b/newline=cr + a\nb + ** Failers + a\rb + +/^a.b/newline=anycrlf + a\x85b + ** Failers + a\rb + +/^a.b/newline=any + ** Failers + a\nb + a\rb + a\x85b /^abc./gmx,newline=any abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK @@ -2281,9 +2267,6 @@ a random value. /Ix a\r\nb a\x85b a\x0bb - ** Failers - a\x85b\=bsr=anycrlf - a\x0bb\=bsr=anycrlf /a\R?b/I,bsr=anycrlf a\rb @@ -2299,9 +2282,6 @@ a random value. /Ix a\r\nb a\x85b a\x0bb - ** Failers - a\x85b\=bsr=anycrlf - a\x0bb\=bsr=anycrlf /a\R{2,4}b/I,bsr=anycrlf a\r\n\nb @@ -2319,8 +2299,6 @@ a random value. /Ix a\x0b\x0bb ** Failers a\r\r\r\r\rb - a\x85\x85b\=bsr=anycrlf - a\x0b\x0bb\=bsr=anycrlf /(*BSR_ANYCRLF)a\Rb/I a\nb diff --git a/testdata/testinput5 b/testdata/testinput5 index b394445..eff10f9 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -310,9 +310,6 @@ a\r\nb a\x{85}b a\x0bb - ** Failers - a\x{85}b\=bsr=anycrlf - a\x0bb\=bsr=anycrlf /a\R?b/I,bsr=anycrlf,utf a\rb @@ -329,8 +326,6 @@ a\x{85}b a\x0bb ** Failers - a\x{85}b\=bsr=anycrlf - a\x0bb\=bsr=anycrlf /.*a.*=.b.*/utf,newline=any QQQ\x{2029}ABCaXYZ=!bPQR diff --git a/testdata/testinput6 b/testdata/testinput6 index 3da45de..d748136 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -4003,61 +4003,47 @@ /Content-Type\x3A[^a]{6,}z/ Content-Type:xxxyyyz -/^abc/m +/^abc/Im,newline=lf xyz\nabc - xyz\nabc\=newline=lf - xyz\r\nabc\=newline=lf - xyz\rabc\=newline=cr - xyz\r\nabc\=newline=crlf - ** Failers - xyz\nabc\=newline=cr - xyz\r\nabc\=newline=cr - xyz\nabc\=newline=crlf - xyz\rabc\=newline=crlf - xyz\rabc\=newline=lf - -/abc$/m,newline=lf - xyzabc - xyzabc\n - xyzabc\npqr - xyzabc\r\=newline=cr - xyzabc\rpqr\=newline=cr - xyzabc\r\n\=newline=crlf - xyzabc\r\npqr\=newline=crlf + xyz\r\nabc ** Failers - xyzabc\r - xyzabc\rpqr - xyzabc\r\n - xyzabc\r\npqr + xyz\rabc + xyzabc\r + xyzabc\rpqr + xyzabc\r\n + xyzabc\r\npqr + +/^abc/Im,newline=crlf + xyz\r\nabclf> + ** Failers + xyz\nabclf + xyz\rabclf -/^abc/m,newline=cr - xyz\rabcdef - xyz\nabcdef\=newline=lf - ** Failers - xyz\nabcdef - -/^abc/m,newline=lf - xyz\nabcdef - xyz\rabcdef\=newline=cr - ** Failers - xyz\rabcdef - -/^abc/m,newline=crlf - xyz\r\nabcdef - xyz\rabcdef\=newline=cr - ** Failers - xyz\rabcdef - -/.*/newline=lf +/^abc/Im,newline=cr + xyz\rabc + ** Failers + xyz\nabc + xyz\r\nabc + +/.*/I,newline=lf + abc\ndef + abc\rdef + abc\r\ndef + +/.*/I,newline=cr + abc\ndef + abc\rdef + abc\r\ndef + +/.*/I,newline=crlf + abc\ndef + abc\rdef + abc\r\ndef + +/\w+(.)(.)?def/Is abc\ndef abc\rdef abc\r\ndef - abc\ndef\=newline=cr - abc\rdef\=newline=cr - abc\r\ndef\=newline=cr - abc\ndef\=newline=crlf - abc\rdef\=newline=crlf - abc\r\ndef\=newline=crlf /\w+(.)(.)?def/s abc\ndef @@ -4093,12 +4079,24 @@ /^a.b/newline=lf a\rb - a\nb\=newline=cr ** Failers a\nb - a\nb\=newline=any - a\rb\=newline=cr - a\rb\=newline=any + +/^a.b/newline=cr + a\nb + ** Failers + a\rb + +/^a.b/newline=anycrlf + a\x85b + ** Failers + a\rb + +/^a.b/newline=any + ** Failers + a\nb + a\rb + a\x85b /^abc./gmx,newline=any abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK @@ -4269,9 +4267,6 @@ a\r\nb a\x85b a\x0bb - ** Failers - a\x85b\=bsr=anycrlf - a\x0bb\=bsr=anycrlf /a\R?b/I,bsr=anycrlf a\rb @@ -4287,9 +4282,6 @@ a\r\nb a\x85b a\x0bb - ** Failers - a\x85b\=bsr=anycrlf - a\x0bb\=bsr=anycrlf /a\R{2,4}b/I,bsr=anycrlf a\r\n\nb @@ -4307,8 +4299,6 @@ a\x0b\0bb ** Failers a\r\r\r\r\rb - a\x85\x85b\=bsr=anycrlf - a\x0b\0bb\=bsr=anycrlf /a(?!)|\wbc/ abc diff --git a/testdata/testinput7 b/testdata/testinput7 index 2faacea..879a414 100644 --- a/testdata/testinput7 +++ b/testdata/testinput7 @@ -638,9 +638,6 @@ a\r\nb a\x{85}b a\x0bb - ** Failers - a\x{85}b\=bsr=anycrlf - a\x0bb\=bsr=anycrlf /a\R?b/I,bsr=anycrlf,utf a\rb @@ -656,9 +653,6 @@ a\r\nb a\x{85}b a\x0bb - ** Failers - a\x{85}b\=bsr=anycrlf - a\x0bb\=bsr=anycrlf /X/newline=any,utf,firstline A\x{1ec5}ABCXYZ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 8a0e7c4..aed60b7 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -6024,57 +6024,20 @@ Subject length lower bound = 1 \x0c,\x0d 0: \x0c,\x0d -/^abc/Im +/^abc/Im,newline=lf Capturing subpattern count = 0 Options: multiline +Forced newline is LF First code unit at start or follows newline Last code unit = 'c' Subject length lower bound = 3 xyz\nabc 0: abc - xyz\nabc>\=newline=lf - 0: abc - xyz\r\nabc>\=newline=lf - 0: abc - xyz\rabc>\=newline=cr - 0: abc - xyz\r\nabclf>\=newline=crlf + xyz\r\nabc 0: abc ** Failers No match - xyz\nabc>\=newline=cr -No match - xyz\r\nabc>\=newline=cr -No match - xyz\nabclf>\=newline=crlf -No match - xyz\rabclf>\=newline=crlf -No match - xyz\rabc>\=newline=lf -No match - -/abc$/Im,newline=lf -Capturing subpattern count = 0 -Options: multiline -Forced newline is LF -First code unit = 'a' -Last code unit = 'c' -Subject length lower bound = 3 - xyzabc - 0: abc - xyzabc\n - 0: abc - xyzabc\npqr - 0: abc - xyzabc\r>\=newline=cr - 0: abc - xyzabc\rpqr>\=newline=cr - 0: abc - xyzabc\r\nlf>\=newline=crlf - 0: abc - xyzabc\r\npqrlf>\=newline=crlf - 0: abc - ** Failers + xyz\rabc No match xyzabc\r No match @@ -6085,38 +6048,6 @@ No match xyzabc\r\npqr No match -/^abc/Im,newline=cr -Capturing subpattern count = 0 -Options: multiline -Forced newline is CR -First code unit at start or follows newline -Last code unit = 'c' -Subject length lower bound = 3 - xyz\rabcdef - 0: abc - xyz\nabcdef>\=newline=lf - 0: abc - ** Failers -No match - xyz\nabcdef -No match - -/^abc/Im,newline=lf -Capturing subpattern count = 0 -Options: multiline -Forced newline is LF -First code unit at start or follows newline -Last code unit = 'c' -Subject length lower bound = 3 - xyz\nabcdef - 0: abc - xyz\rabcdef>\=newline=cr - 0: abc - ** Failers -No match - xyz\rabcdef -No match - /^abc/Im,newline=crlf Capturing subpattern count = 0 Options: multiline @@ -6124,28 +6055,34 @@ Forced newline is CRLF First code unit at start or follows newline Last code unit = 'c' Subject length lower bound = 3 - xyz\r\nabcdef - 0: abc - xyz\rabcdef>\=newline=cr + xyz\r\nabclf> 0: abc ** Failers No match - xyz\rabcdef + xyz\nabclf +No match + xyz\rabclf +No match + +/^abc/Im,newline=cr +Capturing subpattern count = 0 +Options: multiline +Forced newline is CR +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\rabc + 0: abc + ** Failers +No match + xyz\nabc +No match + xyz\r\nabc No match /^abc/Im,newline=bad ** Invalid value in 'newline=bad' -/abc/I -Capturing subpattern count = 0 -First code unit = 'a' -Last code unit = 'c' -Subject length lower bound = 3 - xyz\rabc\=newline=bad -** Invalid value in 'newline=bad' - abc - 0: abc - /.*/I,newline=lf Capturing subpattern count = 0 May match empty string @@ -6158,17 +6095,31 @@ Subject length lower bound = 0 0: abc\x0ddef abc\r\ndef 0: abc\x0d - abc\ndef\=newline=cr + +/.*/I,newline=cr +Capturing subpattern count = 0 +May match empty string +Forced newline is CR +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef 0: abc\x0adef - abc\rdef\=newline=cr + abc\rdef 0: abc - abc\r\ndef\=newline=cr + abc\r\ndef 0: abc - abc\ndef\=newline=crlf + +/.*/I,newline=crlf +Capturing subpattern count = 0 +May match empty string +Forced newline is CRLF +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef 0: abc\x0adef - abc\rdef\=newline=crlf + abc\rdef 0: abc\x0ddef - abc\r\ndef\=newline=crlf + abc\r\ndef 0: abc /\w+(.)(.)?def/Is @@ -7457,23 +7408,35 @@ Matched, but too many substrings /^a.b/newline=lf a\rb 0: a\x0db - a\nb>\=newline=cr - 0: a\x0ab - a\x85b>\=newline=anycrlf - 0: a\x85b ** Failers No match a\nb No match - a\nb\=newline=any + +/^a.b/newline=cr + a\nb + 0: a\x0ab + ** Failers No match - a\rb>\=newline=cr + a\rb No match - a\rb\=newline=any + +/^a.b/newline=anycrlf + a\x85b + 0: a\x85b + ** Failers No match - a\x85b\=newline=any + a\rb No match - a\rb>\=newline=anycrlf + +/^a.b/newline=any + ** Failers +No match + a\nb +No match + a\rb +No match + a\x85b No match /^abc./gmx,newline=any @@ -8499,12 +8462,6 @@ Subject length lower bound = 3 0: a\x85b a\x0bb 0: a\x0bb - ** Failers -No match - a\x85b\=bsr=anycrlf -No match - a\x0bb\=bsr=anycrlf -No match /a\R?b/I,bsr=anycrlf Capturing subpattern count = 0 @@ -8541,12 +8498,6 @@ Subject length lower bound = 2 0: a\x85b a\x0bb 0: a\x0bb - ** Failers -No match - a\x85b\=bsr=anycrlf -No match - a\x0bb\=bsr=anycrlf -No match /a\R{2,4}b/I,bsr=anycrlf Capturing subpattern count = 0 @@ -8587,10 +8538,6 @@ Subject length lower bound = 4 No match a\r\r\r\r\rb No match - a\x85\x85b\=bsr=anycrlf -No match - a\x0b\x0bb\=bsr=anycrlf -No match /(*BSR_ANYCRLF)a\Rb/I Capturing subpattern count = 0 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 225556f..d0f3bef 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -779,12 +779,6 @@ Subject length lower bound = 3 0: a\x{85}b a\x0bb 0: a\x{0b}b - ** Failers -No match - a\x{85}b\=bsr=anycrlf -No match - a\x0bb\=bsr=anycrlf -No match /a\R?b/I,bsr=anycrlf,utf Capturing subpattern count = 0 @@ -825,10 +819,6 @@ Subject length lower bound = 2 0: a\x{0b}b ** Failers No match - a\x{85}b\=bsr=anycrlf -No match - a\x0bb\=bsr=anycrlf -No match /.*a.*=.b.*/utf,newline=any QQQ\x{2029}ABCaXYZ=!bPQR diff --git a/testdata/testoutput6 b/testdata/testoutput6 index 6ebb69e..9dd1fdc 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -6226,106 +6226,115 @@ Partial match: 123 Content-Type:xxxyyyz 0: Content-Type:xxxyyyz -/^abc/m +/^abc/Im,newline=lf +Capturing subpattern count = 0 +Options: multiline +Forced newline is LF +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 xyz\nabc 0: abc - xyz\nabc\=newline=lf - 0: abc - xyz\r\nabc\=newline=lf - 0: abc - xyz\rabc\=newline=cr - 0: abc - xyz\r\nabc\=newline=crlf - 0: abc - ** Failers -No match - xyz\nabc\=newline=cr -No match - xyz\r\nabc\=newline=cr -No match - xyz\nabc\=newline=crlf -No match - xyz\rabc\=newline=crlf -No match - xyz\rabc\=newline=lf -No match - -/abc$/m,newline=lf - xyzabc - 0: abc - xyzabc\n - 0: abc - xyzabc\npqr - 0: abc - xyzabc\r\=newline=cr - 0: abc - xyzabc\rpqr\=newline=cr - 0: abc - xyzabc\r\n\=newline=crlf - 0: abc - xyzabc\r\npqr\=newline=crlf + xyz\r\nabc 0: abc ** Failers No match - xyzabc\r + xyz\rabc No match - xyzabc\rpqr + xyzabc\r No match - xyzabc\r\n + xyzabc\rpqr No match - xyzabc\r\npqr + xyzabc\r\n +No match + xyzabc\r\npqr +No match + +/^abc/Im,newline=crlf +Capturing subpattern count = 0 +Options: multiline +Forced newline is CRLF +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\r\nabclf> + 0: abc + ** Failers +No match + xyz\nabclf +No match + xyz\rabclf No match -/^abc/m,newline=cr - xyz\rabcdef +/^abc/Im,newline=cr +Capturing subpattern count = 0 +Options: multiline +Forced newline is CR +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\rabc 0: abc - xyz\nabcdef\=newline=lf - 0: abc - ** Failers + ** Failers No match - xyz\nabcdef + xyz\nabc No match - -/^abc/m,newline=lf - xyz\nabcdef - 0: abc - xyz\rabcdef\=newline=cr - 0: abc - ** Failers + xyz\r\nabc No match - xyz\rabcdef -No match - -/^abc/m,newline=crlf - xyz\r\nabcdef - 0: abc - xyz\rabcdef\=newline=cr - 0: abc - ** Failers -No match - xyz\rabcdef -No match - -/.*/newline=lf + +/.*/I,newline=lf +Capturing subpattern count = 0 +May match empty string +Forced newline is LF +First code unit at start or follows newline +Subject length lower bound = 0 abc\ndef 0: abc abc\rdef 0: abc\x0ddef abc\r\ndef 0: abc\x0d - abc\ndef\=newline=cr + +/.*/I,newline=cr +Capturing subpattern count = 0 +May match empty string +Forced newline is CR +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef 0: abc\x0adef - abc\rdef\=newline=cr + abc\rdef 0: abc - abc\r\ndef\=newline=cr + abc\r\ndef 0: abc - abc\ndef\=newline=crlf + +/.*/I,newline=crlf +Capturing subpattern count = 0 +May match empty string +Forced newline is CRLF +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef 0: abc\x0adef - abc\rdef\=newline=crlf + abc\rdef 0: abc\x0ddef - abc\r\ndef\=newline=crlf + abc\r\ndef 0: abc +/\w+(.)(.)?def/Is +Capturing subpattern count = 2 +Options: dotall +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Last code unit = 'f' +Subject length lower bound = 5 + abc\ndef + 0: abc\x0adef + abc\rdef + 0: abc\x0ddef + abc\r\ndef + 0: abc\x0d\x0adef + /\w+(.)(.)?def/s abc\ndef 0: abc\x0adef @@ -6397,17 +6406,35 @@ No match /^a.b/newline=lf a\rb 0: a\x0db - a\nb\=newline=cr - 0: a\x0ab ** Failers No match a\nb No match - a\nb\=newline=any + +/^a.b/newline=cr + a\nb + 0: a\x0ab + ** Failers No match - a\rb\=newline=cr + a\rb No match - a\rb\=newline=any + +/^a.b/newline=anycrlf + a\x85b + 0: a\x85b + ** Failers +No match + a\rb +No match + +/^a.b/newline=any + ** Failers +No match + a\nb +No match + a\rb +No match + a\x85b No match /^abc./gmx,newline=any @@ -6718,12 +6745,6 @@ Subject length lower bound = 3 0: a\x85b a\x0bb 0: a\x0bb - ** Failers -No match - a\x85b\=bsr=anycrlf -No match - a\x0bb\=bsr=anycrlf -No match /a\R?b/I,bsr=anycrlf Capturing subpattern count = 0 @@ -6760,12 +6781,6 @@ Subject length lower bound = 2 0: a\x85b a\x0bb 0: a\x0bb - ** Failers -No match - a\x85b\=bsr=anycrlf -No match - a\x0bb\=bsr=anycrlf -No match /a\R{2,4}b/I,bsr=anycrlf Capturing subpattern count = 0 @@ -6806,10 +6821,6 @@ No match No match a\r\r\r\r\rb No match - a\x85\x85b\=bsr=anycrlf -No match - a\x0b\0bb\=bsr=anycrlf -No match /a(?!)|\wbc/ abc diff --git a/testdata/testoutput7 b/testdata/testoutput7 index b3ebbb1..34ead75 100644 --- a/testdata/testoutput7 +++ b/testdata/testoutput7 @@ -1139,12 +1139,6 @@ Subject length lower bound = 3 0: a\x{85}b a\x0bb 0: a\x{0b}b - ** Failers -No match - a\x{85}b\=bsr=anycrlf -No match - a\x0bb\=bsr=anycrlf -No match /a\R?b/I,bsr=anycrlf,utf Capturing subpattern count = 0 @@ -1183,12 +1177,6 @@ Subject length lower bound = 2 0: a\x{85}b a\x0bb 0: a\x{0b}b - ** Failers -No match - a\x{85}b\=bsr=anycrlf -No match - a\x0bb\=bsr=anycrlf -No match /X/newline=any,utf,firstline A\x{1ec5}ABCXYZ