diff --git a/HACKING b/HACKING index 5c77601..9fd20c9 100644 --- a/HACKING +++ b/HACKING @@ -677,17 +677,9 @@ repetition is zero, the group is preceded by OP_BRAPOSZERO. Once-only (atomic) groups ------------------------- -These are just like other subpatterns, but they start with the opcode -OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets -within the atomic group; the latter when there are. The distinction is needed -for when there is a backtrack to before the group - any captures within the -group must be reset, so it is necessary to retain backtracking points inside -the group, even after it is complete, in order to do this. When there are no -captures in an atomic group, all the backtracking can be discarded when it is -complete. This is more efficient, and also uses less stack. - +These are just like other subpatterns, but they start with the opcode OP_ONCE. The check for matching an empty string in an unbounded repeat is handled -entirely at runtime, so there are just these two opcodes for atomic groups. +entirely at runtime, so there are just this one opcode for atomic groups. Assertions @@ -795,4 +787,4 @@ not a real opcode, but is used to check that tables indexed by opcode are the correct length, in order to catch updating errors. Philip Hazel -November 2016 +March 2017 diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c index 64ec6df..98b948a 100644 --- a/src/pcre2_auto_possess.c +++ b/src/pcre2_auto_possess.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2017 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -588,7 +588,6 @@ for(;;) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ONCE: - case OP_ONCE_NC: /* Atomic sub-patterns and assertions can always auto-possessify their last iterator. However, if the group was entered as a result of checking @@ -601,7 +600,6 @@ for(;;) continue; case OP_ONCE: - case OP_ONCE_NC: case OP_BRA: case OP_CBRA: next_code = code + GET(code, 1); @@ -625,8 +623,8 @@ for(;;) case OP_BRAMINZERO: next_code = code + 1; - if (*next_code != OP_BRA && *next_code != OP_CBRA - && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE; + if (*next_code != OP_BRA && *next_code != OP_CBRA && + *next_code != OP_ONCE) return FALSE; do next_code += GET(next_code, 1); while (*next_code == OP_ALT); diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 917159a..1503911 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -4746,7 +4746,6 @@ for (;; pptr++) int class_has_8bitchar; int i; uint32_t mclength; - uint32_t templastcapture; uint32_t skipunits; uint32_t subreqcu, subfirstcu; uint32_t groupnumber; @@ -5753,7 +5752,6 @@ for (;; pptr++) pptr++; tempcode = code; tempreqvary = cb->req_varyopt; /* Save value before group */ - templastcapture = cb->lastcapture; /* Save value before group */ length_prevgroup = 0; /* Initialize for pre-compile phase */ if ((group_return = @@ -5783,12 +5781,6 @@ for (;; pptr++) if (note_group_empty && bravalue != OP_COND && group_return > 0) matched_char = TRUE; - /* If that was an atomic group and there are no capturing groups within it, - generate OP_ONCE_NC instead of OP_ONCE. */ - - if (bravalue == OP_ONCE && cb->lastcapture <= templastcapture) - *code = OP_ONCE_NC; - /* If we've just compiled an assertion, pop the assert depth. */ if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT) @@ -6376,7 +6368,6 @@ for (;; pptr++) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ONCE: - case OP_ONCE_NC: case OP_BRA: case OP_CBRA: case OP_COND: @@ -6620,14 +6611,12 @@ for (;; pptr++) /* Convert possessive ONCE brackets to non-capturing */ - if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) && - possessive_quantifier) *bracode = OP_BRA; + if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA; /* For non-possessive ONCE brackets, all we need to do is to set the KET. */ - if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC) - *ketcode = OP_KETRMAX + repeat_type; + if (*bracode == OP_ONCE) *ketcode = OP_KETRMAX + repeat_type; /* Handle non-ONCE brackets and possessive ONCEs (which have been converted to non-capturing above). */ @@ -7621,7 +7610,7 @@ do { /* Atomic groups */ - else if (op == OP_ONCE || op == OP_ONCE_NC) + else if (op == OP_ONCE) { if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert)) return FALSE; @@ -7751,7 +7740,7 @@ do { /* Atomic brackets */ - else if (op == OP_ONCE || op == OP_ONCE_NC) + else if (op == OP_ONCE) { if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert)) return FALSE; @@ -7773,9 +7762,8 @@ do { } /* Check for explicit circumflex; anything else gives a FALSE result. Note - in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC - because the number of characters matched by .* cannot be adjusted inside - them. */ + in particular that this includes atomic brackets OP_ONCE because the number + of characters matched by .* cannot be adjusted inside them. */ else if (op != OP_CIRC && op != OP_CIRCM) return FALSE; @@ -7986,7 +7974,6 @@ do { case OP_SCBRAPOS: case OP_ASSERT: case OP_ONCE: - case OP_ONCE_NC: d = find_firstassertedcu(scode, &dflags, op == OP_ASSERT); if (dflags < 0) return 0; diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index c909d61..471e159 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2017 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -172,7 +172,7 @@ static const uint8_t coptable[] = { 0, /* Assert not */ 0, /* Assert behind */ 0, /* Assert behind not */ - 0, 0, /* ONCE, ONCE_NC */ + 0, /* ONCE */ 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */ 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ 0, 0, /* CREF, DNCREF */ @@ -245,7 +245,7 @@ static const uint8_t poptable[] = { 0, /* Assert not */ 0, /* Assert behind */ 0, /* Assert behind not */ - 0, 0, /* ONCE, ONCE_NC */ + 0, /* ONCE */ 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */ 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ 0, 0, /* CREF, DNCREF */ @@ -2889,7 +2889,6 @@ for (;;) /*-----------------------------------------------------------------*/ case OP_ONCE: - case OP_ONCE_NC: { PCRE2_SIZE local_offsets[2]; int local_workspace[1000]; diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 2f6d834..654cc62 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -1510,68 +1510,67 @@ enum { OP_ASSERTBACK, /* 128 Positive lookbehind */ OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */ - /* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately - after the assertions, with ONCE first, as there's a test for >= ONCE for a - subpattern that isn't an assertion. The POS versions must immediately follow - the non-POS versions in each case. */ + /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the + assertions, with ONCE first, as there's a test for >= ONCE for a subpattern + that isn't an assertion. The POS versions must immediately follow the non-POS + versions in each case. */ OP_ONCE, /* 130 Atomic group, contains captures */ - OP_ONCE_NC, /* 131 Atomic group containing no captures */ - OP_BRA, /* 132 Start of non-capturing bracket */ - OP_BRAPOS, /* 133 Ditto, with unlimited, possessive repeat */ - OP_CBRA, /* 134 Start of capturing bracket */ - OP_CBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */ - OP_COND, /* 136 Conditional group */ + OP_BRA, /* 131 Start of non-capturing bracket */ + OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */ + OP_CBRA, /* 133 Start of capturing bracket */ + OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */ + OP_COND, /* 135 Conditional group */ /* These five must follow the previous five, in the same order. There's a check for >= SBRA to distinguish the two sets. */ - OP_SBRA, /* 137 Start of non-capturing bracket, check empty */ - OP_SBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */ - OP_SCBRA, /* 139 Start of capturing bracket, check empty */ - OP_SCBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */ - OP_SCOND, /* 141 Conditional group, check empty */ + OP_SBRA, /* 136 Start of non-capturing bracket, check empty */ + OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */ + OP_SCBRA, /* 138 Start of capturing bracket, check empty */ + OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */ + OP_SCOND, /* 140 Conditional group, check empty */ /* The next two pairs must (respectively) be kept together. */ - OP_CREF, /* 142 Used to hold a capture number as condition */ - OP_DNCREF, /* 143 Used to point to duplicate names as a condition */ - OP_RREF, /* 144 Used to hold a recursion number as condition */ - OP_DNRREF, /* 145 Used to point to duplicate names as a condition */ - OP_FALSE, /* 146 Always false (used by DEFINE and VERSION) */ - OP_TRUE, /* 147 Always true (used by VERSION) */ + OP_CREF, /* 141 Used to hold a capture number as condition */ + OP_DNCREF, /* 142 Used to point to duplicate names as a condition */ + OP_RREF, /* 143 Used to hold a recursion number as condition */ + OP_DNRREF, /* 144 Used to point to duplicate names as a condition */ + OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */ + OP_TRUE, /* 146 Always true (used by VERSION) */ - OP_BRAZERO, /* 148 These two must remain together and in this */ - OP_BRAMINZERO, /* 149 order. */ - OP_BRAPOSZERO, /* 150 */ + OP_BRAZERO, /* 147 These two must remain together and in this */ + OP_BRAMINZERO, /* 148 order. */ + OP_BRAPOSZERO, /* 149 */ /* These are backtracking control verbs */ - OP_MARK, /* 151 always has an argument */ - OP_PRUNE, /* 152 */ - OP_PRUNE_ARG, /* 153 same, but with argument */ - OP_SKIP, /* 154 */ - OP_SKIP_ARG, /* 155 same, but with argument */ - OP_THEN, /* 156 */ - OP_THEN_ARG, /* 157 same, but with argument */ - OP_COMMIT, /* 158 */ + OP_MARK, /* 150 always has an argument */ + OP_PRUNE, /* 151 */ + OP_PRUNE_ARG, /* 152 same, but with argument */ + OP_SKIP, /* 153 */ + OP_SKIP_ARG, /* 154 same, but with argument */ + OP_THEN, /* 155 */ + OP_THEN_ARG, /* 156 same, but with argument */ + OP_COMMIT, /* 157 */ /* These are forced failure and success verbs */ - OP_FAIL, /* 159 */ - OP_ACCEPT, /* 160 */ - OP_ASSERT_ACCEPT, /* 161 Used inside assertions */ - OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */ + OP_FAIL, /* 158 */ + OP_ACCEPT, /* 159 */ + OP_ASSERT_ACCEPT, /* 160 Used inside assertions */ + OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */ /* This is used to skip a subpattern with a {0} quantifier */ - OP_SKIPZERO, /* 163 */ + OP_SKIPZERO, /* 162 */ /* This is used to identify a DEFINE group during compilation so that it can be checked for having only one branch. It is changed to OP_FALSE before compilation finishes. */ - OP_DEFINE, /* 164 */ + OP_DEFINE, /* 163 */ /* This is not an opcode, but is used to check that tables indexed by opcode are the correct length, in order to catch updating errors - there have been @@ -1618,7 +1617,7 @@ some cases doesn't actually use these names at all). */ "Recurse", "Callout", "CalloutStr", \ "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \ "Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \ - "Once", "Once_NC", \ + "Once", \ "Bra", "BraPos", "CBra", "CBraPos", \ "Cond", \ "SBra", "SBraPos", "SCBra", "SCBraPos", \ @@ -1702,7 +1701,6 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 1+LINK_SIZE, /* Assert behind */ \ 1+LINK_SIZE, /* Assert behind not */ \ 1+LINK_SIZE, /* ONCE */ \ - 1+LINK_SIZE, /* ONCE_NC */ \ 1+LINK_SIZE, /* BRA */ \ 1+LINK_SIZE, /* BRAPOS */ \ 1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \ diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index adc808d..c505d9b 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2017 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -675,7 +675,6 @@ switch(*cc) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ONCE: - case OP_ONCE_NC: case OP_BRA: case OP_BRAPOS: case OP_CBRA: @@ -1304,7 +1303,7 @@ while (cc < ccend) if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) break; - if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) + if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) { if (detect_repeat(common, cc)) { @@ -1333,7 +1332,6 @@ while (cc < ccend) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ONCE: - case OP_ONCE_NC: case OP_BRAPOS: case OP_SBRA: case OP_SBRAPOS: @@ -1802,7 +1800,6 @@ while (cc < ccend) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ONCE: - case OP_ONCE_NC: case OP_BRAPOS: case OP_SBRA: case OP_SBRAPOS: @@ -1982,7 +1979,6 @@ do case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ONCE: - case OP_ONCE_NC: case OP_BRAPOS: case OP_SBRA: case OP_SBRAPOS: @@ -3583,7 +3579,6 @@ while (TRUE) continue; case OP_ONCE: - case OP_ONCE_NC: case OP_BRA: case OP_BRAPOS: case OP_CBRA: @@ -7826,7 +7821,6 @@ return stacksize; (|) OP_*BRA | OP_ALT ... M A (?()|) OP_*COND | OP_ALT M A (?>|) OP_ONCE | OP_ALT ... [stack trace] M A - (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A Or nothing, if trace is unnecessary */ @@ -7894,8 +7888,6 @@ if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND)) if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) opcode = OP_SCOND; -if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) - opcode = OP_ONCE; if (opcode == OP_CBRA || opcode == OP_SCBRA) { @@ -9546,7 +9538,6 @@ while (cc < ccend) break; case OP_ONCE: - case OP_ONCE_NC: case OP_BRA: case OP_CBRA: case OP_COND: @@ -9953,8 +9944,6 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA) offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1; if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) opcode = OP_SCOND; -if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) - opcode = OP_ONCE; alt_max = has_alternatives ? no_alternatives(ccbegin) : 0; @@ -10627,7 +10616,6 @@ while (current) break; case OP_ONCE: - case OP_ONCE_NC: case OP_BRA: case OP_CBRA: case OP_COND: diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 1f194d0..7667e1d 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -5021,7 +5021,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* Atomic groups and non-capturing brackets that can match an empty string must record a backtracking point and also set up a chained frame. */ - case OP_ONCE_NC: /* Obsolete */ case OP_ONCE: case OP_SBRA: Lframe_type = GF_NOCAPTURE | Fop; @@ -5518,7 +5517,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode); frame so that it points to the final branch. */ case OP_ONCE: - case OP_ONCE_NC: /* Obsolete */ Fback_frame = ((char *)F - (char *)P) + frame_size; for (;;) { diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c index 6207497..9a794e9 100644 --- a/src/pcre2_printint.c +++ b/src/pcre2_printint.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2017 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -393,7 +393,6 @@ for(;;) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ONCE: - case OP_ONCE_NC: case OP_COND: case OP_SCOND: case OP_REVERSE: diff --git a/src/pcre2_study.c b/src/pcre2_study.c index 9fa487f..a92fb11 100644 --- a/src/pcre2_study.c +++ b/src/pcre2_study.c @@ -171,7 +171,6 @@ for (;;) /* Fall through */ case OP_ONCE: - case OP_ONCE_NC: case OP_SBRA: case OP_BRAPOS: case OP_SBRAPOS: @@ -1068,7 +1067,6 @@ do case OP_CBRAPOS: case OP_SCBRAPOS: case OP_ONCE: - case OP_ONCE_NC: case OP_ASSERT: rc = set_start_bits(re, tcode, utf); if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 7292b1d..951277d 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -11042,7 +11042,7 @@ Subject length lower bound = 0 ------------------------------------------------------------------ Bra ^ - Once_NC + Once a++ Ket Once @@ -12510,7 +12510,7 @@ Subject length lower bound = 5 cc Ket a++ - Once_NC + Once bb Alt cc @@ -12859,7 +12859,7 @@ Subject length lower bound = 5 ------------------------------------------------------------------ Bra [a-f]*+ - Once_NC + Once gg Alt hh @@ -12867,7 +12867,7 @@ Subject length lower bound = 5 # [a-f]*+ Brazero - Once_NC + Once gg Alt hh @@ -12875,7 +12875,7 @@ Subject length lower bound = 5 # [a-f]* Brazero - Once_NC + Once gg Alt hh @@ -12883,7 +12883,7 @@ Subject length lower bound = 5 a# [a-f]*+ Brazero - Once_NC + Once gg Alt hh @@ -13173,7 +13173,7 @@ Failed: error 133 at offset 7: parentheses are too deeply nested (stack check) Bra ^ \w+ - Once_NC + Once \s*+ Ket AssertB