Remove obsolete OP_ONCE_NC internal opcode.

This commit is contained in:
Philip.Hazel 2017-03-11 17:59:23 +00:00
parent 02779c9a67
commit 04da0725f3
10 changed files with 63 additions and 106 deletions

14
HACKING
View File

@ -677,17 +677,9 @@ repetition is zero, the group is preceded by OP_BRAPOSZERO.
Once-only (atomic) groups
-------------------------
These are just like other subpatterns, but they start with the opcode
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
within the atomic group; the latter when there are. The distinction is needed
for when there is a backtrack to before the group - any captures within the
group must be reset, so it is necessary to retain backtracking points inside
the group, even after it is complete, in order to do this. When there are no
captures in an atomic group, all the backtracking can be discarded when it is
complete. This is more efficient, and also uses less stack.
These are just like other subpatterns, but they start with the opcode OP_ONCE.
The check for matching an empty string in an unbounded repeat is handled
entirely at runtime, so there are just these two opcodes for atomic groups.
entirely at runtime, so there is just this one opcode for atomic groups.
Assertions
@ -795,4 +787,4 @@ not a real opcode, but is used to check that tables indexed by opcode are the
correct length, in order to catch updating errors.
Philip Hazel
November 2016
March 2017

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -588,7 +588,6 @@ for(;;)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
case OP_ONCE_NC:
/* Atomic sub-patterns and assertions can always auto-possessify their
last iterator. However, if the group was entered as a result of checking
@ -601,7 +600,6 @@ for(;;)
continue;
case OP_ONCE:
case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
next_code = code + GET(code, 1);
@ -625,8 +623,8 @@ for(;;)
case OP_BRAMINZERO:
next_code = code + 1;
if (*next_code != OP_BRA && *next_code != OP_CBRA
&& *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE;
if (*next_code != OP_BRA && *next_code != OP_CBRA &&
*next_code != OP_ONCE) return FALSE;
do next_code += GET(next_code, 1); while (*next_code == OP_ALT);

View File

@ -4746,7 +4746,6 @@ for (;; pptr++)
int class_has_8bitchar;
int i;
uint32_t mclength;
uint32_t templastcapture;
uint32_t skipunits;
uint32_t subreqcu, subfirstcu;
uint32_t groupnumber;
@ -5753,7 +5752,6 @@ for (;; pptr++)
pptr++;
tempcode = code;
tempreqvary = cb->req_varyopt; /* Save value before group */
templastcapture = cb->lastcapture; /* Save value before group */
length_prevgroup = 0; /* Initialize for pre-compile phase */
if ((group_return =
@ -5783,12 +5781,6 @@ for (;; pptr++)
if (note_group_empty && bravalue != OP_COND && group_return > 0)
matched_char = TRUE;
/* If that was an atomic group and there are no capturing groups within it,
generate OP_ONCE_NC instead of OP_ONCE. */
if (bravalue == OP_ONCE && cb->lastcapture <= templastcapture)
*code = OP_ONCE_NC;
/* If we've just compiled an assertion, pop the assert depth. */
if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
@ -6376,7 +6368,6 @@ for (;; pptr++)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
case OP_COND:
@ -6620,14 +6611,12 @@ for (;; pptr++)
/* Convert possessive ONCE brackets to non-capturing */
if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
possessive_quantifier) *bracode = OP_BRA;
if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;
/* For non-possessive ONCE brackets, all we need to do is to
set the KET. */
if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
*ketcode = OP_KETRMAX + repeat_type;
if (*bracode == OP_ONCE) *ketcode = OP_KETRMAX + repeat_type;
/* Handle non-ONCE brackets and possessive ONCEs (which have been
converted to non-capturing above). */
@ -7621,7 +7610,7 @@ do {
/* Atomic groups */
else if (op == OP_ONCE || op == OP_ONCE_NC)
else if (op == OP_ONCE)
{
if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert))
return FALSE;
@ -7751,7 +7740,7 @@ do {
/* Atomic brackets */
else if (op == OP_ONCE || op == OP_ONCE_NC)
else if (op == OP_ONCE)
{
if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert))
return FALSE;
@ -7773,9 +7762,8 @@ do {
}
/* Check for explicit circumflex; anything else gives a FALSE result. Note
in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC
because the number of characters matched by .* cannot be adjusted inside
them. */
in particular that this includes atomic brackets OP_ONCE because the number
of characters matched by .* cannot be adjusted inside them. */
else if (op != OP_CIRC && op != OP_CIRCM) return FALSE;
@ -7986,7 +7974,6 @@ do {
case OP_SCBRAPOS:
case OP_ASSERT:
case OP_ONCE:
case OP_ONCE_NC:
d = find_firstassertedcu(scode, &dflags, op == OP_ASSERT);
if (dflags < 0)
return 0;

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -172,7 +172,7 @@ static const uint8_t coptable[] = {
0, /* Assert not */
0, /* Assert behind */
0, /* Assert behind not */
0, 0, /* ONCE, ONCE_NC */
0, /* ONCE */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */
@ -245,7 +245,7 @@ static const uint8_t poptable[] = {
0, /* Assert not */
0, /* Assert behind */
0, /* Assert behind not */
0, 0, /* ONCE, ONCE_NC */
0, /* ONCE */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */
@ -2889,7 +2889,6 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_ONCE:
case OP_ONCE_NC:
{
PCRE2_SIZE local_offsets[2];
int local_workspace[1000];

View File

@ -1510,68 +1510,67 @@ enum {
OP_ASSERTBACK, /* 128 Positive lookbehind */
OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
after the assertions, with ONCE first, as there's a test for >= ONCE for a
subpattern that isn't an assertion. The POS versions must immediately follow
the non-POS versions in each case. */
/* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the
assertions, with ONCE first, as there's a test for >= ONCE for a subpattern
that isn't an assertion. The POS versions must immediately follow the non-POS
versions in each case. */
OP_ONCE, /* 130 Atomic group */
OP_ONCE_NC, /* 131 Atomic group containing no captures */
OP_BRA, /* 132 Start of non-capturing bracket */
OP_BRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
OP_CBRA, /* 134 Start of capturing bracket */
OP_CBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
OP_COND, /* 136 Conditional group */
OP_BRA, /* 131 Start of non-capturing bracket */
OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
OP_CBRA, /* 133 Start of capturing bracket */
OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
OP_COND, /* 135 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
OP_SBRA, /* 137 Start of non-capturing bracket, check empty */
OP_SBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */
OP_SCBRA, /* 139 Start of capturing bracket, check empty */
OP_SCBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
OP_SCOND, /* 141 Conditional group, check empty */
OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
OP_SCBRA, /* 138 Start of capturing bracket, check empty */
OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
OP_SCOND, /* 140 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
OP_CREF, /* 142 Used to hold a capture number as condition */
OP_DNCREF, /* 143 Used to point to duplicate names as a condition */
OP_RREF, /* 144 Used to hold a recursion number as condition */
OP_DNRREF, /* 145 Used to point to duplicate names as a condition */
OP_FALSE, /* 146 Always false (used by DEFINE and VERSION) */
OP_TRUE, /* 147 Always true (used by VERSION) */
OP_CREF, /* 141 Used to hold a capture number as condition */
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
OP_RREF, /* 143 Used to hold a recursion number as condition */
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */
OP_TRUE, /* 146 Always true (used by VERSION) */
OP_BRAZERO, /* 148 These two must remain together and in this */
OP_BRAMINZERO, /* 149 order. */
OP_BRAPOSZERO, /* 150 */
OP_BRAZERO, /* 147 These two must remain together and in this */
OP_BRAMINZERO, /* 148 order. */
OP_BRAPOSZERO, /* 149 */
/* These are backtracking control verbs */
OP_MARK, /* 151 always has an argument */
OP_PRUNE, /* 152 */
OP_PRUNE_ARG, /* 153 same, but with argument */
OP_SKIP, /* 154 */
OP_SKIP_ARG, /* 155 same, but with argument */
OP_THEN, /* 156 */
OP_THEN_ARG, /* 157 same, but with argument */
OP_COMMIT, /* 158 */
OP_MARK, /* 150 always has an argument */
OP_PRUNE, /* 151 */
OP_PRUNE_ARG, /* 152 same, but with argument */
OP_SKIP, /* 153 */
OP_SKIP_ARG, /* 154 same, but with argument */
OP_THEN, /* 155 */
OP_THEN_ARG, /* 156 same, but with argument */
OP_COMMIT, /* 157 */
/* These are forced failure and success verbs */
OP_FAIL, /* 159 */
OP_ACCEPT, /* 160 */
OP_ASSERT_ACCEPT, /* 161 Used inside assertions */
OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */
OP_FAIL, /* 158 */
OP_ACCEPT, /* 159 */
OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
OP_SKIPZERO, /* 163 */
OP_SKIPZERO, /* 162 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
OP_DEFINE, /* 164 */
OP_DEFINE, /* 163 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@ -1618,7 +1617,7 @@ some cases doesn't actually use these names at all). */
"Recurse", "Callout", "CalloutStr", \
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
"Once", "Once_NC", \
"Once", \
"Bra", "BraPos", "CBra", "CBraPos", \
"Cond", \
"SBra", "SBraPos", "SCBra", "SCBraPos", \
@ -1702,7 +1701,6 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1+LINK_SIZE, /* Assert behind */ \
1+LINK_SIZE, /* Assert behind not */ \
1+LINK_SIZE, /* ONCE */ \
1+LINK_SIZE, /* ONCE_NC */ \
1+LINK_SIZE, /* BRA */ \
1+LINK_SIZE, /* BRAPOS */ \
1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -675,7 +675,6 @@ switch(*cc)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
case OP_ONCE_NC:
case OP_BRA:
case OP_BRAPOS:
case OP_CBRA:
@ -1304,7 +1303,7 @@ while (cc < ccend)
if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
break;
if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
{
if (detect_repeat(common, cc))
{
@ -1333,7 +1332,6 @@ while (cc < ccend)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
case OP_ONCE_NC:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
@ -1802,7 +1800,6 @@ while (cc < ccend)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
case OP_ONCE_NC:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
@ -1982,7 +1979,6 @@ do
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
case OP_ONCE_NC:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
@ -3583,7 +3579,6 @@ while (TRUE)
continue;
case OP_ONCE:
case OP_ONCE_NC:
case OP_BRA:
case OP_BRAPOS:
case OP_CBRA:
@ -7826,7 +7821,6 @@ return stacksize;
(|) OP_*BRA | OP_ALT ... M A
(?()|) OP_*COND | OP_ALT M A
(?>|) OP_ONCE | OP_ALT ... [stack trace] M A
(?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
Or nothing, if trace is unnecessary
*/
@ -7894,8 +7888,6 @@ if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
opcode = OP_ONCE;
if (opcode == OP_CBRA || opcode == OP_SCBRA)
{
@ -9546,7 +9538,6 @@ while (cc < ccend)
break;
case OP_ONCE:
case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
case OP_COND:
@ -9953,8 +9944,6 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA)
offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
opcode = OP_ONCE;
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
@ -10627,7 +10616,6 @@ while (current)
break;
case OP_ONCE:
case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
case OP_COND:

View File

@ -5021,7 +5021,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* Atomic groups and non-capturing brackets that can match an empty string
must record a backtracking point and also set up a chained frame. */
case OP_ONCE_NC: /* Obsolete */
case OP_ONCE:
case OP_SBRA:
Lframe_type = GF_NOCAPTURE | Fop;
@ -5518,7 +5517,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
frame so that it points to the final branch. */
case OP_ONCE:
case OP_ONCE_NC: /* Obsolete */
Fback_frame = ((char *)F - (char *)P) + frame_size;
for (;;)
{

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -393,7 +393,6 @@ for(;;)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
case OP_ONCE_NC:
case OP_COND:
case OP_SCOND:
case OP_REVERSE:

View File

@ -171,7 +171,6 @@ for (;;)
/* Fall through */
case OP_ONCE:
case OP_ONCE_NC:
case OP_SBRA:
case OP_BRAPOS:
case OP_SBRAPOS:
@ -1068,7 +1067,6 @@ do
case OP_CBRAPOS:
case OP_SCBRAPOS:
case OP_ONCE:
case OP_ONCE_NC:
case OP_ASSERT:
rc = set_start_bits(re, tcode, utf);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;

14
testdata/testoutput2 vendored
View File

@ -11042,7 +11042,7 @@ Subject length lower bound = 0
------------------------------------------------------------------
Bra
^
Once_NC
Once
a++
Ket
Once
@ -12510,7 +12510,7 @@ Subject length lower bound = 5
cc
Ket
a++
Once_NC
Once
bb
Alt
cc
@ -12859,7 +12859,7 @@ Subject length lower bound = 5
------------------------------------------------------------------
Bra
[a-f]*+
Once_NC
Once
gg
Alt
hh
@ -12867,7 +12867,7 @@ Subject length lower bound = 5
#
[a-f]*+
Brazero
Once_NC
Once
gg
Alt
hh
@ -12875,7 +12875,7 @@ Subject length lower bound = 5
#
[a-f]*
Brazero
Once_NC
Once
gg
Alt
hh
@ -12883,7 +12883,7 @@ Subject length lower bound = 5
a#
[a-f]*+
Brazero
Once_NC
Once
gg
Alt
hh
@ -13173,7 +13173,7 @@ Failed: error 133 at offset 7: parentheses are too deeply nested (stack check)
Bra
^
\w+
Once_NC
Once
\s*+
Ket
AssertB