Add the offset of the string within the pattern to the data passed to a
callout that has a string argument.
This commit is contained in:
parent
b2c67082bd
commit
15e034c9c2
|
@ -338,6 +338,7 @@ typedef struct pcre2_callout_block { \
|
|||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
uint32_t callout_string_length; /* Length of string compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
|
|
|
@ -5652,17 +5652,19 @@ for (;; ptr++)
|
|||
for the terminating zero. Any doubled delimiters within the string
|
||||
make this an overestimate, but it is not worth bothering about. */
|
||||
|
||||
(*lengthptr) += (ptr - start) + 2 + (1 + 3*LINK_SIZE);
|
||||
(*lengthptr) += (ptr - start) + 2 + (1 + 4*LINK_SIZE);
|
||||
}
|
||||
|
||||
/* In the real compile we can copy the string, knowing that it is
|
||||
syntactically OK. The starting delimiter is included so that the
|
||||
client can discover it if they want. */
|
||||
client can discover it if they want. We also pass the start offset to
|
||||
help a script language give better error messages. */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_UCHAR *callout_string = code + (1 + 3*LINK_SIZE);
|
||||
PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE);
|
||||
*callout_string++ = *ptr++;
|
||||
PUT(code, 1 + 3*LINK_SIZE, (int)(ptr - cb->start_pattern)); /* Start offset */
|
||||
for(;;)
|
||||
{
|
||||
if (*ptr == delimiter)
|
||||
|
@ -7302,7 +7304,7 @@ do {
|
|||
scode += 1 + LINK_SIZE;
|
||||
|
||||
if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
|
||||
else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE);
|
||||
else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE);
|
||||
|
||||
switch (*scode)
|
||||
{
|
||||
|
|
|
@ -2631,15 +2631,17 @@ for (;;)
|
|||
if (code[LINK_SIZE + 1] == OP_CALLOUT)
|
||||
{
|
||||
cb.callout_number = code[2 + 3*LINK_SIZE];
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string = NULL;
|
||||
cb.callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string = code + (2 + 4*LINK_SIZE) + 1;
|
||||
cb.callout_string_offset = GET(code, 2 + 4*LINK_SIZE);
|
||||
cb.callout_string = code + (2 + 5*LINK_SIZE) + 1;
|
||||
cb.callout_string_length =
|
||||
callout_length - (1 + 3*LINK_SIZE) - 2;
|
||||
callout_length - (1 + 4*LINK_SIZE) - 2;
|
||||
}
|
||||
|
||||
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
|
||||
|
@ -2997,15 +2999,17 @@ for (;;)
|
|||
if (*code == OP_CALLOUT)
|
||||
{
|
||||
cb.callout_number = code[1 + 2*LINK_SIZE];
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string = NULL;
|
||||
cb.callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string = code + (1 + 3*LINK_SIZE) + 1;
|
||||
cb.callout_string_offset = GET(code, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string = code + (1 + 4*LINK_SIZE) + 1;
|
||||
cb.callout_string_length =
|
||||
callout_length - (1 + 3*LINK_SIZE) - 2;
|
||||
callout_length - (1 + 4*LINK_SIZE) - 2;
|
||||
}
|
||||
|
||||
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
|
||||
|
|
|
@ -6346,6 +6346,7 @@ unsigned int callout_length = (*cc == OP_CALLOUT)
|
|||
? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
|
||||
sljit_sw value1;
|
||||
sljit_sw value2;
|
||||
sljit_sw value3;
|
||||
|
||||
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
|
||||
|
||||
|
@ -6373,15 +6374,18 @@ if (*cc == OP_CALLOUT)
|
|||
{
|
||||
value1 = 0;
|
||||
value2 = 0;
|
||||
value3 = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
value1 = (sljit_sw) (cc + (1 + 3*LINK_SIZE) + 1);
|
||||
value2 = (callout_length - (1 + 3*LINK_SIZE + 2));
|
||||
value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
|
||||
value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
|
||||
value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
|
||||
}
|
||||
|
||||
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
|
||||
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
|
||||
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
|
||||
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
|
||||
|
||||
/* Needed to save important temporary registers. */
|
||||
|
|
|
@ -1333,15 +1333,17 @@ for (;;)
|
|||
if (*ecode == OP_CALLOUT)
|
||||
{
|
||||
cb.callout_number = ecode[1 + 2*LINK_SIZE];
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string = NULL;
|
||||
cb.callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
|
||||
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
|
||||
cb.callout_string_length =
|
||||
callout_length - (1 + 3*LINK_SIZE) - 2;
|
||||
callout_length - (1 + 4*LINK_SIZE) - 2;
|
||||
}
|
||||
|
||||
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
|
||||
|
@ -1757,15 +1759,17 @@ for (;;)
|
|||
if (*ecode == OP_CALLOUT)
|
||||
{
|
||||
cb.callout_number = ecode[1 + 2*LINK_SIZE];
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string = NULL;
|
||||
cb.callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
|
||||
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
|
||||
cb.callout_string_length =
|
||||
callout_length - (1 + 3*LINK_SIZE) - 2;
|
||||
callout_length - (1 + 4*LINK_SIZE) - 2;
|
||||
}
|
||||
|
||||
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
|
||||
|
|
|
@ -600,18 +600,18 @@ for(;;)
|
|||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
c = code[1 + 3*LINK_SIZE];
|
||||
c = code[1 + 4*LINK_SIZE];
|
||||
fprintf(f, " %s %c", OP_names[*code], c);
|
||||
extra = GET(code, 1 + 2*LINK_SIZE);
|
||||
print_custring(f, code + 2 + 3*LINK_SIZE);
|
||||
|
||||
print_custring(f, code + 2 + 4*LINK_SIZE);
|
||||
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
|
||||
if (c == PRIV(callout_start_delims)[i])
|
||||
{
|
||||
c = PRIV(callout_end_delims)[i];
|
||||
break;
|
||||
}
|
||||
fprintf(f, "%c %d %d", c, GET(code, 1), GET(code, 1 + LINK_SIZE));
|
||||
fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
|
||||
GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
|
|
|
@ -4546,7 +4546,8 @@ isn't a tidy way to fit it in the rest of the data. */
|
|||
if (cb->callout_string != NULL)
|
||||
{
|
||||
uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
|
||||
fprintf(outfile, "Callout: %c", delimiter);
|
||||
fprintf(outfile, "Callout (%lu): %c",
|
||||
(unsigned long int)cb->callout_string_offset, delimiter);
|
||||
PCHARSV(cb->callout_string, 0,
|
||||
cb->callout_string_length, utf, outfile);
|
||||
for (i = 0; callout_start_delims[i] != 0; i++)
|
||||
|
|
|
@ -13987,7 +13987,7 @@ Failed: error 139 at offset 7: closing parenthesis for (?C expected
|
|||
------------------------------------------------------------------
|
||||
Bra
|
||||
a
|
||||
CalloutStr "a)b"c" 13 0
|
||||
CalloutStr "a)b"c" 5 13 0
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
@ -13996,18 +13996,18 @@ Failed: error 139 at offset 7: closing parenthesis for (?C expected
|
|||
------------------------------------------------------------------
|
||||
Bra
|
||||
ab
|
||||
CalloutStr " any text with spaces " 30 1
|
||||
CalloutStr " any text with spaces " 6 30 1
|
||||
cde
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
abcde
|
||||
Callout: " any text with spaces "
|
||||
Callout (6): " any text with spaces "
|
||||
--->abcde
|
||||
^ ^ c
|
||||
0: abcde
|
||||
12abcde
|
||||
Callout: " any text with spaces "
|
||||
Callout (6): " any text with spaces "
|
||||
--->12abcde
|
||||
^ ^ c
|
||||
0: abcde
|
||||
|
@ -14021,7 +14021,7 @@ Callout: " any text with spaces "
|
|||
|
||||
/^a(b)c(?C"AB")def/
|
||||
abcdef
|
||||
Callout: "AB"
|
||||
Callout (10): "AB"
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
0: abcdef
|
||||
|
@ -14046,13 +14046,13 @@ Callout 1: last capture = 1
|
|||
b
|
||||
Ket
|
||||
c
|
||||
CalloutStr {AB} 14 1
|
||||
CalloutStr {AB} 10 14 1
|
||||
def
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
abcdef\=callout_capture
|
||||
Callout: {AB} last capture = 1
|
||||
Callout (10): {AB} last capture = 1
|
||||
0: <unset>
|
||||
1: b
|
||||
--->abcdef
|
||||
|
@ -14063,14 +14063,14 @@ Callout: {AB} last capture = 1
|
|||
/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
CalloutStr `a`b` 10 0
|
||||
CalloutStr 'a'b' 20 0
|
||||
CalloutStr "a"b" 30 0
|
||||
CalloutStr ^a^b^ 40 0
|
||||
CalloutStr %a%b% 50 0
|
||||
CalloutStr #a#b# 60 0
|
||||
CalloutStr $a$b$ 70 0
|
||||
CalloutStr {a}b} 80 0
|
||||
CalloutStr `a`b` 4 10 0
|
||||
CalloutStr 'a'b' 14 20 0
|
||||
CalloutStr "a"b" 24 30 0
|
||||
CalloutStr ^a^b^ 34 40 0
|
||||
CalloutStr %a%b% 44 50 0
|
||||
CalloutStr #a#b# 54 60 0
|
||||
CalloutStr $a$b$ 64 70 0
|
||||
CalloutStr {a}b} 74 80 0
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
@ -14080,15 +14080,15 @@ Callout: {AB} last capture = 1
|
|||
Bra
|
||||
Bra
|
||||
a
|
||||
CalloutStr `code` 14 0
|
||||
CalloutStr `code` 8 14 0
|
||||
Ket
|
||||
Bra
|
||||
a
|
||||
CalloutStr `code` 14 0
|
||||
CalloutStr `code` 8 14 0
|
||||
Ket
|
||||
Bra
|
||||
a
|
||||
CalloutStr `code` 14 0
|
||||
CalloutStr `code` 8 14 0
|
||||
Ket
|
||||
Ket
|
||||
End
|
||||
|
@ -14124,7 +14124,7 @@ Callout: {AB} last capture = 1
|
|||
Bra
|
||||
^
|
||||
Cond
|
||||
CalloutStr $abc$ 12 7
|
||||
CalloutStr $abc$ 7 12 7
|
||||
Assert
|
||||
abc
|
||||
Ket
|
||||
|
@ -14136,35 +14136,35 @@ Callout: {AB} last capture = 1
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
abcdefg
|
||||
Callout: $abc$
|
||||
Callout (7): $abc$
|
||||
--->abcdefg
|
||||
^ (?=abc)
|
||||
0: abcd
|
||||
xyz123
|
||||
Callout: $abc$
|
||||
Callout (7): $abc$
|
||||
--->xyz123
|
||||
^ (?=abc)
|
||||
0: xyz
|
||||
|
||||
/^ab(?C'first')cd(?C"second")ef/
|
||||
abcdefg
|
||||
Callout: 'first'
|
||||
Callout (7): 'first'
|
||||
--->abcdefg
|
||||
^ ^ c
|
||||
Callout: "second"
|
||||
Callout (20): "second"
|
||||
--->abcdefg
|
||||
^ ^ e
|
||||
0: abcdef
|
||||
|
||||
/(?:a(?C`code`)){3}X/
|
||||
aaaXY
|
||||
Callout: `code`
|
||||
Callout (8): `code`
|
||||
--->aaaXY
|
||||
^^ )
|
||||
Callout: `code`
|
||||
Callout (8): `code`
|
||||
--->aaaXY
|
||||
^ ^ )
|
||||
Callout: `code`
|
||||
Callout (8): `code`
|
||||
--->aaaXY
|
||||
^ ^ )
|
||||
0: aaaX
|
||||
|
|
|
@ -7777,18 +7777,18 @@ get substring list failed (-2): partial match
|
|||
------------------------------------------------------------------
|
||||
Bra
|
||||
ab
|
||||
CalloutStr " any text with spaces " 30 1
|
||||
CalloutStr " any text with spaces " 6 30 1
|
||||
cde
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
abcde
|
||||
Callout: " any text with spaces "
|
||||
Callout (6): " any text with spaces "
|
||||
--->abcde
|
||||
^ ^ c
|
||||
0: abcde
|
||||
12abcde
|
||||
Callout: " any text with spaces "
|
||||
Callout (6): " any text with spaces "
|
||||
--->12abcde
|
||||
^ ^ c
|
||||
0: abcde
|
||||
|
@ -7801,7 +7801,7 @@ Callout: " any text with spaces "
|
|||
|
||||
/^a(b)c(?C"AB")def/
|
||||
abcdef
|
||||
Callout: "AB"
|
||||
Callout (10): "AB"
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
0: abcdef
|
||||
|
@ -7823,13 +7823,13 @@ Callout 1: last capture = 0
|
|||
b
|
||||
Ket
|
||||
c
|
||||
CalloutStr {AB} 14 1
|
||||
CalloutStr {AB} 10 14 1
|
||||
def
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
abcdef\=callout_capture
|
||||
Callout: {AB} last capture = 0
|
||||
Callout (10): {AB} last capture = 0
|
||||
0:
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
|
@ -7865,7 +7865,7 @@ Callout: {AB} last capture = 0
|
|||
Bra
|
||||
^
|
||||
Cond
|
||||
CalloutStr $abc$ 12 7
|
||||
CalloutStr $abc$ 7 12 7
|
||||
Assert
|
||||
abc
|
||||
Ket
|
||||
|
@ -7877,35 +7877,35 @@ Callout: {AB} last capture = 0
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
abcdefg
|
||||
Callout: $abc$
|
||||
Callout (7): $abc$
|
||||
--->abcdefg
|
||||
^ (?=abc)
|
||||
0: abcd
|
||||
xyz123
|
||||
Callout: $abc$
|
||||
Callout (7): $abc$
|
||||
--->xyz123
|
||||
^ (?=abc)
|
||||
0: xyz
|
||||
|
||||
/^ab(?C'first')cd(?C"second")ef/
|
||||
abcdefg
|
||||
Callout: 'first'
|
||||
Callout (7): 'first'
|
||||
--->abcdefg
|
||||
^ ^ c
|
||||
Callout: "second"
|
||||
Callout (20): "second"
|
||||
--->abcdefg
|
||||
^ ^ e
|
||||
0: abcdef
|
||||
|
||||
/(?:a(?C`code`)){3}X/
|
||||
aaaXY
|
||||
Callout: `code`
|
||||
Callout (8): `code`
|
||||
--->aaaXY
|
||||
^^ )
|
||||
Callout: `code`
|
||||
Callout (8): `code`
|
||||
--->aaaXY
|
||||
^ ^ )
|
||||
Callout: `code`
|
||||
Callout (8): `code`
|
||||
--->aaaXY
|
||||
^ ^ )
|
||||
0: aaaX
|
||||
|
|
Loading…
Reference in New Issue