Add string offset within the pattern to the data passed to a callout with a string argument.
This commit is contained in:
Philip.Hazel 2015-03-14 12:20:18 +00:00
parent b2c67082bd
commit 15e034c9c2
9 changed files with 75 additions and 59 deletions

View File

@ -338,6 +338,7 @@ typedef struct pcre2_callout_block { \
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 1 -------------------------- */ \
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
uint32_t callout_string_length; /* Length of string compiled into pattern */ \
/* ------------------------------------------------------------------ */ \

View File

@ -5652,17 +5652,19 @@ for (;; ptr++)
for the terminating zero. Any doubled delimiters within the string
make this an overestimate, but it is not worth bothering about. */
(*lengthptr) += (ptr - start) + 2 + (1 + 3*LINK_SIZE);
(*lengthptr) += (ptr - start) + 2 + (1 + 4*LINK_SIZE);
}
/* In the real compile we can copy the string, knowing that it is
syntactically OK. The starting delimiter is included so that the
client can discover it if they want. */
client can discover it if they want. We also pass the start offset to
help a script language give better error messages. */
else
{
PCRE2_UCHAR *callout_string = code + (1 + 3*LINK_SIZE);
PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE);
*callout_string++ = *ptr++;
PUT(code, 1 + 3*LINK_SIZE, (int)(ptr - cb->start_pattern)); /* Start offset */
for(;;)
{
if (*ptr == delimiter)

View File

@ -2631,15 +2631,17 @@ for (;;)
if (code[LINK_SIZE + 1] == OP_CALLOUT)
{
cb.callout_number = code[2 + 3*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string = NULL;
cb.callout_string_length = 0;
}
else
{
cb.callout_number = 0;
cb.callout_string = code + (2 + 4*LINK_SIZE) + 1;
cb.callout_string_offset = GET(code, 2 + 4*LINK_SIZE);
cb.callout_string = code + (2 + 5*LINK_SIZE) + 1;
cb.callout_string_length =
callout_length - (1 + 3*LINK_SIZE) - 2;
callout_length - (1 + 4*LINK_SIZE) - 2;
}
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
@ -2997,15 +2999,17 @@ for (;;)
if (*code == OP_CALLOUT)
{
cb.callout_number = code[1 + 2*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string = NULL;
cb.callout_string_length = 0;
}
else
{
cb.callout_number = 0;
cb.callout_string = code + (1 + 3*LINK_SIZE) + 1;
cb.callout_string_offset = GET(code, 1 + 3*LINK_SIZE);
cb.callout_string = code + (1 + 4*LINK_SIZE) + 1;
cb.callout_string_length =
callout_length - (1 + 3*LINK_SIZE) - 2;
callout_length - (1 + 4*LINK_SIZE) - 2;
}
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)

View File

@ -6346,6 +6346,7 @@ unsigned int callout_length = (*cc == OP_CALLOUT)
? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
sljit_sw value1;
sljit_sw value2;
sljit_sw value3;
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
@ -6373,15 +6374,18 @@ if (*cc == OP_CALLOUT)
{
value1 = 0;
value2 = 0;
value3 = 0;
}
else
{
value1 = (sljit_sw) (cc + (1 + 3*LINK_SIZE) + 1);
value2 = (callout_length - (1 + 3*LINK_SIZE + 2));
value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
}
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
/* Needed to save important temporary registers. */

View File

@ -1333,15 +1333,17 @@ for (;;)
if (*ecode == OP_CALLOUT)
{
cb.callout_number = ecode[1 + 2*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string = NULL;
cb.callout_string_length = 0;
}
else
{
cb.callout_number = 0;
cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
cb.callout_string_length =
callout_length - (1 + 3*LINK_SIZE) - 2;
callout_length - (1 + 4*LINK_SIZE) - 2;
}
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
@ -1757,15 +1759,17 @@ for (;;)
if (*ecode == OP_CALLOUT)
{
cb.callout_number = ecode[1 + 2*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string = NULL;
cb.callout_string_length = 0;
}
else
{
cb.callout_number = 0;
cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
cb.callout_string_length =
callout_length - (1 + 3*LINK_SIZE) - 2;
callout_length - (1 + 4*LINK_SIZE) - 2;
}
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)

View File

@ -600,18 +600,18 @@ for(;;)
break;
case OP_CALLOUT_STR:
c = code[1 + 3*LINK_SIZE];
c = code[1 + 4*LINK_SIZE];
fprintf(f, " %s %c", OP_names[*code], c);
extra = GET(code, 1 + 2*LINK_SIZE);
print_custring(f, code + 2 + 3*LINK_SIZE);
print_custring(f, code + 2 + 4*LINK_SIZE);
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
if (c == PRIV(callout_start_delims)[i])
{
c = PRIV(callout_end_delims)[i];
break;
}
fprintf(f, "%c %d %d", c, GET(code, 1), GET(code, 1 + LINK_SIZE));
fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
GET(code, 1 + LINK_SIZE));
break;
case OP_PROP:

View File

@ -4546,7 +4546,8 @@ isn't a tidy way to fit it in the rest of the data. */
if (cb->callout_string != NULL)
{
uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
fprintf(outfile, "Callout: %c", delimiter);
fprintf(outfile, "Callout (%lu): %c",
(unsigned long int)cb->callout_string_offset, delimiter);
PCHARSV(cb->callout_string, 0,
cb->callout_string_length, utf, outfile);
for (i = 0; callout_start_delims[i] != 0; i++)

52
testdata/testoutput2 vendored
View File

@ -13987,7 +13987,7 @@ Failed: error 139 at offset 7: closing parenthesis for (?C expected
------------------------------------------------------------------
Bra
a
CalloutStr "a)b"c" 13 0
CalloutStr "a)b"c" 5 13 0
Ket
End
------------------------------------------------------------------
@ -13996,18 +13996,18 @@ Failed: error 139 at offset 7: closing parenthesis for (?C expected
------------------------------------------------------------------
Bra
ab
CalloutStr " any text with spaces " 30 1
CalloutStr " any text with spaces " 6 30 1
cde
Ket
End
------------------------------------------------------------------
abcde
Callout: " any text with spaces "
Callout (6): " any text with spaces "
--->abcde
^ ^ c
0: abcde
12abcde
Callout: " any text with spaces "
Callout (6): " any text with spaces "
--->12abcde
^ ^ c
0: abcde
@ -14021,7 +14021,7 @@ Callout: " any text with spaces "
/^a(b)c(?C"AB")def/
abcdef
Callout: "AB"
Callout (10): "AB"
--->abcdef
^ ^ d
0: abcdef
@ -14046,13 +14046,13 @@ Callout 1: last capture = 1
b
Ket
c
CalloutStr {AB} 14 1
CalloutStr {AB} 10 14 1
def
Ket
End
------------------------------------------------------------------
abcdef\=callout_capture
Callout: {AB} last capture = 1
Callout (10): {AB} last capture = 1
0: <unset>
1: b
--->abcdef
@ -14063,14 +14063,14 @@ Callout: {AB} last capture = 1
/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B
------------------------------------------------------------------
Bra
CalloutStr `a`b` 10 0
CalloutStr 'a'b' 20 0
CalloutStr "a"b" 30 0
CalloutStr ^a^b^ 40 0
CalloutStr %a%b% 50 0
CalloutStr #a#b# 60 0
CalloutStr $a$b$ 70 0
CalloutStr {a}b} 80 0
CalloutStr `a`b` 4 10 0
CalloutStr 'a'b' 14 20 0
CalloutStr "a"b" 24 30 0
CalloutStr ^a^b^ 34 40 0
CalloutStr %a%b% 44 50 0
CalloutStr #a#b# 54 60 0
CalloutStr $a$b$ 64 70 0
CalloutStr {a}b} 74 80 0
Ket
End
------------------------------------------------------------------
@ -14080,15 +14080,15 @@ Callout: {AB} last capture = 1
Bra
Bra
a
CalloutStr `code` 14 0
CalloutStr `code` 8 14 0
Ket
Bra
a
CalloutStr `code` 14 0
CalloutStr `code` 8 14 0
Ket
Bra
a
CalloutStr `code` 14 0
CalloutStr `code` 8 14 0
Ket
Ket
End
@ -14124,7 +14124,7 @@ Callout: {AB} last capture = 1
Bra
^
Cond
CalloutStr $abc$ 12 7
CalloutStr $abc$ 7 12 7
Assert
abc
Ket
@ -14136,35 +14136,35 @@ Callout: {AB} last capture = 1
End
------------------------------------------------------------------
abcdefg
Callout: $abc$
Callout (7): $abc$
--->abcdefg
^ (?=abc)
0: abcd
xyz123
Callout: $abc$
Callout (7): $abc$
--->xyz123
^ (?=abc)
0: xyz
/^ab(?C'first')cd(?C"second")ef/
abcdefg
Callout: 'first'
Callout (7): 'first'
--->abcdefg
^ ^ c
Callout: "second"
Callout (20): "second"
--->abcdefg
^ ^ e
0: abcdef
/(?:a(?C`code`)){3}X/
aaaXY
Callout: `code`
Callout (8): `code`
--->aaaXY
^^ )
Callout: `code`
Callout (8): `code`
--->aaaXY
^ ^ )
Callout: `code`
Callout (8): `code`
--->aaaXY
^ ^ )
0: aaaX

28
testdata/testoutput6 vendored
View File

@ -7777,18 +7777,18 @@ get substring list failed (-2): partial match
------------------------------------------------------------------
Bra
ab
CalloutStr " any text with spaces " 30 1
CalloutStr " any text with spaces " 6 30 1
cde
Ket
End
------------------------------------------------------------------
abcde
Callout: " any text with spaces "
Callout (6): " any text with spaces "
--->abcde
^ ^ c
0: abcde
12abcde
Callout: " any text with spaces "
Callout (6): " any text with spaces "
--->12abcde
^ ^ c
0: abcde
@ -7801,7 +7801,7 @@ Callout: " any text with spaces "
/^a(b)c(?C"AB")def/
abcdef
Callout: "AB"
Callout (10): "AB"
--->abcdef
^ ^ d
0: abcdef
@ -7823,13 +7823,13 @@ Callout 1: last capture = 0
b
Ket
c
CalloutStr {AB} 14 1
CalloutStr {AB} 10 14 1
def
Ket
End
------------------------------------------------------------------
abcdef\=callout_capture
Callout: {AB} last capture = 0
Callout (10): {AB} last capture = 0
0:
--->abcdef
^ ^ d
@ -7865,7 +7865,7 @@ Callout: {AB} last capture = 0
Bra
^
Cond
CalloutStr $abc$ 12 7
CalloutStr $abc$ 7 12 7
Assert
abc
Ket
@ -7877,35 +7877,35 @@ Callout: {AB} last capture = 0
End
------------------------------------------------------------------
abcdefg
Callout: $abc$
Callout (7): $abc$
--->abcdefg
^ (?=abc)
0: abcd
xyz123
Callout: $abc$
Callout (7): $abc$
--->xyz123
^ (?=abc)
0: xyz
/^ab(?C'first')cd(?C"second")ef/
abcdefg
Callout: 'first'
Callout (7): 'first'
--->abcdefg
^ ^ c
Callout: "second"
Callout (20): "second"
--->abcdefg
^ ^ e
0: abcdef
/(?:a(?C`code`)){3}X/
aaaXY
Callout: `code`
Callout (8): `code`
--->aaaXY
^^ )
Callout: `code`
Callout (8): `code`
--->aaaXY
^ ^ )
Callout: `code`
Callout (8): `code`
--->aaaXY
^ ^ )
0: aaaX