Add string offset within the pattern to the data passed to a callout with a string argument.
This commit is contained in:
Philip.Hazel 2015-03-14 12:20:18 +00:00
parent b2c67082bd
commit 15e034c9c2
9 changed files with 75 additions and 59 deletions

View File

@ -338,6 +338,7 @@ typedef struct pcre2_callout_block { \
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \ PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \ PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 1 -------------------------- */ \ /* ------------------- Added for Version 1 -------------------------- */ \
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
PCRE2_SPTR callout_string; /* String compiled into pattern */ \ PCRE2_SPTR callout_string; /* String compiled into pattern */ \
uint32_t callout_string_length; /* Length of string compiled into pattern */ \ uint32_t callout_string_length; /* Length of string compiled into pattern */ \
/* ------------------------------------------------------------------ */ \ /* ------------------------------------------------------------------ */ \

View File

@ -5652,17 +5652,19 @@ for (;; ptr++)
for the terminating zero. Any doubled delimiters within the string for the terminating zero. Any doubled delimiters within the string
make this an overestimate, but it is not worth bothering about. */ make this an overestimate, but it is not worth bothering about. */
(*lengthptr) += (ptr - start) + 2 + (1 + 3*LINK_SIZE); (*lengthptr) += (ptr - start) + 2 + (1 + 4*LINK_SIZE);
} }
/* In the real compile we can copy the string, knowing that it is /* In the real compile we can copy the string, knowing that it is
syntactically OK. The starting delimiter is included so that the syntactically OK. The starting delimiter is included so that the
client can discover it if they want. */ client can discover it if they want. We also pass the start offset to
help a script language give better error messages. */
else else
{ {
PCRE2_UCHAR *callout_string = code + (1 + 3*LINK_SIZE); PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE);
*callout_string++ = *ptr++; *callout_string++ = *ptr++;
PUT(code, 1 + 3*LINK_SIZE, (int)(ptr - cb->start_pattern)); /* Start offset */
for(;;) for(;;)
{ {
if (*ptr == delimiter) if (*ptr == delimiter)
@ -7302,7 +7304,7 @@ do {
scode += 1 + LINK_SIZE; scode += 1 + LINK_SIZE;
if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT]; if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE); else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE);
switch (*scode) switch (*scode)
{ {

View File

@ -2631,15 +2631,17 @@ for (;;)
if (code[LINK_SIZE + 1] == OP_CALLOUT) if (code[LINK_SIZE + 1] == OP_CALLOUT)
{ {
cb.callout_number = code[2 + 3*LINK_SIZE]; cb.callout_number = code[2 + 3*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string = NULL; cb.callout_string = NULL;
cb.callout_string_length = 0; cb.callout_string_length = 0;
} }
else else
{ {
cb.callout_number = 0; cb.callout_number = 0;
cb.callout_string = code + (2 + 4*LINK_SIZE) + 1; cb.callout_string_offset = GET(code, 2 + 4*LINK_SIZE);
cb.callout_string = code + (2 + 5*LINK_SIZE) + 1;
cb.callout_string_length = cb.callout_string_length =
callout_length - (1 + 3*LINK_SIZE) - 2; callout_length - (1 + 4*LINK_SIZE) - 2;
} }
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0) if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
@ -2997,15 +2999,17 @@ for (;;)
if (*code == OP_CALLOUT) if (*code == OP_CALLOUT)
{ {
cb.callout_number = code[1 + 2*LINK_SIZE]; cb.callout_number = code[1 + 2*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string = NULL; cb.callout_string = NULL;
cb.callout_string_length = 0; cb.callout_string_length = 0;
} }
else else
{ {
cb.callout_number = 0; cb.callout_number = 0;
cb.callout_string = code + (1 + 3*LINK_SIZE) + 1; cb.callout_string_offset = GET(code, 1 + 3*LINK_SIZE);
cb.callout_string = code + (1 + 4*LINK_SIZE) + 1;
cb.callout_string_length = cb.callout_string_length =
callout_length - (1 + 3*LINK_SIZE) - 2; callout_length - (1 + 4*LINK_SIZE) - 2;
} }
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0) if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)

View File

@ -6346,6 +6346,7 @@ unsigned int callout_length = (*cc == OP_CALLOUT)
? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE); ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
sljit_sw value1; sljit_sw value1;
sljit_sw value2; sljit_sw value2;
sljit_sw value3;
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
@ -6373,15 +6374,18 @@ if (*cc == OP_CALLOUT)
{ {
value1 = 0; value1 = 0;
value2 = 0; value2 = 0;
value3 = 0;
} }
else else
{ {
value1 = (sljit_sw) (cc + (1 + 3*LINK_SIZE) + 1); value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
value2 = (callout_length - (1 + 3*LINK_SIZE + 2)); value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
} }
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2); OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
/* Needed to save important temporary registers. */ /* Needed to save important temporary registers. */

View File

@ -1333,15 +1333,17 @@ for (;;)
if (*ecode == OP_CALLOUT) if (*ecode == OP_CALLOUT)
{ {
cb.callout_number = ecode[1 + 2*LINK_SIZE]; cb.callout_number = ecode[1 + 2*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string = NULL; cb.callout_string = NULL;
cb.callout_string_length = 0; cb.callout_string_length = 0;
} }
else else
{ {
cb.callout_number = 0; cb.callout_number = 0;
cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1; cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
cb.callout_string_length = cb.callout_string_length =
callout_length - (1 + 3*LINK_SIZE) - 2; callout_length - (1 + 4*LINK_SIZE) - 2;
} }
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0) if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
@ -1757,15 +1759,17 @@ for (;;)
if (*ecode == OP_CALLOUT) if (*ecode == OP_CALLOUT)
{ {
cb.callout_number = ecode[1 + 2*LINK_SIZE]; cb.callout_number = ecode[1 + 2*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string = NULL; cb.callout_string = NULL;
cb.callout_string_length = 0; cb.callout_string_length = 0;
} }
else else
{ {
cb.callout_number = 0; cb.callout_number = 0;
cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1; cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
cb.callout_string_length = cb.callout_string_length =
callout_length - (1 + 3*LINK_SIZE) - 2; callout_length - (1 + 4*LINK_SIZE) - 2;
} }
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0) if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)

View File

@ -600,18 +600,18 @@ for(;;)
break; break;
case OP_CALLOUT_STR: case OP_CALLOUT_STR:
c = code[1 + 3*LINK_SIZE]; c = code[1 + 4*LINK_SIZE];
fprintf(f, " %s %c", OP_names[*code], c); fprintf(f, " %s %c", OP_names[*code], c);
extra = GET(code, 1 + 2*LINK_SIZE); extra = GET(code, 1 + 2*LINK_SIZE);
print_custring(f, code + 2 + 3*LINK_SIZE); print_custring(f, code + 2 + 4*LINK_SIZE);
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
if (c == PRIV(callout_start_delims)[i]) if (c == PRIV(callout_start_delims)[i])
{ {
c = PRIV(callout_end_delims)[i]; c = PRIV(callout_end_delims)[i];
break; break;
} }
fprintf(f, "%c %d %d", c, GET(code, 1), GET(code, 1 + LINK_SIZE)); fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
GET(code, 1 + LINK_SIZE));
break; break;
case OP_PROP: case OP_PROP:

View File

@ -4546,7 +4546,8 @@ isn't a tidy way to fit it in the rest of the data. */
if (cb->callout_string != NULL) if (cb->callout_string != NULL)
{ {
uint32_t delimiter = CODE_UNIT(cb->callout_string, -1); uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
fprintf(outfile, "Callout: %c", delimiter); fprintf(outfile, "Callout (%lu): %c",
(unsigned long int)cb->callout_string_offset, delimiter);
PCHARSV(cb->callout_string, 0, PCHARSV(cb->callout_string, 0,
cb->callout_string_length, utf, outfile); cb->callout_string_length, utf, outfile);
for (i = 0; callout_start_delims[i] != 0; i++) for (i = 0; callout_start_delims[i] != 0; i++)

52
testdata/testoutput2 vendored
View File

@ -13987,7 +13987,7 @@ Failed: error 139 at offset 7: closing parenthesis for (?C expected
------------------------------------------------------------------ ------------------------------------------------------------------
Bra Bra
a a
CalloutStr "a)b"c" 13 0 CalloutStr "a)b"c" 5 13 0
Ket Ket
End End
------------------------------------------------------------------ ------------------------------------------------------------------
@ -13996,18 +13996,18 @@ Failed: error 139 at offset 7: closing parenthesis for (?C expected
------------------------------------------------------------------ ------------------------------------------------------------------
Bra Bra
ab ab
CalloutStr " any text with spaces " 30 1 CalloutStr " any text with spaces " 6 30 1
cde cde
Ket Ket
End End
------------------------------------------------------------------ ------------------------------------------------------------------
abcde abcde
Callout: " any text with spaces " Callout (6): " any text with spaces "
--->abcde --->abcde
^ ^ c ^ ^ c
0: abcde 0: abcde
12abcde 12abcde
Callout: " any text with spaces " Callout (6): " any text with spaces "
--->12abcde --->12abcde
^ ^ c ^ ^ c
0: abcde 0: abcde
@ -14021,7 +14021,7 @@ Callout: " any text with spaces "
/^a(b)c(?C"AB")def/ /^a(b)c(?C"AB")def/
abcdef abcdef
Callout: "AB" Callout (10): "AB"
--->abcdef --->abcdef
^ ^ d ^ ^ d
0: abcdef 0: abcdef
@ -14046,13 +14046,13 @@ Callout 1: last capture = 1
b b
Ket Ket
c c
CalloutStr {AB} 14 1 CalloutStr {AB} 10 14 1
def def
Ket Ket
End End
------------------------------------------------------------------ ------------------------------------------------------------------
abcdef\=callout_capture abcdef\=callout_capture
Callout: {AB} last capture = 1 Callout (10): {AB} last capture = 1
0: <unset> 0: <unset>
1: b 1: b
--->abcdef --->abcdef
@ -14063,14 +14063,14 @@ Callout: {AB} last capture = 1
/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B /(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B
------------------------------------------------------------------ ------------------------------------------------------------------
Bra Bra
CalloutStr `a`b` 10 0 CalloutStr `a`b` 4 10 0
CalloutStr 'a'b' 20 0 CalloutStr 'a'b' 14 20 0
CalloutStr "a"b" 30 0 CalloutStr "a"b" 24 30 0
CalloutStr ^a^b^ 40 0 CalloutStr ^a^b^ 34 40 0
CalloutStr %a%b% 50 0 CalloutStr %a%b% 44 50 0
CalloutStr #a#b# 60 0 CalloutStr #a#b# 54 60 0
CalloutStr $a$b$ 70 0 CalloutStr $a$b$ 64 70 0
CalloutStr {a}b} 80 0 CalloutStr {a}b} 74 80 0
Ket Ket
End End
------------------------------------------------------------------ ------------------------------------------------------------------
@ -14080,15 +14080,15 @@ Callout: {AB} last capture = 1
Bra Bra
Bra Bra
a a
CalloutStr `code` 14 0 CalloutStr `code` 8 14 0
Ket Ket
Bra Bra
a a
CalloutStr `code` 14 0 CalloutStr `code` 8 14 0
Ket Ket
Bra Bra
a a
CalloutStr `code` 14 0 CalloutStr `code` 8 14 0
Ket Ket
Ket Ket
End End
@ -14124,7 +14124,7 @@ Callout: {AB} last capture = 1
Bra Bra
^ ^
Cond Cond
CalloutStr $abc$ 12 7 CalloutStr $abc$ 7 12 7
Assert Assert
abc abc
Ket Ket
@ -14136,35 +14136,35 @@ Callout: {AB} last capture = 1
End End
------------------------------------------------------------------ ------------------------------------------------------------------
abcdefg abcdefg
Callout: $abc$ Callout (7): $abc$
--->abcdefg --->abcdefg
^ (?=abc) ^ (?=abc)
0: abcd 0: abcd
xyz123 xyz123
Callout: $abc$ Callout (7): $abc$
--->xyz123 --->xyz123
^ (?=abc) ^ (?=abc)
0: xyz 0: xyz
/^ab(?C'first')cd(?C"second")ef/ /^ab(?C'first')cd(?C"second")ef/
abcdefg abcdefg
Callout: 'first' Callout (7): 'first'
--->abcdefg --->abcdefg
^ ^ c ^ ^ c
Callout: "second" Callout (20): "second"
--->abcdefg --->abcdefg
^ ^ e ^ ^ e
0: abcdef 0: abcdef
/(?:a(?C`code`)){3}X/ /(?:a(?C`code`)){3}X/
aaaXY aaaXY
Callout: `code` Callout (8): `code`
--->aaaXY --->aaaXY
^^ ) ^^ )
Callout: `code` Callout (8): `code`
--->aaaXY --->aaaXY
^ ^ ) ^ ^ )
Callout: `code` Callout (8): `code`
--->aaaXY --->aaaXY
^ ^ ) ^ ^ )
0: aaaX 0: aaaX

28
testdata/testoutput6 vendored
View File

@ -7777,18 +7777,18 @@ get substring list failed (-2): partial match
------------------------------------------------------------------ ------------------------------------------------------------------
Bra Bra
ab ab
CalloutStr " any text with spaces " 30 1 CalloutStr " any text with spaces " 6 30 1
cde cde
Ket Ket
End End
------------------------------------------------------------------ ------------------------------------------------------------------
abcde abcde
Callout: " any text with spaces " Callout (6): " any text with spaces "
--->abcde --->abcde
^ ^ c ^ ^ c
0: abcde 0: abcde
12abcde 12abcde
Callout: " any text with spaces " Callout (6): " any text with spaces "
--->12abcde --->12abcde
^ ^ c ^ ^ c
0: abcde 0: abcde
@ -7801,7 +7801,7 @@ Callout: " any text with spaces "
/^a(b)c(?C"AB")def/ /^a(b)c(?C"AB")def/
abcdef abcdef
Callout: "AB" Callout (10): "AB"
--->abcdef --->abcdef
^ ^ d ^ ^ d
0: abcdef 0: abcdef
@ -7823,13 +7823,13 @@ Callout 1: last capture = 0
b b
Ket Ket
c c
CalloutStr {AB} 14 1 CalloutStr {AB} 10 14 1
def def
Ket Ket
End End
------------------------------------------------------------------ ------------------------------------------------------------------
abcdef\=callout_capture abcdef\=callout_capture
Callout: {AB} last capture = 0 Callout (10): {AB} last capture = 0
0: 0:
--->abcdef --->abcdef
^ ^ d ^ ^ d
@ -7865,7 +7865,7 @@ Callout: {AB} last capture = 0
Bra Bra
^ ^
Cond Cond
CalloutStr $abc$ 12 7 CalloutStr $abc$ 7 12 7
Assert Assert
abc abc
Ket Ket
@ -7877,35 +7877,35 @@ Callout: {AB} last capture = 0
End End
------------------------------------------------------------------ ------------------------------------------------------------------
abcdefg abcdefg
Callout: $abc$ Callout (7): $abc$
--->abcdefg --->abcdefg
^ (?=abc) ^ (?=abc)
0: abcd 0: abcd
xyz123 xyz123
Callout: $abc$ Callout (7): $abc$
--->xyz123 --->xyz123
^ (?=abc) ^ (?=abc)
0: xyz 0: xyz
/^ab(?C'first')cd(?C"second")ef/ /^ab(?C'first')cd(?C"second")ef/
abcdefg abcdefg
Callout: 'first' Callout (7): 'first'
--->abcdefg --->abcdefg
^ ^ c ^ ^ c
Callout: "second" Callout (20): "second"
--->abcdefg --->abcdefg
^ ^ e ^ ^ e
0: abcdef 0: abcdef
/(?:a(?C`code`)){3}X/ /(?:a(?C`code`)){3}X/
aaaXY aaaXY
Callout: `code` Callout (8): `code`
--->aaaXY --->aaaXY
^^ ) ^^ )
Callout: `code` Callout (8): `code`
--->aaaXY --->aaaXY
^ ^ ) ^ ^ )
Callout: `code` Callout (8): `code`
--->aaaXY --->aaaXY
^ ^ ) ^ ^ )
0: aaaX 0: aaaX