Add tests for JIT fast path.

This commit is contained in:
Philip.Hazel 2014-11-02 15:34:31 +00:00
parent a15a4de10b
commit dea68c01fb
5 changed files with 193 additions and 33 deletions

View File

@ -1,4 +1,4 @@
.TH PCRE2JIT 3 "27 October 2014" "PCRE2 10.00"
.TH PCRE2JIT 3 "02 November 2014" "PCRE2 10.00"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT"
@ -349,11 +349,11 @@ processed by \fBpcre2_jit_compile()\fP).
The fast path function is called \fBpcre2_jit_match()\fP, and it takes exactly
the same arguments as \fBpcre2_match()\fP, plus one additional argument that
must either point to a JIT stack or be NULL. In the latter case, if a callback
function has been set up by \fBpcre2_jit_stack_alloc()\fP, it is called.
function has been set up by \fBpcre2_jit_stack_assign()\fP, it is called.
Otherwise the system stack is used. The return values are the same as for
\fBpcre2_match()\fP, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial
or complete) is requested that was not compiled. Unsupported option bits are
ignored.
or complete) is requested that was not compiled. Unsupported option bits (for
example, PCRE2_ANCHORED) are ignored.
.P
When you call \fBpcre2_match()\fP, as well as testing for invalid options, a
number of other sanity checks are performed on the arguments. For example, if
@ -386,6 +386,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
Last updated: 27 October 2014
Last updated: 02 November 2014
Copyright (c) 1997-2014 University of Cambridge.
.fi

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "31 October 2014" "PCRE 10.00"
.TH PCRE2TEST 1 "02 November 2014" "PCRE2 10.00"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@ -437,6 +437,7 @@ about the pattern:
/I info show info about compiled pattern
hex pattern is coded in hexadecimal
jit[=<number>] use JIT
jitfast use JIT fast path
jitverify verify JIT use
locale=<name> use this locale
memory show memory used
@ -531,6 +532,12 @@ are specified. For more details, see the
documentation. See also the \fBjitstack\fP modifier below for a way of
setting the size of the JIT stack.
.P
If the \fBjitfast\fP modifier is specified, matching is done using the JIT
"fast path" interface (\fBpcre2_jit_match()\fP), which skips some of the sanity
checks that are done by \fBpcre2_match()\fP, and of course does not work when
JIT is not supported. If \fBjitfast\fP is specified without \fBjit\fP, jit=7 is
assumed.
.P
If the \fBjitverify\fP modifier is specified, information about the compiled
pattern shows whether JIT compilation was or was not successful. If
\fBjitverify\fP is specified without \fBjit\fP, jit=7 is assumed. If JIT
@ -847,8 +854,9 @@ current character is CR followed by LF, an advance of two is used.
.sp
The \fBjitstack\fP modifier provides a way of setting the maximum stack size
that is used by the just-in-time optimization code. It is ignored if JIT
optimization is not being used. Providing a stack that is larger than the
default 32K is necessary only for very complicated patterns.
optimization is not being used. The value is a number of kilobytes. Providing a
stack that is larger than the default 32K is necessary only for very
complicated patterns.
.
.
.SS "Setting match and recursion limits"
@ -1165,10 +1173,9 @@ characters.
.SH "SEE ALSO"
.rs
.sp
\fBpcre2\fP(3), \fBpcre16\fP(3), \fBpcre32\fP(3), \fBpcre2api\fP(3),
\fBpcre2callout\fP(3),
\fBpcre2\fP(3), \fBpcre2api\fP(3), \fBpcre2callout\fP(3),
\fBpcre2jit\fP(3), \fBpcre2matching\fP(3), \fBpcre2partial\fP(3),
\fBpcre2pattern\fP(3), \fBpcre2precompile\fP(3).
\fBpcre2pattern\fP(3).
.
.
.SH AUTHOR
@ -1185,6 +1192,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
Last updated: 31 October 2014
Last updated: 02 November 2014
Copyright (c) 1997-2014 University of Cambridge.
.fi

View File

@ -340,12 +340,13 @@ either on a pattern or a data line, so they must all be distinct. */
#define CTL_GLOBAL 0x00001000u
#define CTL_HEXPAT 0x00002000u
#define CTL_INFO 0x00004000u
#define CTL_JITVERIFY 0x00008000u
#define CTL_MARK 0x00010000u
#define CTL_MEMORY 0x00020000u
#define CTL_PATLEN 0x00040000u
#define CTL_POSIX 0x00080000u
#define CTL_STARTCHAR 0x00100000u
#define CTL_JITFAST 0x00008000u
#define CTL_JITVERIFY 0x00010000u
#define CTL_MARK 0x00020000u
#define CTL_MEMORY 0x00040000u
#define CTL_PATLEN 0x00080000u
#define CTL_POSIX 0x00100000u
#define CTL_STARTCHAR 0x00200000u
#define CTL_BSR_SET 0x80000000u /* This is informational */
#define CTL_NL_SET 0x40000000u /* This is informational */
@ -363,7 +364,6 @@ data line. */
CTL_ALLUSEDTEXT|\
CTL_ALTGLOBAL|\
CTL_GLOBAL|\
CTL_JITVERIFY|\
CTL_MARK|\
CTL_MEMORY|\
CTL_STARTCHAR)
@ -455,6 +455,7 @@ static modstruct modlist[] = {
{ "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
{ "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
{ "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
{ "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
{ "jitstack", MOD_DAT, MOD_INT, 0, DO(jitstack) },
{ "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
{ "locale", MOD_PAT, MOD_STR, 0, PO(locale) },
@ -810,6 +811,17 @@ are supported. */
else if (test_mode == PCRE16_MODE) pcre2_jit_free_unused_memory_16(G(a,16)); \
else pcre2_jit_free_unused_memory_32(G(a,32))
/* Call the JIT fast-path matcher for the code unit width selected by
test_mode (8, 16, or otherwise 32) and store its return value in a. Arguments
b, g and h are passed through G() together with the width; c is cast to the
matching PCRE2_SPTR type and i to the matching JIT stack pointer type. The
expansion is a complete if/else chain with no trailing semicolon, so the caller
supplies the terminating semicolon. */
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
if (test_mode == PCRE8_MODE) \
a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
(pcre2_jit_stack_8 *)i); \
else if (test_mode == PCRE16_MODE) \
a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
(pcre2_jit_stack_16 *)i); \
else \
a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
(pcre2_jit_stack_32 *)i)
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
if (test_mode == PCRE8_MODE) \
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_8(b,c,d); \
@ -1180,6 +1192,14 @@ the three different cases. */
else \
G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO))
/* Two-width form of the JIT fast-path call: when test_mode equals the mode
constant built from BITONE, the BITONE variant of pcre2_jit_match is called,
otherwise the BITTWO variant. The return value is stored in a. Function names,
the subject pointer type and the JIT stack pointer type are all constructed by
passing BITONE or BITTWO to G(). As with the other forms, the expansion has no
trailing semicolon. */
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_jit_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
G(g,BITONE),G(h,BITONE),(G(pcre2_jit_stack_,BITONE) *)i); \
else \
a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
G(g,BITTWO),G(h,BITTWO),(G(pcre2_jit_stack_,BITTWO) *)i)
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_alloc_,BITONE)(b,c,d); \
@ -1424,6 +1444,9 @@ the three different cases. */
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8))
#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_8(G(a,8),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8))
/* 8-bit form of the JIT fast-path call, with no test_mode dispatch: the result
of pcre2_jit_match_8() is stored in a. Arguments b, g and h are passed through
G() with 8 as the second argument, c is cast to PCRE2_SPTR8, and i is cast to
an 8-bit JIT stack pointer inside the expansion. The parameter list must
contain plain identifiers only; the cast belongs in the replacement text, not
in the parameter list. No trailing semicolon is included. */
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
  a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
    (pcre2_jit_stack_8 *)i)
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_8(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
@ -1500,6 +1523,9 @@ the three different cases. */
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_16(G(a,16),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
/* 16-bit form of the JIT fast-path call, with no test_mode dispatch: stores
the result of pcre2_jit_match_16() in a, casting c to PCRE2_SPTR16 and i to a
16-bit JIT stack pointer. No trailing semicolon is included. */
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
(pcre2_jit_stack_16 *)i)
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_16(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
@ -1576,6 +1602,9 @@ the three different cases. */
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_32(G(a,32),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
/* 32-bit form of the JIT fast-path call, with no test_mode dispatch: stores
the result of pcre2_jit_match_32() in a, casting c to PCRE2_SPTR32 and i to a
32-bit JIT stack pointer. No trailing semicolon is included. */
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
(pcre2_jit_stack_32 *)i)
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_32(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
@ -3154,7 +3183,7 @@ Returns: nothing
static void
show_controls(uint32_t controls, const char *before)
{
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
@ -3171,6 +3200,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
((controls & CTL_GLOBAL) != 0)? " global" : "",
((controls & CTL_HEXPAT) != 0)? " hex" : "",
((controls & CTL_INFO) != 0)? " info" : "",
((controls & CTL_JITFAST) != 0)? " jitfast" : "",
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
((controls & CTL_MARK) != 0)? " mark" : "",
((controls & CTL_MEMORY) != 0)? " memory" : "",
@ -3648,7 +3678,11 @@ patlen = p - buffer - 2;
if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
if (pat_patctl.jit == 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
/* Assume full JIT compile for jitverify and/or jitfast if nothing else was
specified. */
if (pat_patctl.jit == 0 &&
(pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
pat_patctl.jit = 7;
utf = (pat_patctl.options & PCRE2_UTF) != 0;
@ -3959,8 +3993,12 @@ for (;;)
PCRE2_SET_RECURSION_LIMIT(dat_context, mid);
}
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
dat_datctl.options, match_data, dat_context);
if ((pat_patctl.control & CTL_JITFAST) != 0)
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
dat_datctl.options, match_data, dat_context, jit_stack);
else
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
dat_datctl.options, match_data, dat_context);
if (capcount == errnumber)
{
@ -4651,8 +4689,12 @@ for (gmatched = 0;; gmatched++)
PCRE2_SIZE *ovector;
PCRE2_SIZE ovecsave[2];
jit_was_used = FALSE;
ovector = FLD(match_data, ovector);
/* When matching is via pcre2_match(), we will detect the use of JIT via the
stack callback function. */
jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
/* After the first time round a global loop, save the current ovector[0,1] so
that we can check that they do change each time. Otherwise a matching bug
@ -4669,8 +4711,7 @@ for (gmatched = 0;; gmatched++)
if (timeitm > 0)
{
register int i;
clock_t time_taken;
clock_t start_time = clock();
clock_t start_time, time_taken;
if ((dat_datctl.control & CTL_DFA) != 0)
{
@ -4681,6 +4722,7 @@ for (gmatched = 0;; gmatched++)
}
if (dfa_workspace == NULL)
dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
start_time = clock();
for (i = 0; i < timeitm; i++)
{
PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen,
@ -4688,12 +4730,28 @@ for (gmatched = 0;; gmatched++)
dat_context, dfa_workspace, DFA_WS_DIMENSION);
}
}
else for (i = 0; i < timeitm; i++)
else if ((pat_patctl.control & CTL_JITFAST) != 0)
{
PCRE2_MATCH(capcount, compiled_code, pp, ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
dat_context);
}
start_time = clock();
for (i = 0; i < timeitm; i++)
{
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
dat_context, jit_stack);
}
}
else
{
start_time = clock();
for (i = 0; i < timeitm; i++)
{
PCRE2_MATCH(capcount, compiled_code, pp, ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
dat_context);
}
}
total_match_time += (time_taken = clock() - start_time);
fprintf(outfile, "Match time %.4f milliseconds\n",
(((double)time_taken * 1000.0) / (double)timeitm) /
@ -4748,8 +4806,12 @@ for (gmatched = 0;; gmatched++)
}
else
{
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
dat_datctl.options | g_notempty, match_data, dat_context);
if ((pat_patctl.control & CTL_JITFAST) != 0)
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
dat_datctl.options | g_notempty, match_data, dat_context, jit_stack);
else
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
dat_datctl.options | g_notempty, match_data, dat_context);
if (capcount == 0)
{
fprintf(outfile, "Matched, but too many substrings\n");

35
testdata/testinput16 vendored
View File

@ -22,6 +22,12 @@
ab\=ph
xyz
/abcd/jitfast
abcd
ab\=ps
ab\=ph
xyz
/abcd/jit=1
abcd
ab\=ps
@ -29,12 +35,25 @@
xyz
xyz\=ps
/abcd/jit=1,jitfast
abcd
ab\=ps
ab\=ph
xyz
xyz\=ps
/abcd/jit=2
abcd
ab\=ps
ab\=ph
xyz
/abcd/jit=2,jitfast
abcd
ab\=ps
ab\=ph
xyz
/abcd/jit=3
abcd
ab\=ps
@ -108,6 +127,10 @@
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
aabbccddee\=find_limits
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast
aabbccddee\=find_limits
aabbccddee\=jitstack=1
/(a+)*zz/
aaaaaaaaaaaaaz
aaaaaaaaaaaaaz\=match_limit=3000
@ -134,4 +157,16 @@
/(?(R)a*(?1)|((?R))b)/
aaaabcde
# Invalid options disable JIT when called via pcre2_match(), causing the
# match to happen via the interpreter, but for fast JIT invalid options are
# ignored, so an unanchored match happens.
/abcd/jit
abcd\=anchored
fail abcd\=anchored
/abcd/jitfast
abcd\=anchored
succeed abcd\=anchored
# End of testinput16

56
testdata/testoutput16 vendored
View File

@ -38,6 +38,16 @@ Partial match: ab (JIT)
xyz
No match (JIT)
/abcd/jitfast
abcd
0: abcd (JIT)
ab\=ps
Partial match: ab (JIT)
ab\=ph
Partial match: ab (JIT)
xyz
No match (JIT)
/abcd/jit=1
abcd
0: abcd (JIT)
@ -50,6 +60,18 @@ No match (JIT)
xyz\=ps
No match
/abcd/jit=1,jitfast
abcd
0: abcd (JIT)
ab\=ps
Failed: error -43: bad JIT option
ab\=ph
Failed: error -43: bad JIT option
xyz
No match (JIT)
xyz\=ps
Failed: error -43: bad JIT option
/abcd/jit=2
abcd
0: abcd
@ -60,6 +82,16 @@ Partial match: ab
xyz
No match
/abcd/jit=2,jitfast
abcd
Failed: error -43: bad JIT option
ab\=ps
Partial match: ab (JIT)
ab\=ph
Failed: error -43: bad JIT option
xyz
Failed: error -43: bad JIT option
/abcd/jit=3
abcd
0: abcd (JIT)
@ -207,6 +239,14 @@ Minimum match limit = 6
2: cc
3: ee
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast
aabbccddee\=find_limits
Minimum match limit = 6
0: aabbccddee (JIT)
1: aa
2: cc
3: ee
/(a+)*zz/
aaaaaaaaaaaaaz
No match (JIT)
@ -261,4 +301,20 @@ Failed: error -44: JIT stack limit reached
aaaabcde
Failed: error -44: JIT stack limit reached
# Invalid options disable JIT when called via pcre2_match(), causing the
# match to happen via the interpreter, but for fast JIT invalid options are
# ignored, so an unanchored match happens.
/abcd/jit
abcd\=anchored
0: abcd
fail abcd\=anchored
No match
/abcd/jitfast
abcd\=anchored
0: abcd (JIT)
succeed abcd\=anchored
0: abcd (JIT)
# End of testinput16