diff --git a/RunTest b/RunTest index 0a39d60..e7fbc74 100755 --- a/RunTest +++ b/RunTest @@ -269,11 +269,11 @@ done $sim ./pcre2test -C linksize >/dev/null link_size=$? if [ $link_size -lt 2 ] ; then - echo "Failed to find internal link size" + echo "RunTest: Failed to find internal link size" exit 1 fi if [ $link_size -gt 4 ] ; then - echo "Failed to find internal link size" + echo "RunTest: Failed to find internal link size" exit 1 fi diff --git a/doc/pcre2_config.3 b/doc/pcre2_config.3 index de48e58..7fed17b 100644 --- a/doc/pcre2_config.3 +++ b/doc/pcre2_config.3 @@ -56,8 +56,8 @@ available codes are: .sp The function yields a non-negative value on success or the negative value PCRE2_ERROR_BADOPTION otherwise. This is also the result for the -PCRE2_CONFIG_JITTARGET code if JIT support is not available. When a string -is returned the yield is the length of the string, in code units, excluding the +PCRE2_CONFIG_JITTARGET code if JIT support is not available. When a string is +requested, the function returns the number of code units used, including the terminating zero. .P There is a complete description of the PCRE2 native API in the diff --git a/doc/pcre2_jit_match.3 b/doc/pcre2_jit_match.3 index 03b543c..b0cc197 100644 --- a/doc/pcre2_jit_match.3 +++ b/doc/pcre2_jit_match.3 @@ -10,7 +10,7 @@ PCRE2 - Perl-compatible regular expressions (revised API) .B int pcre2_jit_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, .B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," .B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," -.B " pcre2_match_context *\fImcontext\fP, pcre2_jit_stack *\fIjit_stack\fP);" +.B " pcre2_match_context *\fImcontext\fP);" .fi . .SH DESCRIPTION @@ -22,12 +22,8 @@ algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and it bypasses some of the sanity checks that \fBpcre2_match()\fP applies. Its arguments are exactly the same as for .\" HREF -\fBpcre2_match()\fP +\fBpcre2_match()\fP. 
.\" -plus one additional argument that must either point to a JIT stack or be NULL. -In the latter case, if a callback function has been set up by -\fBpcre2_jit_stack_create()\fP, it is called. Otherwise the system stack is -used. .P The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Unsupported diff --git a/doc/pcre2_jit_stack_assign.3 b/doc/pcre2_jit_stack_assign.3 index 5097c7e..09ca70f 100644 --- a/doc/pcre2_jit_stack_assign.3 +++ b/doc/pcre2_jit_stack_assign.3 @@ -1,4 +1,4 @@ -.TH PCRE2_JIT_STACK_ASSIGN 3 "03 November 2014" "PCRE2 10.0" +.TH PCRE2_JIT_STACK_ASSIGN 3 "08 November 2014" "PCRE2 10.0" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -7,18 +7,20 @@ PCRE2 - Perl-compatible regular expressions (revised API) .B #include .PP .nf -.B void pcre2_jit_stack_assign(const pcre2_code *\fIcode\fP, +.B void pcre2_jit_stack_assign(pcre2_match_context *\fImcontext\fP, .B " pcre2_jit_callback \fIcallback_function\fP, void *\fIcallback_data\fP);" .fi . .SH DESCRIPTION .rs .sp -This function provides control over the memory used as a stack at run-time by a -call to \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP with a pattern that has -been successfully processed by the JIT compiler. The arguments are: +This function provides control over the memory used by JIT as a run-time stack +when \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP is called with a pattern +that has been successfully processed by the JIT compiler. The information that +determines which stack is used is put into a match context that is subsequently +passed to a matching function. 
The arguments of this function are: .sp - code the pointer returned by \fBpcre2_compile()\fP + mcontext a pointer to a match context callback a callback function callback_data a JIT stack or a value to be passed to the callback .P @@ -34,7 +36,7 @@ argument at the start of matching, in order to set up a JIT stack. If the result is NULL, the internal 32K stack is used; otherwise the return value must be a valid JIT stack, the result of calling \fBpcre2_jit_stack_create()\fP. .P -You may safely assign the same JIT stack to multiple patterns, as long as they +You may safely use the same JIT stack for multiple patterns, as long as they are all matched in the same thread. In a multithread application, each thread must use its own JIT stack. For more details, see the .\" HREF diff --git a/doc/pcre2jit.3 b/doc/pcre2jit.3 index bc06f1e..ac82892 100644 --- a/doc/pcre2jit.3 +++ b/doc/pcre2jit.3 @@ -1,4 +1,4 @@ -.TH PCRE2JIT 3 "03 November 2014" "PCRE2 10.00" +.TH PCRE2JIT 3 "08 November 2014" "PCRE2 10.00" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT" @@ -164,19 +164,20 @@ pattern. The \fBpcre2_jit_stack_assign()\fP function specifies which stack JIT code should use. Its arguments are as follows: .sp - pcre2_code *code - pcre2_jit_callback callback - void *data + pcre2_match_context *mcontext + pcre2_jit_callback callback + void *data .sp -The \fIcode\fP argument is a pointer to a compiled pattern, after it has been -processed by \fBpcre2_jit_compile()\fP. There are three cases for the values of -the other two options: +The first argument is a pointer to a match context. When this is subsequently +passed to a matching function, its information determines which JIT stack is +used. There are three cases for the values of the other two options: .sp (1) If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32K block on the machine stack is used. 
.sp (2) If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must be - a valid JIT stack, the result of calling \fBpcre2_jit_stack_create()\fP. + a pointer to a valid JIT stack, the result of calling + \fBpcre2_jit_stack_create()\fP. .sp (3) If \fIcallback\fP is not NULL, it must point to a function that is called with \fIdata\fP as an argument at the start of matching, in @@ -199,11 +200,11 @@ assign or pass back a non-NULL JIT stack, this must be a different stack for each thread so that the application is thread-safe. .P Strictly speaking, even more is allowed. You can assign the same non-NULL stack -to any number of patterns as long as they are not used for matching by multiple -threads at the same time. For example, you can assign the same stack to all -compiled patterns, and use a global mutex in the callback to wait until the -stack is available for use. However, this is an inefficient solution, and not -recommended. +to a match context that is used by any number of patterns, as long as they are +not used for matching by multiple threads at the same time. For example, you +could use the same stack in all compiled patterns, with a global mutex in the +callback to wait until the stack is available for use. However, this is an +inefficient solution, and not recommended. .P This is a suggestion for how a multithreaded program that needs to set up non-default JIT stacks might operate: @@ -217,10 +218,7 @@ non-default JIT stacks might operate: Use a one-line callback function return thread_local_var .sp -All the functions described in this section do nothing if JIT is not available, -and \fBpcre2_jit_stack_assign()\fP does nothing unless the \fBcode\fP argument -is non-NULL and points to a \fBpcre2_code\fP block that has been successfully -processed by \fBpcre2_jit_compile()\fP. +All the functions described in this section do nothing if JIT is not available. . . .\" HTML @@ -248,24 +246,24 @@ grow up to 1M anytime if needed. (3) Who "owns" a JIT stack? 
.sp The owner of the stack is the user program, not the JIT studied pattern or -anything else. The user program must ensure that if a stack is used by -\fBpcre2_match()\fP, (that is, it is assigned to the pattern currently -running), that stack must not be used by any other threads (to avoid -overwriting the same memory area). The best practice for multithreaded programs -is to allocate a stack for each thread, and return this stack through the JIT -callback function. +anything else. The user program must ensure that if a stack is being used by +\fBpcre2_match()\fP, (that is, it is assigned to a match context that is passed +to the pattern currently running), that stack must not be used by any other +threads (to avoid overwriting the same memory area). The best practice for +multithreaded programs is to allocate a stack for each thread, and return this +stack through the JIT callback function. .P (4) When should a JIT stack be freed? .sp You can free a JIT stack at any time, as long as it will not be used by -\fBpcre2_match()\fP again. When you assign the stack to a pattern, only a +\fBpcre2_match()\fP again. When you assign the stack to a match context, only a pointer is set. There is no reference counting or any other magic. You can free -the patterns and stacks in any order, anytime. Just \fIdo not\fP call -\fBpcre2_match()\fP with a pattern pointing to an already freed stack, as that -will cause SEGFAULT. (Also, do not free a stack currently used by -\fBpcre2_match()\fP in another thread). You can also replace the stack for a -pattern at any time. You can even free the previous stack before assigning a -replacement. +compiled patterns, contexts, and stacks in any order, anytime. Just \fIdo +not\fP call \fBpcre2_match()\fP with a match context pointing to an already +freed stack, as that will cause SEGFAULT. (Also, do not free a stack currently +used by \fBpcre2_match()\fP in another thread). 
You can also replace the stack +in a context at any time when it is not in use. You can also free the previous +stack before assigning a replacement. .P (5) Should I allocate/free a stack every time before/after calling \fBpcre2_match()\fP? @@ -273,7 +271,7 @@ replacement. No, because this is too costly in terms of resources. However, you could implement some clever idea which release the stack if it is not used in let's say two minutes. The JIT callback can help to achieve this without keeping a -list of the currently JIT studied patterns. +list of patterns. .P (6) OK, the stack is for long term memory allocation. But what happens if a pattern causes stack overflow with a stack of 1M? Is that 1M kept until the @@ -311,25 +309,28 @@ memory management, or NULL for standard memory management. .rs .sp This is a single-threaded example that specifies a JIT stack without using a -callback. +callback. A real program should include error checking after all the function +calls. .sp int rc; pcre2_code *re; pcre2_match_data *match_data; + pcre2_match_context *mcontext; pcre2_jit_stack *jit_stack; .sp - re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, + re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroffset, NULL); - /* Check for errors */ rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); - /* Check for errors */ + mcontext = pcre2_match_context_create(NULL); jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 512*1024); - /* Check for error (NULL) */ - pcre2_jit_stack_assign(re, NULL, jit_stack); + pcre2_jit_stack_assign(mcontext, NULL, jit_stack); match_data = pcre2_match_data_create(re, 10); - rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL); - /* Check results */ - pcre2_free(re); + rc = pcre2_match(re, subject, length, 0, 0, match_data, mcontext); + /* Process result */ +.sp + pcre2_code_free(re); + pcre2_match_data_free(match_data); + pcre2_match_context_free(mcontext); pcre2_jit_stack_free(jit_stack); .sp . 
@@ -347,13 +348,10 @@ available, and which need the best possible performance, can instead use a processed by \fBpcre2_jit_compile()\fP). .P The fast path function is called \fBpcre2_jit_match()\fP, and it takes exactly -the same arguments as \fBpcre2_match()\fP, plus one additional argument that -must either point to a JIT stack or be NULL. In the latter case, if a callback -function has been set up by \fBpcre2_jit_stack_assign()\fP, it is called. -Otherwise the system stack is used. The return values are the same as for -\fBpcre2_match()\fP, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial -or complete) is requested that was not compiled. Unsupported option bits (for -example, PCRE2_ANCHORED) are ignored. +the same arguments as \fBpcre2_match()\fP. The return values are also the same, +plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is +requested that was not compiled. Unsupported option bits (for example, +PCRE2_ANCHORED) are ignored. .P When you call \fBpcre2_match()\fP, as well as testing for invalid options, a number of other sanity checks are performed on the arguments. For example, if @@ -386,6 +384,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 03 November 2014 +Last updated: 08 November 2014 Copyright (c) 1997-2014 University of Cambridge. .fi diff --git a/src/pcre2_config.c b/src/pcre2_config.c index f534d79..d8bc1c3 100644 --- a/src/pcre2_config.c +++ b/src/pcre2_config.c @@ -131,7 +131,7 @@ switch (what) const char *v = PRIV(jit_get_target)(); return (where == NULL)? (int)((strlen(v) + 1) * sizeof(PCRE2_UCHAR)) : - PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v); + PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v) + 1; } #else return PCRE2_ERROR_BADOPTION; @@ -174,7 +174,7 @@ switch (what) #endif return (where == NULL)? 
(int)((strlen(v) + 1) * sizeof(PCRE2_UCHAR)) : - PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v); + PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v) + 1; } break; @@ -212,7 +212,7 @@ switch (what) XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE); return (where == NULL)? (int)((strlen(v) + 1) * sizeof(PCRE2_UCHAR)) : - PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v); + PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v) + 1; } } diff --git a/src/pcre2grep.c b/src/pcre2grep.c index 72259e8..e1534a8 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -3122,7 +3122,7 @@ for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next) { #ifdef SUPPORT_PCRE2GREP_JIT if (jit_stack != NULL && cp->compiled != NULL) - pcre2_jit_stack_assign(cp->compiled, NULL, jit_stack); + pcre2_jit_stack_assign(match_context, NULL, jit_stack); #endif } diff --git a/src/pcre2test.c b/src/pcre2test.c index 7146733..5b5c937 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -812,16 +812,13 @@ are supported. */ else if (test_mode == PCRE16_MODE) pcre2_jit_free_unused_memory_16(G(a,16)); \ else pcre2_jit_free_unused_memory_32(G(a,32)) -#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \ +#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ if (test_mode == PCRE8_MODE) \ - a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \ - (pcre2_jit_stack_8 *)i); \ + a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8)); \ else if (test_mode == PCRE16_MODE) \ - a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \ - (pcre2_jit_stack_16 *)i); \ + a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16)); \ else \ - a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \ - (pcre2_jit_stack_32 *)i) + a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32)) #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ if (test_mode == PCRE8_MODE) \ @@ -1193,13 +1190,13 @@ the three different cases. 
*/ else \ G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO)) -#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \ +#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ a = G(pcre2_jit_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ - G(g,BITONE),G(h,BITONE),(G(pcre2_jit_stack_,BITONE) *)i); \ + G(g,BITONE),G(h,BITONE)); \ else \ a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ - G(g,BITTWO),G(h,BITTWO),(G(pcre2_jit_stack_,BITTWO) *)i) + G(g,BITTWO),G(h,BITTWO)) #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ @@ -1445,9 +1442,8 @@ the three different cases. */ #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8)) #define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_8(G(a,8),b) #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8)) -#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \ - a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \ - (pcre2_jit_stack_8 *)i) +#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ + a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8)) #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ @@ -1525,8 +1521,7 @@ the three different cases. */ #define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_16(G(a,16),b) #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16)) -#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \ - a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \ - (pcre2_jit_stack_16 *)i) +#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ + a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16)) #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ @@ -1604,8 +1599,7 @@ the three different cases.
*/ #define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_32(G(a,32),b) #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32)) -#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \ - a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \ - (pcre2_jit_stack_32 *)i) +#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ + a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32)) #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d); #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ @@ -3996,7 +3990,7 @@ for (;;) if ((pat_patctl.control & CTL_JITFAST) != 0) PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, - dat_datctl.options, match_data, dat_context, jit_stack); + dat_datctl.options, match_data, dat_context); else PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, dat_datctl.options, match_data, dat_context); @@ -4641,14 +4635,14 @@ if (dat_datctl.jitstack != 0) PCRE2_JIT_STACK_CREATE(jit_stack, NULL, 1, dat_datctl.jitstack * 1024); jit_stack_size = dat_datctl.jitstack; } - PCRE2_JIT_STACK_ASSIGN(compiled_code, jit_callback, jit_stack); + PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack); } /* Or de-assign */ else if (jit_stack != NULL) { - PCRE2_JIT_STACK_ASSIGN(compiled_code, NULL, NULL); + PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL); PCRE2_JIT_STACK_FREE(jit_stack); jit_stack = NULL; jit_stack_size = 0; @@ -4659,7 +4653,7 @@ if we want to verify that JIT was actually used. */ if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL) { - PCRE2_JIT_STACK_ASSIGN(compiled_code, jit_callback, NULL); + PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL); } /* Adjust match_data according to size of offsets required.
A size of zero @@ -4745,7 +4739,7 @@ for (gmatched = 0;; gmatched++) { PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, dat_datctl.options | g_notempty, match_data, - dat_context, jit_stack); + dat_context); } } @@ -4815,7 +4809,7 @@ for (gmatched = 0;; gmatched++) { if ((pat_patctl.control & CTL_JITFAST) != 0) PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, - dat_datctl.options | g_notempty, match_data, dat_context, jit_stack); + dat_datctl.options | g_notempty, match_data, dat_context); else PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, dat_datctl.options | g_notempty, match_data, dat_context);