Add use_length to pcre2test.
This commit is contained in:
parent
fb231d30fe
commit
10f4e45e68
|
@ -147,6 +147,8 @@ obsolete these days and in any case had become very haphazard.
|
||||||
|
|
||||||
21. Make pcre2test -C list valgrind support when it is enabled.
|
21. Make pcre2test -C list valgrind support when it is enabled.
|
||||||
|
|
||||||
|
22. Add the use_length modifier to pcre2test.
|
||||||
|
|
||||||
|
|
||||||
Version 10.22 29-July-2016
|
Version 10.22 29-July-2016
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2TEST 1 "02 August 2016" "PCRE 10.23"
|
.TH PCRE2TEST 1 "04 November 2016" "PCRE 10.23"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -580,6 +580,7 @@ about the pattern:
|
||||||
pushcopy push a copy onto the stack
|
pushcopy push a copy onto the stack
|
||||||
stackguard=<number> test the stackguard feature
|
stackguard=<number> test the stackguard feature
|
||||||
tables=[0|1|2] select internal tables
|
tables=[0|1|2] select internal tables
|
||||||
|
use_length do not zero-terminate the pattern
|
||||||
utf8_input treat input as UTF-8
|
utf8_input treat input as UTF-8
|
||||||
.sp
|
.sp
|
||||||
The effects of these modifiers are described in the following sections.
|
The effects of these modifiers are described in the following sections.
|
||||||
|
@ -658,6 +659,18 @@ testing that \fBpcre2_compile()\fP behaves correctly in this case (it uses
|
||||||
default values).
|
default values).
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.SS "Specifying the pattern's length"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
By default, patterns are passed to the compiling functions as zero-terminated
|
||||||
|
strings. When using the POSIX wrapper API, there is no other option. However,
|
||||||
|
when using PCRE2's native API, patterns can be passed by length instead of
|
||||||
|
being zero-terminated. The \fBuse_length\fP modifier causes this to happen.
|
||||||
|
A length is also used automatically (whether or not \fBuse_length\fP is set)
|
||||||
|
when \fBhex\fP is set, because patterns specified in hexadecimal may contain
|
||||||
|
binary zeros.
|
||||||
|
.
|
||||||
|
.
|
||||||
.SS "Specifying pattern characters in hexadecimal"
|
.SS "Specifying pattern characters in hexadecimal"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -679,10 +692,10 @@ Either single or double quotes may be used. There is no way of including
|
||||||
the delimiter within a substring. The \fBhex\fP and \fBexpand\fP modifiers are
|
the delimiter within a substring. The \fBhex\fP and \fBexpand\fP modifiers are
|
||||||
mutually exclusive.
|
mutually exclusive.
|
||||||
.P
|
.P
|
||||||
By default, \fBpcre2test\fP passes patterns as zero-terminated strings to
|
The POSIX API cannot be used with patterns specified in hexadecimal because
|
||||||
\fBpcre2_compile()\fP, giving the length as PCRE2_ZERO_TERMINATED. However, for
|
they may contain binary zeros, which conflicts with \fBregcomp()\fP's
|
||||||
patterns specified with the \fBhex\fP modifier, the actual length of the
|
requirement for a zero-terminated string. Such patterns are always passed to
|
||||||
pattern is passed.
|
\fBpcre2_compile()\fP as a string with a length, not as a zero-terminated string.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Specifying wide characters in 16-bit and 32-bit modes"
|
.SS "Specifying wide characters in 16-bit and 32-bit modes"
|
||||||
|
@ -1734,6 +1747,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 02 August 2016
|
Last updated: 04 November 2016
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2016 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -418,7 +418,7 @@ so many of them that they are split into two fields. */
|
||||||
#define CTL_FULLBINCODE 0x00001000u
|
#define CTL_FULLBINCODE 0x00001000u
|
||||||
#define CTL_GETALL 0x00002000u
|
#define CTL_GETALL 0x00002000u
|
||||||
#define CTL_GLOBAL 0x00004000u
|
#define CTL_GLOBAL 0x00004000u
|
||||||
#define CTL_HEXPAT 0x00008000u
|
#define CTL_HEXPAT 0x00008000u /* Same word as USE_LENGTH */
|
||||||
#define CTL_INFO 0x00010000u
|
#define CTL_INFO 0x00010000u
|
||||||
#define CTL_JITFAST 0x00020000u
|
#define CTL_JITFAST 0x00020000u
|
||||||
#define CTL_JITVERIFY 0x00040000u
|
#define CTL_JITVERIFY 0x00040000u
|
||||||
|
@ -430,9 +430,10 @@ so many of them that they are split into two fields. */
|
||||||
#define CTL_PUSH 0x01000000u
|
#define CTL_PUSH 0x01000000u
|
||||||
#define CTL_PUSHCOPY 0x02000000u
|
#define CTL_PUSHCOPY 0x02000000u
|
||||||
#define CTL_STARTCHAR 0x04000000u
|
#define CTL_STARTCHAR 0x04000000u
|
||||||
#define CTL_UTF8_INPUT 0x08000000u
|
#define CTL_USE_LENGTH 0x08000000u /* Same word as HEXPAT */
|
||||||
#define CTL_ZERO_TERMINATE 0x10000000u
|
#define CTL_UTF8_INPUT 0x10000000u
|
||||||
/* Spare 0x20000000u */
|
#define CTL_ZERO_TERMINATE 0x20000000u
|
||||||
|
|
||||||
#define CTL_NL_SET 0x40000000u /* Informational */
|
#define CTL_NL_SET 0x40000000u /* Informational */
|
||||||
#define CTL_BSR_SET 0x80000000u /* Informational */
|
#define CTL_BSR_SET 0x80000000u /* Informational */
|
||||||
|
|
||||||
|
@ -620,6 +621,7 @@ static modstruct modlist[] = {
|
||||||
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
|
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
|
||||||
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
|
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
|
||||||
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
|
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
|
||||||
|
{ "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
|
||||||
{ "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
|
{ "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
|
||||||
{ "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
|
{ "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
|
||||||
{ "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
|
{ "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
|
||||||
|
@ -649,7 +651,8 @@ static modstruct modlist[] = {
|
||||||
|
|
||||||
#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
|
#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
|
||||||
CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
|
CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
|
||||||
CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_BSR_SET|CTL_NL_SET)
|
CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_BSR_SET|CTL_NL_SET| \
|
||||||
|
CTL_USE_LENGTH)
|
||||||
|
|
||||||
#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (0)
|
#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (0)
|
||||||
|
|
||||||
|
@ -661,7 +664,7 @@ static modstruct modlist[] = {
|
||||||
/* Controls that are forbidden with #pop or #popcopy. */
|
/* Controls that are forbidden with #pop or #popcopy. */
|
||||||
|
|
||||||
#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
|
#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
|
||||||
CTL_PUSHCOPY)
|
CTL_PUSHCOPY|CTL_USE_LENGTH)
|
||||||
|
|
||||||
/* Pattern controls that are mutually exclusive. At present these are all in
|
/* Pattern controls that are mutually exclusive. At present these are all in
|
||||||
the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
|
the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
|
||||||
|
@ -671,6 +674,7 @@ static uint32_t exclusive_pat_controls[] = {
|
||||||
CTL_POSIX | CTL_HEXPAT,
|
CTL_POSIX | CTL_HEXPAT,
|
||||||
CTL_POSIX | CTL_PUSH,
|
CTL_POSIX | CTL_PUSH,
|
||||||
CTL_POSIX | CTL_PUSHCOPY,
|
CTL_POSIX | CTL_PUSHCOPY,
|
||||||
|
CTL_POSIX | CTL_USE_LENGTH,
|
||||||
CTL_EXPAND | CTL_HEXPAT };
|
CTL_EXPAND | CTL_HEXPAT };
|
||||||
|
|
||||||
/* Data controls that are mutually exclusive. At present these are all in the
|
/* Data controls that are mutually exclusive. At present these are all in the
|
||||||
|
@ -3681,7 +3685,7 @@ Returns: nothing
|
||||||
static void
|
static void
|
||||||
show_controls(uint32_t controls, uint32_t controls2, const char *before)
|
show_controls(uint32_t controls, uint32_t controls2, const char *before)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||||
before,
|
before,
|
||||||
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
||||||
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
||||||
|
@ -3716,6 +3720,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
|
||||||
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
|
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
|
||||||
((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
|
((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
|
||||||
((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
|
((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
|
||||||
|
((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
|
||||||
((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
|
((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
|
||||||
((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
|
((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
|
||||||
}
|
}
|
||||||
|
@ -4976,12 +4981,13 @@ switch(errorcode)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The pattern is now in pbuffer[8|16|32], with the length in code units in
|
/* The pattern is now in pbuffer[8|16|32], with the length in code units in
|
||||||
patlen. By default, however, we pass a zero-terminated pattern. The length is
|
patlen. By default we pass a zero-terminated pattern, but a length is passed if
|
||||||
passed only if we had a hex pattern. When valgrind is supported, arrange for
|
"use_length" was specified or this is a hex pattern (which might contain binary
|
||||||
the unused part of the buffer to be marked as no access. */
|
zeros). When valgrind is supported, arrange for the unused part of the buffer
|
||||||
|
to be marked as no access. */
|
||||||
|
|
||||||
valgrind_access_length = patlen;
|
valgrind_access_length = patlen;
|
||||||
if ((pat_patctl.control & CTL_HEXPAT) == 0)
|
if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
|
||||||
{
|
{
|
||||||
patlen = PCRE2_ZERO_TERMINATED;
|
patlen = PCRE2_ZERO_TERMINATED;
|
||||||
valgrind_access_length += 1; /* For the terminating zero */
|
valgrind_access_length += 1; /* For the terminating zero */
|
||||||
|
|
Loading…
Reference in New Issue