Add use_length to pcre2test.

This commit is contained in:
Philip.Hazel 2016-11-04 10:53:43 +00:00
parent fb231d30fe
commit 10f4e45e68
3 changed files with 38 additions and 17 deletions

View File

@ -147,6 +147,8 @@ obsolete these days and in any case had become very haphazard.
21. Make pcre2test -C list valgrind support when it is enabled.
22. Add the use_length modifier to pcre2test.
Version 10.22 29-July-2016
--------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "02 August 2016" "PCRE 10.23"
.TH PCRE2TEST 1 "04 November 2016" "PCRE 10.23"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@ -580,6 +580,7 @@ about the pattern:
pushcopy push a copy onto the stack
stackguard=<number> test the stackguard feature
tables=[0|1|2] select internal tables
use_length do not zero-terminate the pattern
utf8_input treat input as UTF-8
.sp
The effects of these modifiers are described in the following sections.
@ -658,6 +659,18 @@ testing that \fBpcre2_compile()\fP behaves correctly in this case (it uses
default values).
.
.
.SS "Specifying the pattern's length"
.rs
.sp
By default, patterns are passed to the compiling functions as zero-terminated
strings. When using the POSIX wrapper API, there is no other option. However,
when using PCRE2's native API, patterns can be passed by length instead of
being zero-terminated. The \fBuse_length\fP modifier causes this to happen.
A length is also passed automatically (whether or not \fBuse_length\fP is set)
when \fBhex\fP is set, because patterns specified in hexadecimal may contain
binary zeros.
.
.
.SS "Specifying pattern characters in hexadecimal"
.rs
.sp
@ -679,10 +692,10 @@ Either single or double quotes may be used. There is no way of including
the delimiter within a substring. The \fBhex\fP and \fBexpand\fP modifiers are
mutually exclusive.
.P
By default, \fBpcre2test\fP passes patterns as zero-terminated strings to
\fBpcre2_compile()\fP, giving the length as PCRE2_ZERO_TERMINATED. However, for
patterns specified with the \fBhex\fP modifier, the actual length of the
pattern is passed.
The POSIX API cannot be used with patterns specified in hexadecimal because
they may contain binary zeros, which conflicts with \fBregcomp()\fP's
requirement for a zero-terminated string. Such patterns are always passed to
\fBpcre2_compile()\fP with an explicit length, never as zero-terminated strings.
.
.
.SS "Specifying wide characters in 16-bit and 32-bit modes"
@ -1734,6 +1747,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 02 August 2016
Last updated: 04 November 2016
Copyright (c) 1997-2016 University of Cambridge.
.fi

View File

@ -418,7 +418,7 @@ so many of them that they are split into two fields. */
#define CTL_FULLBINCODE 0x00001000u
#define CTL_GETALL 0x00002000u
#define CTL_GLOBAL 0x00004000u
#define CTL_HEXPAT 0x00008000u
#define CTL_HEXPAT 0x00008000u /* Same word as USE_LENGTH */
#define CTL_INFO 0x00010000u
#define CTL_JITFAST 0x00020000u
#define CTL_JITVERIFY 0x00040000u
@ -430,9 +430,10 @@ so many of them that they are split into two fields. */
#define CTL_PUSH 0x01000000u
#define CTL_PUSHCOPY 0x02000000u
#define CTL_STARTCHAR 0x04000000u
#define CTL_UTF8_INPUT 0x08000000u
#define CTL_ZERO_TERMINATE 0x10000000u
/* Spare 0x20000000u */
#define CTL_USE_LENGTH 0x08000000u /* Same word as HEXPAT */
#define CTL_UTF8_INPUT 0x10000000u
#define CTL_ZERO_TERMINATE 0x20000000u
#define CTL_NL_SET 0x40000000u /* Informational */
#define CTL_BSR_SET 0x80000000u /* Informational */
@ -620,6 +621,7 @@ static modstruct modlist[] = {
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
{ "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
{ "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
{ "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
{ "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
@ -649,7 +651,8 @@ static modstruct modlist[] = {
#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_BSR_SET|CTL_NL_SET)
CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_BSR_SET|CTL_NL_SET| \
CTL_USE_LENGTH)
#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (0)
@ -661,7 +664,7 @@ static modstruct modlist[] = {
/* Controls that are forbidden with #pop or #popcopy. */
#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
CTL_PUSHCOPY)
CTL_PUSHCOPY|CTL_USE_LENGTH)
/* Pattern controls that are mutually exclusive. At present these are all in
the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
@ -671,6 +674,7 @@ static uint32_t exclusive_pat_controls[] = {
CTL_POSIX | CTL_HEXPAT,
CTL_POSIX | CTL_PUSH,
CTL_POSIX | CTL_PUSHCOPY,
CTL_POSIX | CTL_USE_LENGTH,
CTL_EXPAND | CTL_HEXPAT };
/* Data controls that are mutually exclusive. At present these are all in the
@ -3681,7 +3685,7 @@ Returns: nothing
static void
show_controls(uint32_t controls, uint32_t controls2, const char *before)
{
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
@ -3716,6 +3720,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
}
@ -4976,12 +4981,13 @@ switch(errorcode)
}
/* The pattern is now in pbuffer[8|16|32], with the length in code units in
patlen. By default, however, we pass a zero-terminated pattern. The length is
passed only if we had a hex pattern. When valgrind is supported, arrange for
the unused part of the buffer to be marked as no access. */
patlen. By default we pass a zero-terminated pattern, but a length is passed if
"use_length" was specified or this is a hex pattern (which might contain binary
zeros). When valgrind is supported, arrange for the unused part of the buffer
to be marked as no access. */
valgrind_access_length = patlen;
if ((pat_patctl.control & CTL_HEXPAT) == 0)
if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
{
patlen = PCRE2_ZERO_TERMINATED;
valgrind_access_length += 1; /* For the terminating zero */