2014-04-18 17:37:56 +02:00
|
|
|
/*************************************************
|
|
|
|
* PCRE2 testing program *
|
|
|
|
*************************************************/
|
2014-03-07 18:28:52 +01:00
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
|
|
|
and semantics are as close as possible to those of the Perl 5 language. In 2014
|
|
|
|
the API was completely revised and '2' was added to the name, because the old
|
|
|
|
API, which had lasted for 16 years, could not accommodate new requirements. At
|
|
|
|
the same time, this testing program was re-designed because its original
|
|
|
|
hacked-up (non-) design had also run out of steam.
|
|
|
|
|
|
|
|
Written by Philip Hazel
|
|
|
|
Original code Copyright (c) 1997-2012 University of Cambridge
|
|
|
|
Rewritten code Copyright (c) 2014 University of Cambridge
|
|
|
|
|
|
|
|
-----------------------------------------------------------------------------
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
|
|
this list of conditions and the following disclaimer.
|
|
|
|
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer in the
|
|
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
|
|
|
|
* Neither the name of the University of Cambridge nor the names of its
|
|
|
|
contributors may be used to endorse or promote products derived from
|
|
|
|
this software without specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
-----------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* This program supports the testing of all of the 8-bit, 16-bit, and 32-bit
|
|
|
|
PCRE2 libraries in a single program. This is different from modules such as
|
|
|
|
pcre2_compile.c in the library itself, which are compiled separately for each
|
|
|
|
mode. If two modes are enabled, for example, pcre2_compile.c is compiled twice.
|
|
|
|
By contrast, pcre2test.c is compiled only once. Therefore, it must not make use
|
|
|
|
of any of the macros from pcre2.h or pcre2_internal.h that depend on
|
|
|
|
PCRE2_CODE_UNIT_WIDTH. It does, however, make use of SUPPORT_PCRE8,
|
|
|
|
SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only supported
|
|
|
|
library functions. */
|
|
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <ctype.h>
|
2014-03-07 18:28:52 +01:00
|
|
|
#include <stdio.h>
|
2014-04-18 17:37:56 +02:00
|
|
|
#include <string.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <time.h>
|
|
|
|
#include <locale.h>
|
|
|
|
#include <errno.h>
|
|
|
|
|
|
|
|
/* Both libreadline and libedit are optionally supported. The user-supplied
|
|
|
|
original patch uses readline/readline.h for libedit, but in at least one system
|
|
|
|
it is installed as editline/readline.h, so the configuration code now looks for
|
|
|
|
that first, falling back to readline/readline.h. */
|
|
|
|
|
|
|
|
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
|
|
|
#if defined(SUPPORT_LIBREADLINE)
|
|
|
|
#include <readline/readline.h>
|
|
|
|
#include <readline/history.h>
|
|
|
|
#else
|
|
|
|
#if defined(HAVE_EDITLINE_READLINE_H)
|
|
|
|
#include <editline/readline.h>
|
|
|
|
#else
|
|
|
|
#include <readline/readline.h>
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* ---------------------- System-specific definitions ---------------------- */
|
|
|
|
|
|
|
|
/* A number of things vary for Windows builds. Originally, pcretest opened its
|
|
|
|
input and output without "b"; then I was told that "b" was needed in some
|
|
|
|
environments, so it was added for release 5.0 to both the input and output. (It
|
|
|
|
makes no difference on Unix-like systems.) Later I was told that it is wrong
|
|
|
|
for the input on Windows. I've now abstracted the modes into two macros that
|
|
|
|
are set here, to make it easier to fiddle with them, and removed "b" from the
|
|
|
|
input mode under Windows. */
|
|
|
|
|
|
|
|
/* Platform selection of the fopen() modes used for input and output, plus a
few Windows naming shims. Windows reads in text mode and writes in binary
mode; z/OS uses text mode for both; everything else uses binary for both. */

#if defined(_WIN32) || defined(WIN32)
#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

#ifndef isatty
#define isatty _isatty         /* This is what Windows calls them, I'm told, */
#endif                         /* though in some environments they seem to */
                               /* be already defined, hence the #ifndefs. */
#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

/* Not Windows */

#else
#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#else
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif
#endif
|
|
|
|
|
|
|
|
#ifdef __VMS
|
|
|
|
#include <ssdef.h>
|
|
|
|
void vms_setsymbol( char *, char *, int );
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* ----------------- End of system-specific definitions -------------------- */
|
|
|
|
|
|
|
|
/* Glueing macros that are used in several places below. */
|
|
|
|
|
|
|
|
#define glue(a,b) a##b
|
|
|
|
#define G(a,b) glue(a,b)
|
|
|
|
|
|
|
|
/* Other parameters */
|
|
|
|
|
|
|
|
/* Provide CLOCKS_PER_SEC when <time.h> does not: use CLK_TCK if that exists,
otherwise fall back to 100 ticks per second. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
|
|
|
|
#define LOOPREPEAT 500000 /* Default loop count for timing. */
|
|
|
|
#define VERSION_SIZE 64 /* Size of buffer for the version string. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
/* Execution modes */
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
#define PCRE8_MODE 8
|
|
|
|
#define PCRE16_MODE 16
|
|
|
|
#define PCRE32_MODE 32
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
/* Processing returns */
|
|
|
|
|
|
|
|
enum { PR_OK, PR_SKIP, PR_ABEND };
|
|
|
|
|
|
|
|
/* This is defined before including pcre2_internal.h so that it does not get
|
|
|
|
defined therein. */
|
|
|
|
|
|
|
|
#define PRIV(name) name
|
|
|
|
|
|
|
|
/* We have to include pcre2_internal.h and pcre2_intstructs.h because we need
|
|
|
|
to know about the internal macros, structures, and other internal data values;
|
|
|
|
pcre2test has "inside information" compared to a program that strictly follows
|
|
|
|
the PCRE2 API. We need the structures at all supported code widths;
|
|
|
|
pcre2_internal.h does not include the structures if PCRE2_CODE_UNIT_WIDTH is
|
|
|
|
unset. This enables us to #include pcre2_intstructs.h as many times as
|
|
|
|
necessary.
|
|
|
|
|
|
|
|
Although pcre2_internal.h does itself include pcre2.h, we explicitly include it
|
|
|
|
before pcre2_internal.h so that the PCRE2_EXP_xxx macros get set
|
|
|
|
appropriately for an application, not for building PCRE2. */
|
|
|
|
|
|
|
|
#include "pcre2.h"
|
|
|
|
#include "pcre2posix.h"
|
|
|
|
#include "pcre2_internal.h"
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* Now include the internal mode-dependent structures at all supported widths.
|
|
|
|
Because PCRE2_CODE_UNIT_WIDTH was not defined before including pcre2.h, it will
|
|
|
|
have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
|
|
|
|
while including the internal structures, and then restore it to a no-op. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
#undef PCRE2_SUFFIX
|
|
|
|
#define PCRE2_SUFFIX(a) G(a,8)
|
|
|
|
#include "pcre2_intstructs.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
#undef PCRE2_SUFFIX
|
|
|
|
#define PCRE2_SUFFIX(a) G(a,16)
|
|
|
|
#include "pcre2_intstructs.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE32
|
|
|
|
#undef PCRE2_SUFFIX
|
|
|
|
#define PCRE2_SUFFIX(a) G(a,32)
|
|
|
|
#include "pcre2_intstructs.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#undef PCRE2_SUFFIX
|
|
|
|
#define PCRE2_SUFFIX(a) a
|
|
|
|
|
|
|
|
/* We need access to some of the data tables that PCRE uses. So as not to have
|
|
|
|
to keep two copies, we include the source files here, having previously defined
|
|
|
|
PRIV to change the names of the external symbols to prevent clashes. Defining
|
|
|
|
PCRE2_INCLUDED makes some minor changes in the files. */
|
|
|
|
|
|
|
|
#define PCRE2_INCLUDED
|
|
|
|
#include "pcre2_tables.c"
|
|
|
|
|
|
|
|
#ifdef FIXME
|
|
|
|
#include "pcre2_ucd.c"
|
|
|
|
#endif
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* The definition of the macro PRINTABLE, which determines whether to print an
|
|
|
|
output character as-is or as a hex value when showing compiled patterns, is
|
|
|
|
the same as in the printint.c file. We use it here in cases when the locale
|
|
|
|
has not been explicitly changed, so as to get consistent output from systems
|
|
|
|
that differ in their output from isprint() even in the "C" locale. */
|
|
|
|
|
|
|
|
/* PRINTABLE(c) is the locale-independent test: a fixed code range that
depends only on whether the build is EBCDIC or ASCII. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) uses isprint() once locale_set is true, and PRINTABLE()
otherwise. isprint() is only defined for arguments representable as unsigned
char (or EOF), so any value outside 0..255 is reported as not printable
instead of being handed to isprint(), which would be undefined behaviour. */

#define PRINTOK(c) \
  (locale_set? ((unsigned long)(c) < 256 && isprint((int)(c))) : PRINTABLE(c))
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
|
|
|
|
support, it can be changed by an option. If there is no 8-bit support, there
|
|
|
|
must be 16- or 32-bit support, so default to one of them. The config function,
|
|
|
|
JIT stack, contexts, and version string are the same in all modes, so use the
|
|
|
|
form of the first that is available. */
|
|
|
|
|
|
|
|
/* Bind the mode-independent names to the first supported width, trying 8,
then 16, then 32. If none of the SUPPORT_PCREn macros is defined, none of
these names is defined. */

#if defined SUPPORT_PCRE8
#define DEFAULT_TEST_MODE PCRE8_MODE
#define PCRE2_CONFIG pcre2_config_8
#define PCRE2_JIT_STACK pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8
#define PCRE2_VERSION pcre2_version_8
#define VERSION_TYPE PCRE2_UCHAR8

#elif defined SUPPORT_PCRE16
#define DEFAULT_TEST_MODE PCRE16_MODE
#define PCRE2_CONFIG pcre2_config_16
#define PCRE2_JIT_STACK pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16
#define PCRE2_VERSION pcre2_version_16
#define VERSION_TYPE PCRE2_UCHAR16

#elif defined SUPPORT_PCRE32
#define DEFAULT_TEST_MODE PCRE32_MODE
#define PCRE2_CONFIG pcre2_config_32
#define PCRE2_JIT_STACK pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
#define PCRE2_VERSION pcre2_version_32
#define VERSION_TYPE PCRE2_UCHAR32
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* Table of names for newline types. Must be kept in step with the definitions
|
|
|
|
of PCRE2_NEWLINE_xx in pcre2.h. */
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* Printable names of the newline conventions; six entries, "DEFAULT" first. */
static const char *newlines[] = {
  "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF" };
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* Modifier types and applicability */
|
|
|
|
|
|
|
|
/* The first five values say what a modifier applies to (the "which" column of
modlist); the remainder say what kind of value it carries (the "type" column).
Values are assigned sequentially from zero. */

enum { MOD_CTC,    /* Applies to a compile context */
       MOD_CTM,    /* Applies to a match context */
       MOD_PAT,    /* Applies to a pattern */
       MOD_DAT,    /* Applies to a data line */
       MOD_PD,     /* Applies to a pattern or a data line */
       MOD_CTL,    /* Is a control bit */
       MOD_BSR,    /* Is a BSR value */
       MOD_IN2,    /* Is one or two integer values */
       MOD_INT,    /* Is an integer value */
       MOD_NL,     /* Is a newline value */
       MOD_NN,     /* Is a number or a name; more than one may occur */
       MOD_OPT,    /* Is an option bit */
       MOD_STR };  /* Is a string */
|
|
|
|
|
|
|
|
/* Control bits */
|
|
|
|
|
|
|
|
/* One bit per control; they are stored in the "control" field of patctl and
datctl by the MOD_CTL rows of modlist. */

#define CTL_AFTERTEXT        0x00000001
#define CTL_ALLAFTERTEXT     0x00000002
#define CTL_ALLCAPTURES      0x00000004
#define CTL_ALTGLOBMATCH     0x00000008
#define CTL_BYTECODE         0x00000010
#define CTL_CALLOUT_CAPTURE  0x00000020
#define CTL_CALLOUT_NONE     0x00000040
#define CTL_DFA              0x00000080
#define CTL_DFA_RESTART      0x00000100
#define CTL_DFA_SHORTEST     0x00000200
#define CTL_FLIPBYTES        0x00000400
#define CTL_FULLBYTECODE     0x00000800
#define CTL_GETLIST          0x00001000
#define CTL_GLOBMATCH        0x00002000
#define CTL_INFO             0x00004000
#define CTL_JITVERIFY        0x00008000
#define CTL_LIMITS           0x00010000
#define CTL_MARKS            0x00020000
#define CTL_MEMORY           0x00040000
#define CTL_PERLCOMPAT       0x00080000
#define CTL_POSIX            0x00100000

/* Combinations: CTL_DEBUG is what the "debug" modifier sets; CTL_ANYINFO is a
mask for testing whether any information output was requested. */

#define CTL_DEBUG            (CTL_FULLBYTECODE|CTL_INFO)  /* For setting */
#define CTL_ANYINFO          (CTL_DEBUG|CTL_BYTECODE)     /* For testing */
|
|
|
|
|
|
|
|
/* Structures and tables for handling modifiers. */
|
|
|
|
|
|
|
|
typedef struct patctl { /* Structure for pattern control settings. */
|
|
|
|
uint32_t options;
|
|
|
|
uint32_t control;
|
|
|
|
uint32_t jit;
|
|
|
|
uint32_t stackguard_test;
|
|
|
|
uint32_t tables_id;
|
|
|
|
char locale[32];
|
|
|
|
char save[64];
|
|
|
|
} patctl;
|
|
|
|
|
|
|
|
#define MAXCPYGET 10
|
|
|
|
#define LENCPYGET 64
|
|
|
|
|
|
|
|
typedef struct datctl { /* Structure for data line control settings. */
|
|
|
|
uint32_t options;
|
|
|
|
uint32_t control;
|
|
|
|
uint32_t cfail[2];
|
|
|
|
uint32_t copy_numbers[MAXCPYGET];
|
|
|
|
uint32_t get_numbers[MAXCPYGET];
|
|
|
|
uint32_t jitstack;
|
2014-04-30 18:55:24 +02:00
|
|
|
uint32_t oveccount;
|
2014-04-18 17:37:56 +02:00
|
|
|
uint32_t offset;
|
|
|
|
char copy_names[LENCPYGET];
|
|
|
|
char get_names[LENCPYGET];
|
|
|
|
} datctl;
|
|
|
|
|
|
|
|
/* Ids for which context to modify. */
|
|
|
|
|
|
|
|
enum { CTX_PAT, CTX_DEFPAT, CTX_DAT, CTX_DEFDAT, CTX_DEFANY };
|
|
|
|
|
|
|
|
/* Macros to simplify the big table below. */
|
|
|
|
|
|
|
|
#define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
|
|
|
|
#define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
|
|
|
|
#define PO(name) offsetof(patctl, name)
|
|
|
|
#define PD(name) PO(name)
|
|
|
|
#define DO(name) offsetof(datctl, name)
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* Table of all long-form modifiers. Must be in collating sequence because it
|
|
|
|
is searched by binary chop. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
typedef struct modstruct {
|
|
|
|
const char *name;
|
|
|
|
uint16_t which;
|
|
|
|
uint16_t type;
|
|
|
|
uint32_t value;
|
|
|
|
size_t offset;
|
|
|
|
} modstruct;
|
|
|
|
|
|
|
|
static modstruct modlist[] = {
|
|
|
|
{ "aftertext", MOD_PAT, MOD_CTL, CTL_AFTERTEXT, PO(control) },
|
|
|
|
{ "allaftertext", MOD_PAT, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
|
|
|
|
{ "allcaptures", MOD_PAT, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
|
|
|
|
{ "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
|
|
|
|
{ "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
|
|
|
|
{ "altglobmatch", MOD_PAT, MOD_CTL, CTL_ALTGLOBMATCH, PO(control) },
|
|
|
|
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
|
|
|
|
{ "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
|
|
|
|
{ "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
|
|
|
|
{ "bytecode", MOD_PAT, MOD_CTL, CTL_BYTECODE, PO(control) },
|
|
|
|
{ "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
|
|
|
|
{ "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
|
|
|
|
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
|
|
|
|
{ "caseless", MOD_PAT, MOD_OPT, PCRE2_CASELESS, PO(options) },
|
|
|
|
{ "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
|
2014-04-30 18:55:24 +02:00
|
|
|
{ "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
|
2014-04-18 17:37:56 +02:00
|
|
|
{ "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
|
|
|
|
{ "dfa_restart", MOD_DAT, MOD_OPT, CTL_DFA_RESTART, DO(options) },
|
|
|
|
{ "dfa_shortest", MOD_DAT, MOD_OPT, CTL_DFA_SHORTEST, DO(options) },
|
|
|
|
{ "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
|
|
|
|
{ "dotall", MOD_PAT, MOD_OPT, PCRE2_DOTALL, PO(options) },
|
|
|
|
{ "dupnames", MOD_PAT, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
|
|
|
|
{ "extended", MOD_PAT, MOD_OPT, PCRE2_EXTENDED, PO(options) },
|
|
|
|
{ "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
|
|
|
|
{ "flipbytes", MOD_PAT, MOD_CTL, CTL_FLIPBYTES, PO(control) },
|
|
|
|
{ "fullbytecode", MOD_PAT, MOD_CTL, CTL_FULLBYTECODE, PO(control) },
|
|
|
|
{ "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
|
2014-04-30 18:55:24 +02:00
|
|
|
{ "getlist", MOD_DAT, MOD_CTL, CTL_GETLIST, DO(control) },
|
2014-04-18 17:37:56 +02:00
|
|
|
{ "globmatch", MOD_PAT, MOD_CTL, CTL_GLOBMATCH, PO(control) },
|
|
|
|
{ "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
|
|
|
|
{ "jit", MOD_PAT, MOD_INT, 1, PO(jit) },
|
|
|
|
{ "jitstack", MOD_DAT, MOD_INT, 0, DO(jitstack) },
|
|
|
|
{ "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
|
|
|
|
{ "limits", MOD_DAT, MOD_CTL, CTL_LIMITS, DO(control) },
|
|
|
|
{ "locale", MOD_PAT, MOD_STR, 0, PO(locale) },
|
|
|
|
{ "marks", MOD_PAT, MOD_CTL, CTL_MARKS, PO(control) },
|
|
|
|
{ "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
|
|
|
|
{ "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
|
|
|
|
{ "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
|
|
|
|
{ "multiline", MOD_PAT, MOD_OPT, PCRE2_MULTILINE, PO(options) },
|
|
|
|
{ "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
|
|
|
|
{ "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
|
|
|
|
{ "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
|
|
|
|
{ "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
|
|
|
|
{ "no_auto_possess", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
|
|
|
|
{ "no_start_optimize", MOD_PD, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PD(options) },
|
|
|
|
{ "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
|
|
|
|
{ "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
|
|
|
|
{ "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
|
|
|
|
{ "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
|
|
|
|
{ "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
|
|
|
|
{ "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
|
2014-04-30 18:55:24 +02:00
|
|
|
{ "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
|
2014-04-18 17:37:56 +02:00
|
|
|
{ "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
|
|
|
|
{ "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
|
|
|
|
{ "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
|
|
|
|
{ "perlcompat", MOD_PAT, MOD_CTL, CTL_PERLCOMPAT, PO(control) },
|
|
|
|
{ "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
|
|
|
|
{ "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) },
|
|
|
|
{ "save", MOD_PAT, MOD_STR, 0, PO(save) },
|
|
|
|
{ "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
|
|
|
|
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
|
|
|
|
{ "ucp", MOD_PAT, MOD_OPT, PCRE2_UCP, PO(options) },
|
|
|
|
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
|
|
|
|
{ "utf", MOD_PAT, MOD_OPT, PCRE2_UTF, PO(options) }
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Number of rows in modlist. Parenthesized so that it behaves as a single
operand, e.g. in "n / MODLISTCOUNT" or "n % MODLISTCOUNT". */
#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* Controls and options that are supported for use with the POSIX interface. */
|
|
|
|
|
|
|
|
/* Masks of the option and control bits that are supported for use with the
POSIX interface, separately for compiling and for matching. No match-time
controls are supported. */

#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
  PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
  PCRE2_UCP|PCRE2_UTF|PCRE2_UNGREEDY)

#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_POSIX)

#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
  PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL)

#define POSIX_SUPPORTED_MATCH_CONTROLS ( 0 )
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
/* Table of single-character and doubled-character abbreviated modifiers. The
|
|
|
|
index field is initialized to -1, but the first time the modifier is
|
|
|
|
encountered, it is filled in with the index of the full entry in modlist, to
|
2014-04-30 18:55:24 +02:00
|
|
|
save repeated searching when processing multiple test items. This short list is
|
|
|
|
searched serially, so its order does not matter. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
typedef struct c1modstruct {
|
|
|
|
const char *fullname;
|
|
|
|
uint32_t onechar;
|
|
|
|
int index;
|
|
|
|
} c1modstruct;
|
|
|
|
|
|
|
|
static c1modstruct c1modlist[] = {
|
|
|
|
{ "bytecode", 'B', -1 },
|
|
|
|
{ "fullbytecode", ('B'<<8)|'B', -1 },
|
|
|
|
{ "debug", 'D', -1 },
|
|
|
|
{ "info", 'I', -1 },
|
|
|
|
{ "partial_soft", 'P', -1 },
|
|
|
|
{ "partial_hard", ('P'<<8)|'P', -1 },
|
|
|
|
{ "globmatch", 'g', -1 },
|
|
|
|
{ "altglobmatch", ('g'<<8)|'g', -1 },
|
|
|
|
{ "caseless", 'i', -1 },
|
|
|
|
{ "multiline", 'm', -1 },
|
|
|
|
{ "dotall", 's', -1 },
|
|
|
|
{ "extended", 'x', -1 }
|
|
|
|
};
|
|
|
|
|
|
|
|
#define C1MODLISTCOUNT sizeof(c1modlist)/sizeof(c1modstruct)
|
|
|
|
|
|
|
|
/* Table of arguments for the -C command line option. Use macros to make the
|
|
|
|
table easier to read. */
|
|
|
|
|
|
|
|
/* Turn the "defined or not" build switches into 0/1 values that can be put
into the coptlist table below. EBCDIC_NL is CHAR_LF in an EBCDIC build and 0
otherwise. */

#if defined SUPPORT_PCRE8
#define SUPPORT_8 1
#endif
#if defined SUPPORT_PCRE16
#define SUPPORT_16 1
#endif
#if defined SUPPORT_PCRE32
#define SUPPORT_32 1
#endif

#ifndef SUPPORT_8
#define SUPPORT_8 0
#endif
#ifndef SUPPORT_16
#define SUPPORT_16 0
#endif
#ifndef SUPPORT_32
#define SUPPORT_32 0
#endif

#ifdef EBCDIC
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#else
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#endif
|
|
|
|
|
|
|
|
typedef struct coptstruct {
|
|
|
|
const char *name;
|
|
|
|
uint32_t type;
|
|
|
|
uint32_t value;
|
|
|
|
} coptstruct;
|
|
|
|
|
|
|
|
/* Kinds of -C argument, used in the "type" column of coptlist. In that table
CONF_FIX and CONF_FIZ rows carry a fixed value, while the other rows carry a
PCRE2_CONFIG_xxx code. */

enum { CONF_BSR,
       CONF_FIX,
       CONF_FIZ,
       CONF_INT,
       CONF_NL
};
|
|
|
|
|
|
|
|
static coptstruct coptlist[] = {
|
|
|
|
{ "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
|
|
|
|
{ "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
|
|
|
|
{ "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
|
|
|
|
{ "jit", CONF_INT, PCRE2_CONFIG_JIT },
|
|
|
|
{ "linksize", CONF_INT, PCRE2_CONFIG_LINK_SIZE },
|
|
|
|
{ "newline", CONF_NL, PCRE2_CONFIG_NEWLINE },
|
|
|
|
{ "pcre16", CONF_FIX, SUPPORT_16 },
|
|
|
|
{ "pcre32", CONF_FIX, SUPPORT_32 },
|
|
|
|
{ "pcre8", CONF_FIX, SUPPORT_8 },
|
|
|
|
{ "utf", CONF_INT, PCRE2_CONFIG_UTF }
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Number of rows in coptlist. Parenthesized so that it behaves as a single
operand inside larger expressions. */
#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))
|
|
|
|
|
|
|
|
#undef SUPPORT_8
|
|
|
|
#undef SUPPORT_16
|
|
|
|
#undef SUPPORT_32
|
|
|
|
#undef SUPPORT_EBCDIC
|
|
|
|
|
|
|
|
|
|
|
|
/* Static variables */
|
|
|
|
|
|
|
|
static FILE *infile;
|
|
|
|
static FILE *outfile;
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* FIXME */
|
|
|
|
static BOOL locale_set = FALSE;
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
static int buffer_size = 50000;
|
2014-04-30 18:55:24 +02:00
|
|
|
static size_t dbuffer_size = 1u << 14; /* Bytes */
|
|
|
|
static int code_unit_size; /* Bytes */
|
2014-04-18 17:37:56 +02:00
|
|
|
static int test_mode = DEFAULT_TEST_MODE;
|
|
|
|
static int timeit = 0;
|
|
|
|
static int timeitm = 0;
|
|
|
|
|
|
|
|
/* Accumulated clock() ticks for compiling and for matching. Given internal
linkage like the other variables in this "Static variables" section.
NOTE(review): confirm no other translation unit refers to these by name. */
static clock_t total_compile_time = 0;
static clock_t total_match_time = 0;
|
|
|
|
|
|
|
|
static uint8_t *buffer = NULL;
|
|
|
|
static uint8_t *pbuffer = NULL;
|
|
|
|
static uint8_t *dbuffer = NULL;
|
|
|
|
|
|
|
|
static VERSION_TYPE version[64];
|
|
|
|
|
|
|
|
static patctl def_patctl;
|
|
|
|
static patctl pat_patctl;
|
|
|
|
static datctl def_datctl;
|
|
|
|
static datctl dat_datctl;
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
static regex_t preg = { NULL, NULL, 0, 0 };
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* We need buffers for building 16/32-bit strings; 8-bit strings don't need
|
|
|
|
rebuilding, but set up an appropriate name (pbuffer8) for use in macros. It
|
|
|
|
will be pointed to the same memory as pbuffer. We also need the tables of
|
|
|
|
operator lengths that are used for 16/32-bit compiling, in order to swap bytes
|
|
|
|
in a pattern for saving/reloading testing. Luckily, the data for these tables
|
|
|
|
is defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE
|
|
|
|
(which are used in the tables) are adjusted appropriately for the 16/32-bit
|
|
|
|
world. LINK_SIZE is also used later in this program. */
|
|
|
|
|
|
|
|
static uint8_t *pbuffer8 = NULL;
|
|
|
|
|
|
|
|
/* 16-bit support: redefine IMM2_SIZE and LINK_SIZE, then declare the buffer
used for building 16-bit strings. A LINK_SIZE of 2 becomes 1 and a LINK_SIZE
of 3 or 4 becomes 2; any other value is a build error. */

#ifdef SUPPORT_PCRE16
#undef IMM2_SIZE
#define IMM2_SIZE 1

#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif /* LINK_SIZE */

static int pbuffer16_size = 0;
static uint16_t *pbuffer16 = NULL;
#ifdef FIXME
static const uint16_t OP_lengths16[] = { OP_LENGTHS };
#endif /* FIXME */
#endif /* SUPPORT_PCRE16 */
|
|
|
|
|
|
|
|
|
|
|
|
/* 32-bit support: IMM2_SIZE and LINK_SIZE are both forced to 1, then the
buffer used for building 32-bit strings is declared. Note that this
unconditionally replaces any LINK_SIZE set earlier, so when 16-bit and 32-bit
support are both enabled LINK_SIZE is 1 from here on. */

#ifdef SUPPORT_PCRE32
#undef IMM2_SIZE
#define IMM2_SIZE 1
#undef LINK_SIZE
#define LINK_SIZE 1

static int pbuffer32_size = 0;
static uint32_t *pbuffer32 = NULL;
#ifdef FIXME
static const uint32_t OP_lengths32[] = { OP_LENGTHS };
#endif /* FIXME */
#endif /* SUPPORT_PCRE32 */
|
|
|
|
|
|
|
|
|
|
|
|
/* ---------------- Mode-dependent variables -------------------*/
|
|
|
|
|
|
|
|
/* One set of variables per supported width. The names differ only in their
8/16/32 suffix so that the G(name,width) glueing macros can select among
them according to test_mode. */

#ifdef SUPPORT_PCRE8
pcre2_code_8             *compiled_code8;
pcre2_compile_context_8  *pat_context8, *default_pat_context8;
pcre2_match_context_8    *dat_context8, *default_dat_context8;
#endif

#ifdef SUPPORT_PCRE16
pcre2_code_16            *compiled_code16;
pcre2_compile_context_16 *pat_context16, *default_pat_context16;
pcre2_match_context_16   *dat_context16, *default_dat_context16;
#endif

#ifdef SUPPORT_PCRE32
pcre2_code_32            *compiled_code32;
pcre2_compile_context_32 *pat_context32, *default_pat_context32;
pcre2_match_context_32   *dat_context32, *default_dat_context32;
#endif
|
|
|
|
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* ---------------- Macros that work in all modes ----------------- */
|
|
|
|
|
|
|
|
#define CAST8VAR(x) CASTVAR(uint8_t *, x)
|
|
|
|
#define SET(x,y) SETOP(x,y,=)
|
|
|
|
#define SETPLUS(x,y) SETOP(x,y,+=)
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* ---------------- Mode-dependent, runtime-testing macros ------------------*/
|
|
|
|
|
|
|
|
/* Define macros for variables and functions that must be selected dynamically
|
|
|
|
depending on the mode setting (8, 16, 32). These are dependent on which modes
|
2014-04-30 18:55:24 +02:00
|
|
|
are supported. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
#if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
|
|
|
|
defined (SUPPORT_PCRE32)) >= 2
|
|
|
|
|
|
|
|
/* ----- All three modes supported ----- */
|
|
|
|
|
|
|
|
#if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* Expression: the variable x8, x16 or x32 selected by test_mode, cast to
type t. */
#define CASTVAR(t,x) ( \
  (test_mode == PCRE8_MODE)? (t)G(x,8) : \
  (test_mode == PCRE16_MODE)? (t)G(x,16) : (t)G(x,32))
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
/* Statement: copy one match context structure from b<width> to a<width> for
the width selected by test_mode. Expands to an if/else chain without a
trailing semicolon. */
#define DATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \
  else memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* Expression: member b of the structure pointed to by a8, a16 or a32,
selected by test_mode. */
#define FLD(a,b) ((test_mode == PCRE8_MODE)? G(a,8)->b : \
  (test_mode == PCRE16_MODE)? G(a,16)->b : G(a,32)->b)
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
/* Statement: copy one compile context structure from b<width> to a<width>
for the width selected by test_mode. Expands to an if/else chain without a
trailing semicolon. */
#define PATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \
  else memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* Statement: call pchars8/16/32 (selected by test_mode) on the string that
starts offset code units into p, and store the result in lv. The pointer is
cast to the mode's string type before the offset is added, so the offset is
counted in code units of that mode. This matches PCHARSV, which calls the same
functions with four arguments; the previous form passed p and offset as two
separate arguments, giving five. */
#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    lv = pchars32((PCRE2_SPTR32)(p)+(offset), len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    lv = pchars16((PCRE2_SPTR16)(p)+(offset), len, utf, f); \
  else \
    lv = pchars8((PCRE2_SPTR8)(p)+(offset), len, utf, f)
|
|
|
|
|
|
|
|
/* Statement: as PCHARS, but the value returned by pchars8/16/32 is
discarded. p is parenthesized before the cast so that an argument such as
"buf + n" is cast as a whole instead of only its first operand. */
#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    (void)pchars32((PCRE2_SPTR32)(p)+(offset), len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    (void)pchars16((PCRE2_SPTR16)(p)+(offset), len, utf, f); \
  else \
    (void)pchars8((PCRE2_SPTR8)(p)+(offset), len, utf, f)
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
/* Statement: call the compile function for the width selected by test_mode,
passing b<width>, c, d, e, f and g<width>, and assign the result to a<width>.
Only a, b and g get the width suffix. */
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = G(pcre2_compile,8)(G(b,8),c,d,e,f,G(g,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = G(pcre2_compile,16)(G(b,16),c,d,e,f,G(g,16)); \
  else \
    G(a,32) = G(pcre2_compile,32)(G(b,32),c,d,e,f,G(g,32))
|
|
|
|
|
|
|
|
/* Statement: call the JIT compile function for the width selected by
test_mode on a<width>, passing b unchanged. Any return value is ignored. */
#define PCRE2_JIT_COMPILE(a,b) \
  if (test_mode == PCRE8_MODE) \
    G(pcre2_jit_compile,8)(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    G(pcre2_jit_compile,16)(G(a,16),b); \
  else \
    G(pcre2_jit_compile,32)(G(a,32),b)
|
|
|
|
|
|
|
|
/* Statement: call the pattern-info function for the width selected by
test_mode on b<width>, passing c and d unchanged, and assign the result to a
(which does not get a width suffix). */
#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = G(pcre2_pattern_info,8)(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = G(pcre2_pattern_info,16)(G(b,16),c,d); \
  else \
    a = G(pcre2_pattern_info,32)(G(b,32),c,d)
|
|
|
|
|
|
|
|
#define PTR(x) ( \
|
|
|
|
(test_mode == PCRE8_MODE)? (void *)G(x,8) : \
|
|
|
|
(test_mode == PCRE16_MODE)? (void *)G(x,16) : \
|
|
|
|
(void *)G(x,32))
|
2014-03-07 18:28:52 +01:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
#define SETOP(x,y,z) \
|
|
|
|
if (test_mode == PCRE8_MODE) G(x,8) z y; \
|
|
|
|
else if (test_mode == PCRE16_MODE) G(x,16) z y; \
|
|
|
|
else G(x,32) z y
|
|
|
|
|
|
|
|
#define SETCASTPTR(x,y) \
|
|
|
|
if (test_mode == PCRE8_MODE) \
|
|
|
|
G(x,8) = (uint8_t *)y; \
|
|
|
|
else if (test_mode == PCRE16_MODE) \
|
|
|
|
G(x,16) = (uint16_t *)y; \
|
|
|
|
else \
|
|
|
|
G(x,32) = (uint32_t *)y
|
|
|
|
|
|
|
|
#define STRLEN(p) ((test_mode == PCRE8_MODE)? ((int)strlen((char *)p)) : \
|
|
|
|
(test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \
|
|
|
|
((int)strlen32((PCRE2_SPTR32)p)))
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
#define SUB1(a,b) \
|
|
|
|
if (test_mode == PCRE8_MODE) G(a,8)(G(b,8)); \
|
|
|
|
else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16)); \
|
|
|
|
else G(a,32)(G(b,32))
|
|
|
|
|
|
|
|
#define SUB2(a,b,c) \
|
|
|
|
if (test_mode == PCRE8_MODE) G(a,8)(G(b,8),G(c,8)); \
|
|
|
|
else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16),G(c,16)); \
|
|
|
|
else G(a,32)(G(b,32),G(c,32))
|
|
|
|
|
|
|
|
#define TEST(x,r,y) ( \
|
|
|
|
(test_mode == PCRE8_MODE && G(x,8) r (y)) || \
|
|
|
|
(test_mode == PCRE16_MODE && G(x,16) r (y)) || \
|
|
|
|
(test_mode == PCRE32_MODE && G(x,32) r (y)))
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
#define VAR(x) ( \
|
|
|
|
(test_mode == PCRE8_MODE)? G(x,8) : \
|
|
|
|
(test_mode == PCRE16_MODE)? G(x,16) : G(x,32))
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* ----- Two out of three modes are supported ----- */
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
/* We can use some macro trickery to make a single set of definitions work in
|
|
|
|
the three different cases. */
|
|
|
|
|
|
|
|
/* ----- 32-bit and 16-bit but not 8-bit supported ----- */
|
|
|
|
|
|
|
|
#if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
#define BITONE 32
#define BITTWO 16

/* ----- 32-bit and 8-bit but not 16-bit supported ----- */

#elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
#define BITONE 32
#define BITTWO 8

/* ----- 16-bit and 8-bit but not 32-bit supported ----- */

#else
#define BITONE 16
#define BITTWO 8
#endif


/* ----- Common macros for two-mode cases ----- */

/* BITONE and BITTWO are the two supported widths. Every macro below compares
test_mode with PCRE<BITONE>_MODE, built by pasting with G(), and otherwise
uses the BITTWO variant. The statement-like macros expand to an unbraced
if/else with no trailing semicolon, so they must be used as a complete
statement. */

#define CASTVAR(t,x) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (t)G(x,BITONE) : (t)G(x,BITTWO))

#define DATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_match_context_,BITTWO)))

#define FLD(a,b) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? G(a,BITONE)->b : G(a,BITTWO)->b)

#define PATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO)))

/* The pcharsN() functions take (pointer, length, utf, file), so the offset is
applied to the suitably cast pointer. The function name is lower-case
"pchars"; pasting the macro's own upper-case name would not name a function. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+(offset), len, utf, f); \
  else \
    lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+(offset), len, utf, f)

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+(offset), len, utf, f); \
  else \
    (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+(offset), len, utf, f)

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_compile,BITONE)(G(b,BITONE),c,d,e,f,G(g,BITONE)); \
  else \
    G(a,BITTWO) = G(pcre2_compile,BITTWO)(G(b,BITTWO),c,d,e,f,G(g,BITTWO))

#define PCRE2_JIT_COMPILE(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_compile,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_jit_compile,BITTWO)(G(a,BITTWO),b)

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_pattern_info,BITONE)(G(b,BITONE),c,d); \
  else \
    a = G(pcre2_pattern_info,BITTWO)(G(b,BITTWO),c,d)

#define PTR(x) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? (void *)G(x,BITONE) : \
  (void *)G(x,BITTWO))

#define SETOP(x,y,z) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE) z y; \
  else G(x,BITTWO) z y

/* The mode test must be against BITONE, because that is the variable the
first branch assigns; PCRE8_MODE is never the BITONE mode. */

#define SETCASTPTR(x,y) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(x,BITONE) = (G(G(uint,BITONE),_t) *)(y); \
  else \
    G(x,BITTWO) = (G(G(uint,BITTWO),_t) *)(y)

/* NOTE(review): when BITTWO is 8 this expands to strlen8(), which is not
defined in the part of the file that contains strlen16() and strlen32() -
confirm that a definition (or a #define to strlen) exists. */

#define STRLEN(p) ((test_mode == G(G(PCRE,BITONE),_MODE))? \
  G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))(p)) : \
  G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)))

#define SUB1(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO))

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))

#define TEST(x,r,y) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE) r (y)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO) r (y)))

#define VAR(x) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? G(x,BITONE) : G(x,BITTWO))
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
#endif /* Two out of three modes */
|
|
|
|
|
|
|
|
/* ----- End of cases where more than one mode is supported ----- */
|
|
|
|
|
|
|
|
|
|
|
|
/* ----- Only 8-bit mode is supported ----- */
|
|
|
|
|
|
|
|
#elif defined SUPPORT_PCRE8
|
2014-04-30 18:55:24 +02:00
|
|
|
/* With only one width compiled in there is nothing to select at run time:
each macro maps directly onto the 8-bit variable or function. */

#define CASTVAR(t,x) (t)G(x,8)
#define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8))
#define FLD(a,b) G(a,8)->b
#define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8))

/* pchars8() takes (pointer, length, utf, file); the offset is applied to the
pointer, as in PCHARSV. */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars8((PCRE2_SPTR8)(p)+(offset), len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars8((PCRE2_SPTR8)(p)+(offset), len, utf, f)
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,8) = G(pcre2_compile,8)(G(b,8),c,d,e,f,G(g,8))

/* NOTE(review): the multi-mode sections spell these two calls as
G(pcre2_jit_compile,8) and G(pcre2_pattern_info,8), i.e. without the
underscore before the width - confirm which spelling the library exports. */

#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_8(G(a,8),b)
#define PCRE2_PATTERN_INFO(a,b,c,d) \
  a = pcre2_pattern_info_8(G(b,8),c,d)
#define PTR(x) (void *)G(x,8)
#define SETOP(x,y,z) G(x,8) z y
#define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y)
#define STRLEN(p) (int)strlen((char *)(p))
#define SUB1(a,b) G(a,8)(G(b,8))
#define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8))
#define TEST(x,r,y) (G(x,8) r (y))
#define VAR(x) G(x,8)
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* ----- Only 16-bit mode is supported ----- */
|
|
|
|
|
|
|
|
#elif defined SUPPORT_PCRE16
|
2014-04-30 18:55:24 +02:00
|
|
|
/* With only one width compiled in there is nothing to select at run time:
each macro maps directly onto the 16-bit variable or function. */

#define CASTVAR(t,x) (t)G(x,16)
#define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
#define FLD(a,b) G(a,16)->b
#define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))

/* pchars16() takes (pointer, length, utf, file); the offset is applied to the
pointer, as in PCHARSV. */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars16((PCRE2_SPTR16)(p)+(offset), len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars16((PCRE2_SPTR16)(p)+(offset), len, utf, f)
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,16) = G(pcre2_compile,16)(G(b,16),c,d,e,f,G(g,16))

/* NOTE(review): the multi-mode sections spell these two calls as
G(pcre2_jit_compile,16) and G(pcre2_pattern_info,16), i.e. without the
underscore before the width - confirm which spelling the library exports. */

#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_16(G(a,16),b)
#define PCRE2_PATTERN_INFO(a,b,c,d) \
  a = pcre2_pattern_info_16(G(b,16),c,d)
#define PTR(x) (void *)G(x,16)

/* Named SETOP, as in every other mode section. */

#define SETOP(x,y,z) G(x,16) z y
#define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
#define STRLEN(p) (int)strlen16((PCRE2_SPTR16)(p))
#define SUB1(a,b) G(a,16)(G(b,16))
#define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
#define TEST(x,r,y) (G(x,16) r (y))
#define VAR(x) G(x,16)
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* ----- Only 32-bit mode is supported ----- */
|
|
|
|
|
|
|
|
#elif defined SUPPORT_PCRE32
|
2014-04-30 18:55:24 +02:00
|
|
|
/* With only one width compiled in there is nothing to select at run time:
each macro maps directly onto the 32-bit variable or function. */

#define CASTVAR(t,x) (t)G(x,32)
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))

/* pchars32() takes (pointer, length, utf, file); the offset is applied to the
pointer, as in PCHARSV. */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars32((PCRE2_SPTR32)(p)+(offset), len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars32((PCRE2_SPTR32)(p)+(offset), len, utf, f)
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,32) = G(pcre2_compile,32)(G(b,32),c,d,e,f,G(g,32))

/* NOTE(review): the multi-mode sections spell these two calls as
G(pcre2_jit_compile,32) and G(pcre2_pattern_info,32), i.e. without the
underscore before the width - confirm which spelling the library exports. */

#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_32(G(a,32),b)
#define PCRE2_PATTERN_INFO(a,b,c,d) \
  a = pcre2_pattern_info_32(G(b,32),c,d)
#define PTR(x) (void *)G(x,32)

/* Named SETOP, as in every other mode section. */

#define SETOP(x,y,z) G(x,32) z y
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)(p))
#define SUB1(a,b) G(a,32)(G(b,32))
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
#define TEST(x,r,y) (G(x,32) r (y))
#define VAR(x) G(x,32)
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* ----- End of mode-specific function call macros ----- */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
2014-04-30 18:55:24 +02:00
|
|
|
* Convert UTF-8 character to code point *
|
2014-04-18 17:37:56 +02:00
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* This function reads one or more bytes that represent a UTF-8 character,
|
|
|
|
and returns the codepoint of that character. Note that the function supports
|
|
|
|
the original UTF-8 definition of RFC 2279, allowing for values in the range 0
|
|
|
|
to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
|
|
|
|
codepoints greater than 0x10ffff which are useful for testing PCRE's error
|
|
|
|
checking, and also for generating 32-bit non-UTF data values above the UTF
|
|
|
|
limit.
|
|
|
|
|
|
|
|
Argument:
|
|
|
|
utf8bytes a pointer to the byte vector
|
|
|
|
vptr a pointer to an int to receive the value
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
Returns: > 0 => the number of bytes consumed
|
2014-04-18 17:37:56 +02:00
|
|
|
-6 to 0 => malformed UTF-8 character at offset = (-return)
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
2014-04-30 18:55:24 +02:00
|
|
|
utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
|
|
|
uint32_t c = *utf8bytes++;
|
|
|
|
uint32_t d = c;
|
|
|
|
int i, j, s;
|
|
|
|
|
|
|
|
for (i = -1; i < 6; i++) /* i is number of additional bytes */
|
|
|
|
{
|
|
|
|
if ((d & 0x80) == 0) break;
|
|
|
|
d <<= 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i == -1) { *vptr = c; return 1; } /* ascii character */
|
|
|
|
if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
|
|
|
|
|
|
|
|
/* i now has a value in the range 1-5 */
|
|
|
|
|
|
|
|
s = 6*i;
|
|
|
|
d = (c & utf8_table3[i]) << s;
|
|
|
|
|
|
|
|
for (j = 0; j < i; j++)
|
|
|
|
{
|
|
|
|
c = *utf8bytes++;
|
|
|
|
if ((c & 0xc0) != 0x80) return -(j+1);
|
|
|
|
s -= 6;
|
|
|
|
d |= (c & 0x3f) << s;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check that encoding was the correct unique one */
|
|
|
|
|
|
|
|
for (j = 0; j < utf8_table1_size; j++)
|
|
|
|
if (d <= (uint32_t)utf8_table1[j]) break;
|
|
|
|
if (j != i) return -(i+1);
|
|
|
|
|
|
|
|
/* Valid value */
|
|
|
|
|
|
|
|
*vptr = d;
|
|
|
|
return i+1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/*************************************************
|
|
|
|
* Print one character *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* Print a single character either literally, or as a hex escape.
|
|
|
|
|
|
|
|
Arguments:
|
|
|
|
c the character
|
|
|
|
utf TRUE in UTF mode
|
|
|
|
f the FILE to print to
|
|
|
|
|
|
|
|
Returns: number of characters written
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
pchar(uint32_t c, BOOL utf, FILE *f)
|
|
|
|
{
|
|
|
|
int n = 0;
|
|
|
|
if (PRINTOK(c))
|
|
|
|
{
|
|
|
|
if (f != NULL) fprintf(f, "%c", c);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (c < 0x100)
|
|
|
|
{
|
|
|
|
if (utf)
|
|
|
|
{
|
|
|
|
if (f != NULL) fprintf(f, "\\x{%02x}", c);
|
|
|
|
return 6;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (f != NULL) fprintf(f, "\\x%02x", c);
|
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
|
|
|
|
return n >= 0 ? n : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE16
/*************************************************
*   Find length of 0-terminated 16-bit string    *
*************************************************/

/* Returns the number of 16-bit units that precede the first zero unit. */

static int strlen16(PCRE2_SPTR16 p)
{
PCRE2_SPTR16 end;
for (end = p; *end != 0; end++) continue;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE32
/*************************************************
*   Find length of 0-terminated 32-bit string    *
*************************************************/

/* Returns the number of 32-bit units that precede the first zero unit. */

static int strlen32(PCRE2_SPTR32 p)
{
PCRE2_SPTR32 end;
for (end = p; *end != 0; end++) continue;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE32 */
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated. In UTF mode a byte that does not
start a well-formed character, or whose character would extend beyond the
given length, is output as a single byte. */

static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int remaining = (length < 0)? (int)strlen((char *)p) : length;

while (remaining > 0)
  {
  uint32_t ch = 0;
  int used = utf? utf82ord(p, &ch) : 0;

  if (used <= 0 || used > remaining)    /* Mustn't run over the end */
    {
    ch = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  total += pchar(ch, utf, f);
  }

return total;
}
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE16
/*************************************************
*         Print 16-bit character string          *
*************************************************/

/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated. In UTF mode a high surrogate that is
followed, within the given length, by a low surrogate is combined with it into
one character; any other unit is output on its own. */

static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
int printed = 0;
int left = (length < 0)? strlen16(p) : length;

for (; left > 0; left--)
  {
  uint32_t unit = *p++ & 0xffff;

  /* left still counts the unit just read, so "left > 1" means that at least
  one more unit is available. */

  if (utf && unit >= 0xD800 && unit < 0xDC00 && left > 1)
    {
    uint32_t low = *p & 0xffff;
    if (low >= 0xDC00 && low <= 0xDFFF)
      {
      unit = ((unit & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
      p++;
      left--;
      }
    }

  printed += pchar(unit, utf, f);
  }

return printed;
}
#endif  /* SUPPORT_PCRE16 */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE32
/*************************************************
*         Print 32-bit character string          *
*************************************************/

/* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated. Every 32-bit unit is one character,
so utf is simply passed on to pchar(). */

static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int printed = 0;
int left;

(void)(utf);  /* Avoid compiler warning */

left = (length < 0)? strlen32(p) : length;
for (; left > 0; left--)
  {
  uint32_t unit = *p++;
  printed += pchar(unit, utf, f);
  }

return printed;
}
#endif  /* SUPPORT_PCRE32 */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Convert character value to UTF-8 *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* This function takes an integer value in the range 0 - 0x7fffffff
|
|
|
|
and encodes it as a UTF-8 character in 0 to 6 bytes.
|
|
|
|
|
|
|
|
Arguments:
|
|
|
|
cvalue the character value
|
|
|
|
utf8bytes pointer to buffer for result - at least 6 bytes long
|
|
|
|
|
|
|
|
Returns: number of characters placed in the buffer
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
|
|
|
|
{
|
|
|
|
register int i, j;
|
|
|
|
if (cvalue > 0x7fffffffu)
|
|
|
|
return -1;
|
|
|
|
for (i = 0; i < utf8_table1_size; i++)
|
|
|
|
if (cvalue <= (uint32_t)utf8_table1[i]) break;
|
|
|
|
utf8bytes += i;
|
|
|
|
for (j = i; j > 0; j--)
|
|
|
|
{
|
|
|
|
*utf8bytes-- = 0x80 | (cvalue & 0x3f);
|
|
|
|
cvalue >>= 6;
|
|
|
|
}
|
|
|
|
*utf8bytes = utf8_table2[i] | cvalue;
|
|
|
|
return i + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
#ifdef SUPPORT_PCRE16
/*************************************************
*           Convert a string to 16-bit           *
*************************************************/

/* The input is always interpreted as a string of UTF-8 bytes. A value below
0x10000 takes at least one input byte and yields one 16-bit unit; a larger
value takes at least four input bytes and yields two units. The output
therefore never has more units than the input has bytes, so 2*len bytes plus a
terminating unit is always enough. The result is always left in pbuffer16,
which is enlarged when necessary.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Arguments:
  p          points to a byte string
  utf        non-zero if converting to UTF-16
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered in UTF mode
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(uint8_t *p, int utf, int len)
{
uint16_t *pp;

if (pbuffer16_size < 2*len + 2)
  {
  free(pbuffer16);                      /* free(NULL) is a no-op */
  pbuffer16_size = 2*len + 2;
  pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
  if (pbuffer16 == NULL)
    {
    fprintf(stderr, "pcre2test: malloc(%d) failed for pbuffer16\n", pbuffer16_size);
    exit(1);
    }
  }
pp = pbuffer16;

while (len > 0)
  {
  uint32_t c;
  int chlen = utf82ord(p, &c);
  if (chlen <= 0) return -1;

  /* The non-UTF limit has to be tested first; otherwise a non-UTF value above
  0x10ffff would be reported with the UTF-mode code. */

  if (!utf && c > 0xffff) return -3;
  if (c > 0x10ffff) return -2;

  p += chlen;
  len -= chlen;
  if (c < 0x10000) *pp++ = c; else
    {
    c -= 0x10000;                       /* encode as a surrogate pair */
    *pp++ = 0xD800 | (c >> 10);
    *pp++ = 0xDC00 | (c & 0x3ff);
    }
  }

*pp = 0;
return pp - pbuffer16;
}
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE32
/*************************************************
*           Convert a string to 32-bit           *
*************************************************/

/* The input is always interpreted as a string of UTF-8 bytes. If all the input
bytes are ASCII, the space needed for a 32-bit string is exactly four times the
8-bit size. Otherwise, the size needed for a 32-bit string is no more than four
times, because the number of characters cannot exceed the number of bytes.
The result is always left in pbuffer32, which is enlarged when necessary.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-32 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Arguments:
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-32)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 32-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered in UTF mode
*/

static int
to32(uint8_t *p, int utf, int len)
{
uint32_t *pp;

if (pbuffer32_size < 4*len + 4)
  {
  free(pbuffer32);                      /* free(NULL) is a no-op */
  pbuffer32_size = 4*len + 4;
  pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
  if (pbuffer32 == NULL)
    {
    fprintf(stderr, "pcre2test: malloc(%d) failed for pbuffer32\n", pbuffer32_size);
    exit(1);
    }
  }
pp = pbuffer32;

while (len > 0)
  {
  uint32_t c;
  int chlen = utf82ord(p, &c);
  if (chlen <= 0) return -1;
  if (utf && c > 0x10ffff) return -2;
  p += chlen;
  len -= chlen;
  *pp++ = c;
  }

*pp = 0;
return pp - pbuffer32;
}
#endif /* SUPPORT_PCRE32 */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Read or extend an input line *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* Input lines are read into buffer, but both patterns and data lines can be
|
|
|
|
continued over multiple input lines. In addition, if the buffer fills up, we
|
|
|
|
want to automatically expand it so as to be able to handle extremely large
|
|
|
|
lines that are needed for certain stress tests. When the input buffer is
|
|
|
|
expanded, the other two buffers must also be expanded likewise, and the
|
|
|
|
contents of pbuffer, which are a copy of the input for callouts, must be
|
|
|
|
preserved (for when expansion happens for a data line). This is not the most
|
|
|
|
optimal way of handling this, but hey, this is just a test program!
|
|
|
|
|
|
|
|
Arguments:
|
|
|
|
f the file to read
|
|
|
|
start where in buffer to start (this *must* be within buffer)
|
|
|
|
prompt for stdin or readline()
|
|
|
|
|
|
|
|
Returns: pointer to the start of new data
|
|
|
|
could be a copy of start, or could be moved
|
|
|
|
NULL if no data read and EOF reached
|
|
|
|
*/
|
|
|
|
|
|
|
|
static uint8_t *
|
|
|
|
extend_inputline(FILE *f, uint8_t *start, const char *prompt)
|
|
|
|
{
|
|
|
|
uint8_t *here = start;
|
|
|
|
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
size_t rlen = (size_t)(buffer_size - (here - buffer));
|
|
|
|
|
|
|
|
if (rlen > 1000)
|
|
|
|
{
|
|
|
|
int dlen;
|
|
|
|
|
|
|
|
/* If libreadline or libedit support is required, use readline() to read a
|
|
|
|
line if the input is a terminal. Note that readline() removes the trailing
|
|
|
|
newline, so we must put it back again, to be compatible with fgets(). */
|
|
|
|
|
|
|
|
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
|
|
|
|
if (isatty(fileno(f)))
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
char *s = readline(prompt);
|
|
|
|
if (s == NULL) return (here == start)? NULL : start;
|
|
|
|
len = strlen(s);
|
|
|
|
if (len > 0) add_history(s);
|
|
|
|
if (len > rlen - 1) len = rlen - 1;
|
|
|
|
memcpy(here, s, len);
|
|
|
|
here[len] = '\n';
|
|
|
|
here[len+1] = 0;
|
|
|
|
free(s);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Read the next line by normal means, prompting if the file is stdin. */
|
|
|
|
|
|
|
|
{
|
|
|
|
if (f == stdin) printf("%s", prompt);
|
|
|
|
if (fgets((char *)here, rlen, f) == NULL)
|
|
|
|
return (here == start)? NULL : start;
|
|
|
|
}
|
|
|
|
|
|
|
|
dlen = (int)strlen((char *)here);
|
|
|
|
if (dlen > 0 && here[dlen - 1] == '\n') return start;
|
|
|
|
here += dlen;
|
|
|
|
}
|
|
|
|
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int new_buffer_size = 2*buffer_size;
|
|
|
|
uint8_t *new_buffer = (uint8_t *)malloc(new_buffer_size);
|
|
|
|
uint8_t *new_pbuffer = (uint8_t *)malloc(new_buffer_size);
|
|
|
|
|
|
|
|
if (new_buffer == NULL || new_pbuffer == NULL)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_buffer_size);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(new_buffer, buffer, buffer_size);
|
|
|
|
memcpy(new_pbuffer, pbuffer, buffer_size);
|
|
|
|
|
|
|
|
buffer_size = new_buffer_size;
|
|
|
|
|
|
|
|
start = new_buffer + (start - buffer);
|
|
|
|
here = new_buffer + (here - buffer);
|
|
|
|
|
|
|
|
free(buffer);
|
|
|
|
free(pbuffer);
|
|
|
|
|
|
|
|
buffer = new_buffer;
|
|
|
|
pbuffer = pbuffer8 = new_pbuffer;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Control never gets here */
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
*      Case-independent strncmp() function       *
*************************************************/

/* Compares at most n bytes of two strings, ignoring case as defined by
tolower(). As with strncmp(), the comparison also ends when both strings reach
their terminating zero, so nothing beyond a terminator is ever read. A value
of n that is zero or negative compares nothing.

Arguments:
  s         first string
  t         second string
  n         number of characters to compare

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
while (n-- > 0)
  {
  int c = tolower(*s) - tolower(*t);
  if (c != 0) return c;
  if (*s == 0) break;     /* equal so far and both strings have ended */
  s++;
  t++;
  }
return 0;
}
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
*            Read number from string             *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple. Leading white space is
skipped; there is no sign handling and no overflow check.

Arguments:
  str       string to be converted
  endptr    where to put the end pointer (the first character that is not
              part of the number)

Returns:    the value as an int; 0 if there are no digits
*/

static int
get_value(const char *str, const char **endptr)
{
int result = 0;

/* The <ctype.h> functions are only defined for values representable as
unsigned char (and EOF), so a plain char must be converted before the call. */

while (isspace((unsigned char)*str)) str++;
while (isdigit((unsigned char)*str)) result = result * 10 + (int)(*str++ - '0');
*endptr = str;
return(result);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Scan the main modifier list *
|
|
|
|
*************************************************/
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* This function searches the modifier list for a long modifier name.
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
Argument:
|
2014-04-30 18:55:24 +02:00
|
|
|
p start of the name
|
|
|
|
lenp length of the name
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
Returns: an index in the modifier list, or -1 on failure
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
2014-04-30 18:55:24 +02:00
|
|
|
scan_modifiers(const uint8_t *p, unsigned int len)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
|
|
|
int bot = 0;
|
|
|
|
int top = MODLISTCOUNT;
|
|
|
|
|
|
|
|
while (top > bot)
|
|
|
|
{
|
|
|
|
int mid = (bot + top)/2;
|
2014-04-30 18:55:24 +02:00
|
|
|
unsigned int mlen = strlen(modlist[mid].name);
|
|
|
|
int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
|
2014-04-18 17:37:56 +02:00
|
|
|
if (c == 0)
|
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
if (len == mlen) return mid;
|
|
|
|
c = len - mlen;
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
if (c > 0) bot = mid + 1; else top = mid;
|
|
|
|
}
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Check a modifer and find its field *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* This function is called when a modifier has been identified. We check that
|
|
|
|
it is allowed here and find the field that is to be changed.
|
|
|
|
|
|
|
|
Arguments:
|
|
|
|
m the modifier list entry
|
|
|
|
ctx CTX_PAT => pattern context
|
|
|
|
CTX_DEFPAT => default pattern context
|
|
|
|
CTX_DAT => data context
|
2014-04-30 18:55:24 +02:00
|
|
|
CTX_DEFDAT => default data context
|
2014-04-18 17:37:56 +02:00
|
|
|
CTX_DEFANY => any default context (depends on the modifier)
|
|
|
|
pctl point to pattern control block
|
|
|
|
dctl point to data control block
|
|
|
|
c a single character or 0
|
|
|
|
|
|
|
|
Returns: a field pointer or NULL
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void *
|
|
|
|
check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
|
|
|
|
{
|
|
|
|
void *field = NULL;
|
|
|
|
switch (m->which)
|
|
|
|
{
|
|
|
|
case MOD_CTC:
|
|
|
|
if (ctx == CTX_DEFPAT || ctx == CTX_DEFANY) field = PTR(default_pat_context);
|
|
|
|
else if (ctx == CTX_PAT) field = PTR(pat_context);
|
2014-04-30 18:55:24 +02:00
|
|
|
break;
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
case MOD_CTM:
|
|
|
|
if (ctx == CTX_DEFDAT || ctx == CTX_DEFANY) field = PTR(default_dat_context);
|
|
|
|
else if (ctx == CTX_DAT) field = PTR(dat_context);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case MOD_DAT:
|
|
|
|
if (dctl != NULL) field = dctl;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case MOD_PAT:
|
|
|
|
if (pctl != NULL) field = pctl;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case MOD_PD:
|
|
|
|
if (dctl != NULL) field = dctl;
|
|
|
|
else if (pctl != NULL) field = pctl;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (field == NULL)
|
|
|
|
{
|
|
|
|
if (c == 0)
|
|
|
|
fprintf(outfile, "** '%s' is not valid here\n", m->name);
|
|
|
|
else
|
|
|
|
fprintf(outfile, "** /%c is not valid here\n", c);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (char *)field + m->offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Decode a modifier list *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* A pointer to a context or control block is NULL when called in cases when
|
|
|
|
that block is not relevant. They are never all relevant in one call. In
|
|
|
|
particular, at least one of patctl and datctl is always NULL.
|
|
|
|
|
|
|
|
Arguments:
|
|
|
|
p point to modifier string
|
|
|
|
ctx CTX_PAT => pattern context
|
|
|
|
CTX_DEFPAT => default pattern context
|
|
|
|
CTX_DAT => data context
|
2014-04-30 18:55:24 +02:00
|
|
|
CTX_DEFDAT => default data context
|
2014-04-18 17:37:56 +02:00
|
|
|
CTX_DEFANY => any default context (depends on the modifier)
|
|
|
|
pctl point to pattern control block
|
|
|
|
dctl point to data control block
|
|
|
|
|
|
|
|
Returns: TRUE if successful decode, FALSE otherwise
|
|
|
|
*/
|
|
|
|
|
|
|
|
static BOOL
|
|
|
|
decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
|
|
|
|
{
|
|
|
|
uint8_t *ep, *pp;
|
|
|
|
BOOL first = TRUE;
|
|
|
|
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
void *field;
|
|
|
|
modstruct *m;
|
|
|
|
BOOL off = FALSE;
|
|
|
|
unsigned int i, len;
|
|
|
|
int index;
|
|
|
|
char *endptr;
|
2014-04-30 18:55:24 +02:00
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
/* Skip white space and commas; after a comma we have passed the first
|
|
|
|
item. */
|
|
|
|
|
|
|
|
while (isspace(*p)) p++;
|
|
|
|
if (*p == ',') first = FALSE;
|
|
|
|
while (isspace(*p) || *p == ',') p++;
|
|
|
|
if (*p == 0) break;
|
|
|
|
|
|
|
|
/* Find the end of the item. */
|
|
|
|
|
|
|
|
for (ep = p; *ep != 0 && *ep != ',' && !isspace(*ep); ep++);
|
|
|
|
|
|
|
|
/* Remember if the first character is '-'. */
|
|
|
|
|
|
|
|
if (*p == '-')
|
|
|
|
{
|
|
|
|
off = TRUE;
|
|
|
|
p++;
|
|
|
|
}
|
2014-04-30 18:55:24 +02:00
|
|
|
|
|
|
|
/* Find the length of a full-length modifier name, and scan for it. */
|
|
|
|
|
|
|
|
pp = p;
|
|
|
|
while (pp < ep && *pp != '=') pp++;
|
|
|
|
index = scan_modifiers(p, pp - p);
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
/* If the first modifier is unrecognized, try to interpret it as a sequence
|
|
|
|
of single-character abbreviated modifiers. None of these modifiers have any
|
|
|
|
associated data. They just set options or control bits. */
|
|
|
|
|
|
|
|
if (index < 0)
|
|
|
|
{
|
|
|
|
uint32_t cc;
|
|
|
|
|
|
|
|
if (!first)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
|
|
|
|
if (ep - p == 1)
|
|
|
|
fprintf(outfile, "** Single-character modifiers must come first\n");
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
|
|
|
|
{
|
|
|
|
if (p[1] == cc) /* Handle doubled characters */
|
|
|
|
{
|
|
|
|
cc = (cc << 8) | cc;
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < C1MODLISTCOUNT; i++)
|
|
|
|
if (cc == c1modlist[i].onechar) break;
|
|
|
|
|
|
|
|
if (i >= C1MODLISTCOUNT)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Unrecognized single-character modifier '%c'\n",
|
|
|
|
*p);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (c1modlist[i].index >= 0)
|
|
|
|
{
|
|
|
|
index = c1modlist[i].index;
|
|
|
|
}
|
|
|
|
|
|
|
|
else
|
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
|
|
|
|
strlen(c1modlist[i].fullname));
|
2014-04-18 17:37:56 +02:00
|
|
|
if (index < 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Internal error: single-character equivalent "
|
|
|
|
"modifier '%s' not found\n", c1modlist[i].fullname);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
c1modlist[i].index = index; /* Cache for next time */
|
|
|
|
}
|
|
|
|
|
|
|
|
field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
|
|
|
|
if (field == NULL) return FALSE;
|
|
|
|
*((uint32_t *)field) |= modlist[index].value;
|
|
|
|
}
|
|
|
|
|
|
|
|
continue; /* With tne next (fullname) modifier */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We have a match on a full-name modifier. Check for the existence of data
|
|
|
|
when needed. */
|
|
|
|
|
|
|
|
m = modlist + index; /* Save typing */
|
|
|
|
if (m->type != MOD_CTL && m->type != MOD_OPT)
|
|
|
|
{
|
|
|
|
if (*pp++ != '=')
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** '=' expected after '%s'\n", m->name);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
if (off)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* These on/off types have no data. */
|
|
|
|
|
|
|
|
else if (*pp != ',' && *pp != '\n' && *pp != 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Set the data length for those types that have data. Then find the field
|
|
|
|
that is to be set. If check_modifier() returns NULL, it has already output an
|
|
|
|
error message. */
|
|
|
|
|
|
|
|
len = ep - pp;
|
|
|
|
field = check_modifier(m, ctx, pctl, dctl, 0);
|
|
|
|
if (field == NULL) return FALSE;
|
|
|
|
|
|
|
|
/* Process according to data type. */
|
|
|
|
|
|
|
|
switch (m->type)
|
|
|
|
{
|
|
|
|
case MOD_CTL:
|
|
|
|
case MOD_OPT:
|
|
|
|
if (off) *((uint32_t *)field) &= ~m->value;
|
|
|
|
else *((uint32_t *)field) |= m->value;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case MOD_BSR:
|
|
|
|
if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
|
|
|
|
*((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
|
|
|
|
else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
|
|
|
|
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
|
|
|
|
else goto INVALID_VALUE;
|
|
|
|
pp = ep;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case MOD_IN2:
|
|
|
|
if (!isdigit(*pp)) goto INVALID_VALUE;
|
|
|
|
((uint32_t *)field)[0] = (uint32_t)strtoul((const char *)pp, &endptr, 10);
|
|
|
|
if (*endptr == '/')
|
|
|
|
((uint32_t *)field)[1] = (uint32_t)strtoul((const char *)endptr+1, &endptr, 10);
|
|
|
|
else ((uint32_t *)field)[1] = 0;
|
|
|
|
pp = (uint8_t *)endptr;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case MOD_INT:
|
|
|
|
if (!isdigit(*pp)) goto INVALID_VALUE;
|
|
|
|
*((uint32_t *)field) = (uint32_t)strtoul((const char *)pp, &endptr, 10);
|
|
|
|
pp = (uint8_t *)endptr;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case MOD_NL:
|
|
|
|
for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
|
|
|
|
if (len == strlen(newlines[i]) &&
|
|
|
|
strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
|
|
|
|
if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
|
|
|
|
*((uint16_t *)field) = i;
|
|
|
|
pp = ep;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case MOD_NN: /* Name or number; may be several */
|
|
|
|
if (isdigit(*pp))
|
|
|
|
{
|
|
|
|
int ct = MAXCPYGET - 1;
|
|
|
|
field = (char *)field - m->offset + m->value; /* Adjust field ptr */
|
|
|
|
while (*((uint32_t *)field) != 0 && ct-- > 0) /* Skip previous */
|
|
|
|
field = (char *)field + sizeof(uint32_t);
|
|
|
|
if (ct <= 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
*((uint32_t *)field) = (uint32_t)strtoul((const char *)pp, &endptr, 10);
|
|
|
|
pp = (uint8_t *)endptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Multiple strings are put end to end. */
|
|
|
|
|
|
|
|
else
|
|
|
|
{
|
|
|
|
char *nn = (char *)field;
|
|
|
|
while (*nn != 0) nn += strlen(nn) + 1;
|
|
|
|
if (nn + len + 1 - (char *)field > LENCPYGET)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Too many named '%s' modifiers\n", m->name);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
memcpy(nn, pp, len);
|
|
|
|
nn[len] = 0 ;
|
|
|
|
nn[len+1] = 0;
|
|
|
|
pp = ep;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case MOD_STR:
|
|
|
|
memcpy(field, pp, len);
|
2014-04-30 18:55:24 +02:00
|
|
|
((uint8_t *)field)[len] = 0;
|
2014-04-18 17:37:56 +02:00
|
|
|
pp = ep;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*pp != ',' && *pp != '\n' && *pp != 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
p = pp;
|
|
|
|
}
|
|
|
|
|
|
|
|
return TRUE;
|
|
|
|
|
|
|
|
INVALID_VALUE:
|
|
|
|
fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Get info from a pattern *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* A wrapped call to pcre2_pattern_info(), applied to the current compiled
|
|
|
|
pattern.
|
|
|
|
|
|
|
|
Arguments:
|
|
|
|
what code for the required information
|
|
|
|
where where to put the answer
|
|
|
|
|
|
|
|
Returns: the return from pcre2_pattern_info()
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
pattern_info(int what, void *where)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
|
2014-04-30 18:55:24 +02:00
|
|
|
if (rc >= 0 || rc == PCRE2_ERROR_UNSET) return 0;
|
|
|
|
fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
|
|
|
|
what);
|
|
|
|
if (rc == PCRE2_ERROR_BADMODE)
|
|
|
|
fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
|
|
|
|
"%d-bit mode\n", test_mode,
|
|
|
|
8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
|
2014-04-18 17:37:56 +02:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
2014-04-30 18:55:24 +02:00
|
|
|
* Show something in a list *
|
2014-04-18 17:37:56 +02:00
|
|
|
*************************************************/
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* This function just helps to keep the code that uses it tidier. It's used for
|
|
|
|
various lists of things where there needs to be introductory text before the
|
|
|
|
first item. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
static void
|
|
|
|
prmsg(const char **msg, const char *s)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%s %s", *msg, s);
|
|
|
|
*msg = "";
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Show compile controls *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* Called for unsupported POSIX modifiers. */
|
|
|
|
|
|
|
|
static void
|
|
|
|
show_compile_controls(uint32_t controls)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
|
|
|
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
|
|
|
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
|
|
|
((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
|
|
|
|
((controls & CTL_ALTGLOBMATCH) != 0)? " altglobmatch" : "",
|
|
|
|
((controls & CTL_BYTECODE) != 0)? " bytecode" : "",
|
|
|
|
((controls & CTL_FLIPBYTES) != 0)? " flipbytes" : "",
|
|
|
|
((controls & CTL_FULLBYTECODE) != 0)? " fullbytecode" : "",
|
|
|
|
((controls & CTL_GLOBMATCH) != 0)? " globmatch" : "",
|
|
|
|
((controls & CTL_INFO) != 0)? " info" : "",
|
|
|
|
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
|
|
|
|
((controls & CTL_MARKS) != 0)? " marks" : "",
|
|
|
|
((controls & CTL_PERLCOMPAT) != 0)? " perlcompat" : "",
|
|
|
|
((controls & CTL_POSIX) != 0)? " posix" : "");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Show compile options *
|
|
|
|
*************************************************/
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* Called from show_pattern_info() and for unsupported POSIX options. */
|
|
|
|
|
|
|
|
static void
|
|
|
|
show_compile_options(uint32_t options)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
|
|
|
((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
|
|
|
((options & PCRE2_CASELESS) != 0)? " caseless" : "",
|
|
|
|
((options & PCRE2_EXTENDED) != 0)? " extended" : "",
|
|
|
|
((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
|
|
|
|
((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
|
|
|
|
((options & PCRE2_DOTALL) != 0)? " dotall" : "",
|
|
|
|
((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
|
|
|
|
((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
|
|
|
|
((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
|
|
|
|
((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
|
|
|
|
((options & PCRE2_UTF) != 0)? " utf" : "",
|
|
|
|
((options & PCRE2_UCP) != 0)? " ucp" : "",
|
|
|
|
((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
|
|
|
((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
|
|
|
|
((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
|
|
|
|
((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
|
|
|
|
((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
|
|
|
|
((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
|
|
|
|
((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
|
|
|
|
((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
|
|
|
|
((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Show match controls *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* Called for unsupported POSIX modifiers. */
|
|
|
|
|
|
|
|
static void
|
|
|
|
show_match_controls(uint32_t controls)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%s%s%s%s%s%s",
|
|
|
|
((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
|
|
|
|
((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
|
|
|
|
((controls & CTL_DFA) != 0)? " dfa" : "",
|
|
|
|
((controls & CTL_GETLIST) != 0)? " getlist" : "",
|
|
|
|
((controls & CTL_LIMITS) != 0)? " limits" : "",
|
|
|
|
((controls & CTL_MEMORY) != 0)? " memory" : "");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Show match options *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* Called for unsupported POSIX options. */
|
|
|
|
|
|
|
|
static void
|
|
|
|
show_match_options(uint32_t options)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s",
|
|
|
|
((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
|
|
|
((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
|
|
|
|
((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
|
|
|
|
((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
|
|
|
|
((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
|
|
|
((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
|
|
|
|
((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
|
|
|
|
((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
|
|
|
|
((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
|
|
|
|
((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
|
|
|
|
((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Show information about a pattern *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* This function is called after a pattern has been compiled or loaded from a
|
|
|
|
file, if any of the information-requesting controls have been set.
|
|
|
|
|
|
|
|
Arguments: none
|
|
|
|
|
|
|
|
Returns: PR_OK continue processing next line
|
|
|
|
PR_SKIP skip to a blank line
|
|
|
|
PR_ABEND abort the pcre2test run
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
show_pattern_info(void)
|
|
|
|
{
|
|
|
|
uint32_t compile_options, pattern_options;
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
if ((pat_patctl.control & (CTL_BYTECODE|CTL_FULLBYTECODE)) != 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "------------------------------------------------------------------\n");
|
2014-04-30 18:55:24 +02:00
|
|
|
|
|
|
|
|
|
|
|
fprintf(outfile, "Code output not yet available\n");
|
|
|
|
#ifdef FIXME
|
2014-04-18 17:37:56 +02:00
|
|
|
PCRE2_PRINTINT(outfile, (pat_patctl.control & CTL_FULLBYTECODE) != 0);
|
2014-04-30 18:55:24 +02:00
|
|
|
#endif
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if ((pat_patctl.control & CTL_INFO) != 0)
|
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
const void *nametable;
|
|
|
|
const uint8_t *start_bits;
|
2014-04-18 17:37:56 +02:00
|
|
|
int count, backrefmax, first_ctype, last_ctype, jchanged,
|
2014-04-30 18:55:24 +02:00
|
|
|
hascrorlf, maxlookbehind, match_empty, minlength;
|
2014-04-18 17:37:56 +02:00
|
|
|
int nameentrysize, namecount;
|
2014-04-30 18:55:24 +02:00
|
|
|
uint32_t bsr_convention, newline_convention;
|
|
|
|
uint32_t first_cunit, last_cunit;
|
|
|
|
uint32_t match_limit = 0, recursion_limit = 0;
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
/* These info requests should always succeed. */
|
2014-04-30 18:55:24 +02:00
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax) +
|
2014-04-30 18:55:24 +02:00
|
|
|
pattern_info(PCRE2_INFO_BSR_CONVENTION, &bsr_convention) +
|
|
|
|
pattern_info(PCRE2_INFO_CAPTURECOUNT, &count) +
|
|
|
|
pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits) +
|
2014-04-18 17:37:56 +02:00
|
|
|
pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit) +
|
|
|
|
pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype) +
|
|
|
|
pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf) +
|
|
|
|
pattern_info(PCRE2_INFO_JCHANGED, &jchanged) +
|
|
|
|
pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit) +
|
|
|
|
pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype) +
|
|
|
|
pattern_info(PCRE2_INFO_MATCH_EMPTY, &match_empty) +
|
2014-04-30 18:55:24 +02:00
|
|
|
pattern_info(PCRE2_INFO_MATCH_LIMIT, &match_limit) +
|
2014-04-18 17:37:56 +02:00
|
|
|
pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind) +
|
2014-04-30 18:55:24 +02:00
|
|
|
pattern_info(PCRE2_INFO_MINLENGTH, &minlength) +
|
2014-04-18 17:37:56 +02:00
|
|
|
pattern_info(PCRE2_INFO_NAMECOUNT, &namecount) +
|
|
|
|
pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize) +
|
|
|
|
pattern_info(PCRE2_INFO_NAMETABLE, &nametable) +
|
2014-04-30 18:55:24 +02:00
|
|
|
pattern_info(PCRE2_INFO_NEWLINE_CONVENTION, &newline_convention) +
|
2014-04-18 17:37:56 +02:00
|
|
|
pattern_info(PCRE2_INFO_RECURSION_LIMIT, &recursion_limit)
|
|
|
|
!= 0)
|
|
|
|
return PR_ABEND;
|
|
|
|
|
|
|
|
fprintf(outfile, "Capturing subpattern count = %d\n", count);
|
|
|
|
|
|
|
|
if (backrefmax > 0)
|
|
|
|
fprintf(outfile, "Max back reference = %d\n", backrefmax);
|
|
|
|
|
|
|
|
if (maxlookbehind > 0)
|
|
|
|
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
|
|
|
|
|
|
|
|
if (match_limit > 0)
|
|
|
|
fprintf(outfile, "Match limit = %u\n", match_limit);
|
|
|
|
|
|
|
|
if (recursion_limit > 0)
|
|
|
|
fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
|
|
|
|
|
|
|
|
if (namecount > 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Named capturing subpatterns:\n");
|
|
|
|
while (namecount-- > 0)
|
|
|
|
{
|
|
|
|
int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
|
|
|
|
int length = (int)STRLEN(nametable + imm2_size);
|
|
|
|
fprintf(outfile, " ");
|
2014-04-30 18:55:24 +02:00
|
|
|
PCHARSV(nametable, imm2_size, length, FALSE, outfile);
|
2014-04-18 17:37:56 +02:00
|
|
|
while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
|
|
|
|
#ifdef SUPPORT_PCRE32
|
|
|
|
if (test_mode == PCRE32_MODE)
|
|
|
|
fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
if (test_mode == PCRE16_MODE)
|
|
|
|
fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
if (test_mode == PCRE8_MODE)
|
2014-04-30 18:55:24 +02:00
|
|
|
fprintf(outfile, "%3d\n", (int)(
|
|
|
|
((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
|
2014-04-18 17:37:56 +02:00
|
|
|
#endif
|
2014-04-30 18:55:24 +02:00
|
|
|
nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
|
|
|
|
if (match_empty) fprintf(outfile, "May match empty string\n");
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
pattern_info(PCRE2_INFO_COMPILE_OPTIONS, &compile_options);
|
|
|
|
if (compile_options == 0) fprintf(outfile, "No options\n"); else
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Options: ");
|
|
|
|
show_compile_options(compile_options);
|
|
|
|
fprintf(outfile, "\n");
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
pattern_info(PCRE2_INFO_PATTERN_OPTIONS, &pattern_options);
|
|
|
|
if (pattern_options != 0) fprintf(outfile, "Pattern options:FIXME\n");
|
|
|
|
{
|
|
|
|
/* FIXME */
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
if (bsr_convention != PCRE2_BSR_DEFAULT)
|
|
|
|
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
|
|
|
|
"any Unicode newline" : "CR, LF, or CRLF");
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
switch (newline_convention)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
case PCRE2_NEWLINE_CR:
|
|
|
|
fprintf(outfile, "Newline is CR\n");
|
2014-04-18 17:37:56 +02:00
|
|
|
break;
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
case PCRE2_NEWLINE_LF:
|
|
|
|
fprintf(outfile, "Newline is LF\n");
|
2014-04-18 17:37:56 +02:00
|
|
|
break;
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
case PCRE2_NEWLINE_CRLF:
|
|
|
|
fprintf(outfile, "Newline is CRLF\n");
|
2014-04-18 17:37:56 +02:00
|
|
|
break;
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
case PCRE2_NEWLINE_ANYCRLF:
|
|
|
|
fprintf(outfile, "Newline is CR, LF, or CRLF\n");
|
2014-04-18 17:37:56 +02:00
|
|
|
break;
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
case PCRE2_NEWLINE_ANY:
|
|
|
|
fprintf(outfile, "Newline is any Unicode newline\n");
|
2014-04-18 17:37:56 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (first_ctype == 2)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "First char at start or follows newline\n");
|
|
|
|
}
|
|
|
|
else if (first_ctype == 1)
|
|
|
|
{
|
|
|
|
const char *caseless =
|
2014-04-30 18:55:24 +02:00
|
|
|
((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
|
2014-04-18 17:37:56 +02:00
|
|
|
"" : " (caseless)";
|
|
|
|
if (PRINTOK(first_cunit))
|
|
|
|
fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(outfile, "First code unit = ");
|
2014-04-30 18:55:24 +02:00
|
|
|
pchar(first_cunit, FALSE, outfile);
|
2014-04-18 17:37:56 +02:00
|
|
|
fprintf(outfile, "%s\n", caseless);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(outfile, "No first code unit\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (last_ctype == 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "No last code unit\n");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const char *caseless =
|
2014-04-30 18:55:24 +02:00
|
|
|
((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
|
2014-04-18 17:37:56 +02:00
|
|
|
"" : " (caseless)";
|
|
|
|
if (PRINTOK(last_cunit))
|
|
|
|
fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Last code unit = ");
|
2014-04-30 18:55:24 +02:00
|
|
|
pchar(last_cunit, FALSE, outfile);
|
2014-04-18 17:37:56 +02:00
|
|
|
fprintf(outfile, "%s\n", caseless);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
fprintf(outfile, "Subject length lower bound = %d\n", minlength);
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
if (start_bits == NULL)
|
|
|
|
fprintf(outfile, "No starting code unit list\n");
|
|
|
|
else
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
int i;
|
|
|
|
int c = 24;
|
|
|
|
fprintf(outfile, "Starting code units: ");
|
|
|
|
for (i = 0; i < 256; i++)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
if ((start_bits[i/8] & (1<<(i&7))) != 0)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
if (c > 75)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
fprintf(outfile, "\n ");
|
|
|
|
c = 2;
|
|
|
|
}
|
|
|
|
if (PRINTOK(i) && i != ' ')
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%c ", i);
|
|
|
|
c += 2;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(outfile, "\\x%02x ", i);
|
|
|
|
c += 5;
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-04-30 18:55:24 +02:00
|
|
|
fprintf(outfile, "\n");
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
if (pat_patctl.jit != 0)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
size_t jitsize;
|
|
|
|
if (pattern_info(PCRE2_INFO_JITSIZE, &jitsize) == 0)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
if (jitsize > 0)
|
2014-04-18 17:37:56 +02:00
|
|
|
fprintf(outfile, "JIT study was successful\n");
|
|
|
|
else
|
|
|
|
#ifdef SUPPORT_JIT
|
|
|
|
fprintf(outfile, "JIT study was not successful\n");
|
|
|
|
#else
|
|
|
|
fprintf(outfile, "JIT support is not available in this version of PCRE\n");
|
|
|
|
#endif
|
2014-04-30 18:55:24 +02:00
|
|
|
}
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return PR_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Process command line *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* This function is called for lines beginning with # and a character that is
|
|
|
|
not ! or whitespace, when encountered between tests. The line is in buffer.
|
|
|
|
|
|
|
|
Arguments: none
|
|
|
|
|
|
|
|
Returns: PR_OK continue processing next line
|
|
|
|
PR_SKIP skip to a blank line
|
|
|
|
PR_ABEND abort the pcre2test run
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
process_command(void)
|
|
|
|
{
|
|
|
|
if (strncmp((char *)buffer, "#pattern", 8) == 0 && isspace(buffer[8]))
|
|
|
|
{
|
|
|
|
(void)decode_modifiers(buffer + 8, CTX_DEFPAT, &def_patctl, NULL);
|
|
|
|
}
|
|
|
|
else if (strncmp((char *)buffer, "#data", 5) == 0 && isspace(buffer[5]))
|
|
|
|
{
|
|
|
|
(void)decode_modifiers(buffer + 5, CTX_DEFDAT, NULL, &def_datctl);
|
|
|
|
}
|
|
|
|
else if (strncmp((char *)buffer, "#load", 5) == 0 && isspace(buffer[5]))
|
|
|
|
{
|
|
|
|
/* FIXME */
|
|
|
|
fprintf(outfile, "** #load not yet implemented\n");
|
|
|
|
return PR_ABEND;
|
|
|
|
|
|
|
|
#ifdef FIXME
|
|
|
|
|
|
|
|
|
|
|
|
/* See if the pattern is to be loaded pre-compiled from a file. */
|
|
|
|
|
|
|
|
if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
|
|
|
|
{
|
|
|
|
uint32_t magic;
|
|
|
|
uint8_t sbuf[8];
|
|
|
|
FILE *f;
|
|
|
|
|
|
|
|
p++;
|
|
|
|
if (*p == '!')
|
|
|
|
{
|
|
|
|
do_debug = TRUE;
|
|
|
|
do_showinfo = TRUE;
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
|
|
|
|
pp = p + (int)strlen((char *)p);
|
|
|
|
while (isspace(pp[-1])) pp--;
|
|
|
|
*pp = 0;
|
|
|
|
|
|
|
|
f = fopen((char *)p, "rb");
|
|
|
|
if (f == NULL)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
|
|
|
|
|
|
|
|
true_size =
|
|
|
|
(sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
|
|
|
|
true_study_size =
|
|
|
|
(sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
|
|
|
|
|
|
|
|
re = (pcre *)new_malloc(true_size);
|
|
|
|
if (re == NULL)
|
|
|
|
{
|
|
|
|
printf("** Failed to get %d bytes of memory for pcre object\n",
|
|
|
|
(int)true_size);
|
|
|
|
yield = 1;
|
|
|
|
goto EXIT;
|
|
|
|
}
|
|
|
|
if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
|
|
|
|
|
|
|
|
magic = REAL_PCRE_MAGIC(re);
|
|
|
|
if (magic != MAGIC_NUMBER)
|
|
|
|
{
|
|
|
|
if (swap_uint32(magic) == MAGIC_NUMBER)
|
|
|
|
{
|
|
|
|
do_flip = 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
|
|
|
|
new_free(re);
|
|
|
|
fclose(f);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We hide the byte-invert info for little and big endian tests. */
|
|
|
|
fprintf(outfile, "Compiled pattern%s loaded from %s\n",
|
|
|
|
do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
|
|
|
|
|
|
|
|
/* Now see if there is any following study data. */
|
|
|
|
|
|
|
|
if (true_study_size != 0)
|
|
|
|
{
|
|
|
|
pcre_study_data *psd;
|
|
|
|
|
|
|
|
extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
|
|
|
|
extra->flags = PCRE_EXTRA_STUDY_DATA;
|
|
|
|
|
|
|
|
psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
|
|
|
|
extra->study_data = psd;
|
|
|
|
|
|
|
|
if (fread(psd, 1, true_study_size, f) != true_study_size)
|
|
|
|
{
|
|
|
|
FAIL_READ:
|
|
|
|
fprintf(outfile, "Failed to read data from %s\n", p);
|
|
|
|
if (extra != NULL)
|
|
|
|
{
|
|
|
|
PCRE_FREE_STUDY(extra);
|
|
|
|
}
|
|
|
|
new_free(re);
|
|
|
|
fclose(f);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
fprintf(outfile, "Study data loaded from %s\n", p);
|
|
|
|
do_study = 1; /* To get the data output if requested */
|
|
|
|
}
|
|
|
|
else fprintf(outfile, "No study data\n");
|
|
|
|
|
|
|
|
/* Flip the necessary bytes. */
|
|
|
|
if (do_flip)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
|
|
|
|
if (rc == PCRE_ERROR_BADMODE)
|
|
|
|
{
|
|
|
|
uint32_t flags_in_host_byte_order;
|
|
|
|
if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
|
|
|
|
flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
|
|
|
|
else
|
|
|
|
flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
|
|
|
|
/* Simulate the result of the function call below. */
|
|
|
|
fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
|
|
|
|
test_mode == PCRE32_MODE ? "32" : test_mode == PCRE16_MODE ? "16" : "",
|
|
|
|
PCRE_INFO_OPTIONS);
|
|
|
|
fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
|
2014-04-30 18:55:24 +02:00
|
|
|
"%d-bit mode\n", test_mode, 8 * (flags_in_host_byte_order & test_mode_MASK));
|
2014-04-18 17:37:56 +02:00
|
|
|
new_free(re);
|
|
|
|
fclose(f);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Need to know if UTF-8 for printing data strings. */
|
|
|
|
|
|
|
|
if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
|
|
|
|
{
|
|
|
|
new_free(re);
|
|
|
|
fclose(f);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
use_utf = (get_options & PCRE_UTF8) != 0;
|
|
|
|
|
|
|
|
fclose(f);
|
|
|
|
goto SHOW_INFO;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* FIXME */
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
return PR_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Process pattern line *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* This function is called when the input buffer contains the start of a
|
|
|
|
pattern. The first character is known to be a valid delimiter. The pattern is
|
|
|
|
read, modifiers are interpreted, and a suitable local context is set up for
|
|
|
|
this test. The pattern is then compiled.
|
|
|
|
|
|
|
|
Arguments: none
|
|
|
|
|
|
|
|
Returns: PR_OK continue processing next line
|
|
|
|
PR_SKIP skip to a blank line
|
|
|
|
PR_ABEND abort the pcre2test run
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
process_pattern(void)
|
|
|
|
{
|
|
|
|
uint8_t *p = buffer;
|
|
|
|
unsigned int delimiter = *p++;
|
|
|
|
int patlen, errorcode;
|
|
|
|
size_t erroroffset;
|
|
|
|
|
|
|
|
/* Initialize the context and pattern/data controls for this test from the
|
|
|
|
defaults. */
|
|
|
|
|
|
|
|
PATCTXCPY(pat_context, default_pat_context);
|
|
|
|
memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
|
|
|
|
|
|
|
|
/* Find the end of the pattern, reading more lines if necessary. */
|
|
|
|
|
|
|
|
for(;;)
|
|
|
|
{
|
|
|
|
while (*p != 0)
|
|
|
|
{
|
|
|
|
if (*p == '\\' && p[1] != 0) p++;
|
|
|
|
else if (*p == delimiter) break;
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
if (*p != 0) break;
|
|
|
|
if ((p = extend_inputline(infile, p, " > ")) == NULL)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Unexpected EOF\n");
|
|
|
|
return PR_ABEND;
|
|
|
|
}
|
|
|
|
if (infile != stdin) fprintf(outfile, "%s", (char *)p);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If the first character after the delimiter is backslash, make
|
|
|
|
the pattern end with backslash. This is purely to provide a way
|
|
|
|
of testing for the error message when a pattern ends with backslash. */
|
|
|
|
|
|
|
|
if (p[1] == '\\') *p++ = '\\';
|
|
|
|
|
|
|
|
/* Terminate the pattern at the delimiter, and save a copy of the pattern
|
|
|
|
for callouts. */
|
|
|
|
|
|
|
|
*p++ = 0;
|
|
|
|
patlen = p - buffer - 1;
|
|
|
|
strncpy((char *)pbuffer, (char *)(buffer+1), patlen);
|
|
|
|
|
|
|
|
/* Look for modifiers and options after the final delimiter. If successful,
|
|
|
|
compile the pattern. */
|
|
|
|
|
|
|
|
if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
|
|
|
|
|
|
|
|
/* Handle compiling via the POSIX interface, which doesn't support the
|
|
|
|
timing, showing, or debugging options, nor the ability to pass over
|
|
|
|
local character tables. Neither does it have 16-bit or 32-bit support. */
|
|
|
|
|
|
|
|
if ((pat_patctl.control & CTL_POSIX) != 0)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
int cflags = 0;
|
2014-04-30 18:55:24 +02:00
|
|
|
const char *msg = "** Ignored with POSIX interface:";
|
|
|
|
|
|
|
|
if (test_mode != 8)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
|
|
|
|
return PR_SKIP;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
/* Check for features that the POSIX interface does not support. */
|
|
|
|
|
|
|
|
if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
|
|
|
|
if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
|
|
|
|
if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
|
|
|
|
if (timeit > 0) prmsg(&msg, "timing");
|
|
|
|
if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
|
|
|
|
if (pat_patctl.save[0] != 0) prmsg(&msg, "save");
|
|
|
|
|
|
|
|
if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%s", msg);
|
|
|
|
show_compile_options(pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS);
|
|
|
|
msg = "";
|
|
|
|
}
|
|
|
|
if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%s", msg);
|
|
|
|
show_compile_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS);
|
|
|
|
msg = "";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (msg[0] == 0) fprintf(outfile, "\n");
|
|
|
|
|
|
|
|
/* Translate PCRE2 options to POSIX options and then compile. On success, set
|
|
|
|
up a match_data block to be used for all matches. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
if ((pat_patctl.options & PCRE2_UTF) != 0) cflags |= REG_UTF;
|
|
|
|
if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
|
|
|
|
if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
|
|
|
|
if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
|
|
|
|
if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
|
|
|
|
if ((pat_patctl.options & PCRE2_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
|
|
|
|
if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
|
|
|
|
|
|
|
|
rc = regcomp(&preg, (char *)pbuffer, cflags);
|
2014-04-30 18:55:24 +02:00
|
|
|
if (rc != 0) /* Failure */
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
|
|
|
(void)regerror(rc, &preg, (char *)buffer, buffer_size);
|
|
|
|
fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
|
|
|
|
return PR_SKIP;
|
|
|
|
}
|
|
|
|
return PR_OK;
|
2014-04-30 18:55:24 +02:00
|
|
|
#endif /* SUPPORT_PCRE8 */
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle compiling via the native interface, converting the input in non-8-bit
|
|
|
|
modes. */
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
if (test_mode == PCRE16_MODE)
|
|
|
|
patlen = to16(pbuffer, pat_patctl.options & PCRE2_UTF,
|
|
|
|
(int)strlen((char *)pbuffer));
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE32
|
|
|
|
if (test_mode == PCRE32_MODE)
|
|
|
|
patlen = to32(pbuffer, pat_patctl.options & PCRE2_UTF,
|
|
|
|
(int)strlen((char *)pbuffer));
|
|
|
|
#endif
|
|
|
|
|
|
|
|
switch(patlen)
|
|
|
|
{
|
|
|
|
case -1:
|
|
|
|
fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
|
|
|
|
"converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
|
|
|
|
return PR_SKIP;
|
|
|
|
|
|
|
|
case -2:
|
|
|
|
fprintf(outfile, "** Failed: character value greater than 0x10ffff "
|
|
|
|
"cannot be converted to UTF\n");
|
|
|
|
return PR_SKIP;
|
|
|
|
|
|
|
|
case -3:
|
|
|
|
fprintf(outfile, "** Failed: character value greater than 0xffff "
|
|
|
|
"cannot be converted to 16-bit in non-UTF mode\n");
|
|
|
|
return PR_SKIP;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Compile many times when timing */
|
|
|
|
|
|
|
|
if (timeit > 0)
|
|
|
|
{
|
|
|
|
register int i;
|
|
|
|
clock_t time_taken;
|
|
|
|
clock_t start_time = clock();
|
|
|
|
for (i = 0; i < timeit; i++)
|
|
|
|
{
|
|
|
|
PCRE2_COMPILE(compiled_code, pbuffer, patlen,
|
|
|
|
pat_patctl.options, &errorcode, &erroroffset, pat_context);
|
|
|
|
if (TEST(compiled_code, !=, NULL))
|
|
|
|
{ SUB1(pcre2_code_free, compiled_code); }
|
|
|
|
}
|
|
|
|
total_compile_time += (time_taken = clock() - start_time);
|
|
|
|
fprintf(outfile, "Compile time %.4f milliseconds\n",
|
|
|
|
(((double)time_taken * 1000.0) / (double)timeit) /
|
|
|
|
(double)CLOCKS_PER_SEC);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* FIXME: implement timing for JIT compile. */
|
|
|
|
|
|
|
|
/* A final compile that is used "for real". */
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options, &errorcode,
|
2014-04-18 17:37:56 +02:00
|
|
|
&erroroffset, pat_context);
|
|
|
|
|
|
|
|
/* Compilation failed; go back for another re, skipping to blank line
|
|
|
|
if non-interactive. */
|
|
|
|
|
|
|
|
if (TEST(compiled_code, ==, NULL))
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Failed: %d at offset %d\n", errorcode, (int)erroroffset);
|
|
|
|
|
|
|
|
/* FIXME get error message */
|
|
|
|
|
|
|
|
return PR_SKIP;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Call the JIT compiler if requested. */
|
|
|
|
|
|
|
|
if (pat_patctl.jit != 0)
|
|
|
|
{ PCRE2_JIT_COMPILE(compiled_code, pat_patctl.jit); }
|
|
|
|
|
|
|
|
/* Output code size and other information if requested. */
|
|
|
|
|
|
|
|
if ((pat_patctl.control & CTL_MEMORY) != 0)
|
|
|
|
{
|
|
|
|
size_t size;
|
|
|
|
size_t name_entry_size;
|
|
|
|
int name_count;
|
|
|
|
(void)pattern_info(PCRE2_INFO_SIZE, &size);
|
|
|
|
(void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count);
|
|
|
|
(void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
|
|
|
|
fprintf(outfile, "Memory allocation (code space): %d\n",
|
|
|
|
(int)(size - name_count * name_entry_size -
|
|
|
|
FLD(compiled_code, name_table_offset)));
|
|
|
|
if (pat_patctl.jit != 0)
|
|
|
|
{
|
|
|
|
(void)pattern_info(PCRE2_INFO_JITSIZE, &size);
|
|
|
|
fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
|
|
|
|
}
|
|
|
|
}
|
2014-04-30 18:55:24 +02:00
|
|
|
|
|
|
|
if ((pat_patctl.control & CTL_ANYINFO) != 0)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
|
|
|
int rc = show_pattern_info();
|
2014-04-30 18:55:24 +02:00
|
|
|
if (rc != PR_OK) return rc;
|
|
|
|
}
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
#ifdef FIXME
|
|
|
|
|
|
|
|
/* If the '>' option was present, we write out the regex to a file, and
|
|
|
|
that is all. The first 8 bytes of the file are the regex length and then
|
|
|
|
the study length, in big-endian order. */
|
|
|
|
|
|
|
|
if (to_file != NULL)
|
|
|
|
{
|
|
|
|
FILE *f = fopen((char *)to_file, "wb");
|
|
|
|
if (f == NULL)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
uint8_t sbuf[8];
|
|
|
|
|
|
|
|
/* Extract the size for possible writing before possibly flipping it,
|
|
|
|
and remember the store that was got. */
|
|
|
|
|
|
|
|
true_size = REAL_PCRE_SIZE(re);
|
|
|
|
|
|
|
|
if (do_flip) regexflip(re, extra);
|
|
|
|
sbuf[0] = (uint8_t)((true_size >> 24) & 255);
|
|
|
|
sbuf[1] = (uint8_t)((true_size >> 16) & 255);
|
|
|
|
sbuf[2] = (uint8_t)((true_size >> 8) & 255);
|
|
|
|
sbuf[3] = (uint8_t)((true_size) & 255);
|
|
|
|
sbuf[4] = (uint8_t)((true_study_size >> 24) & 255);
|
|
|
|
sbuf[5] = (uint8_t)((true_study_size >> 16) & 255);
|
|
|
|
sbuf[6] = (uint8_t)((true_study_size >> 8) & 255);
|
|
|
|
sbuf[7] = (uint8_t)((true_study_size) & 255);
|
|
|
|
|
|
|
|
if (fwrite(sbuf, 1, 8, f) < 8 ||
|
|
|
|
fwrite(re, 1, true_size, f) < true_size)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Compiled pattern written to %s\n", to_file);
|
|
|
|
|
|
|
|
/* If there is study data, write it. */
|
|
|
|
|
|
|
|
if (extra != NULL)
|
|
|
|
{
|
|
|
|
if (fwrite(extra->study_data, 1, true_study_size, f) <
|
|
|
|
true_study_size)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Write error on %s: %s\n", to_file,
|
|
|
|
strerror(errno));
|
|
|
|
}
|
|
|
|
else fprintf(outfile, "Study data written to %s\n", to_file);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fclose(f);
|
|
|
|
}
|
|
|
|
|
|
|
|
new_free(re);
|
|
|
|
if (extra != NULL)
|
|
|
|
{
|
|
|
|
PCRE_FREE_STUDY(extra);
|
|
|
|
}
|
|
|
|
if (locale_set)
|
|
|
|
{
|
|
|
|
new_free((void *)tables);
|
|
|
|
setlocale(LC_CTYPE, "C");
|
2014-04-30 18:55:24 +02:00
|
|
|
locale_set = FALSE;
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
|
|
|
continue; /* With next regex */
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* FIXME */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return PR_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Process data line *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* The line is in buffer; it will not be empty.
|
|
|
|
|
|
|
|
Arguments: none
|
|
|
|
|
|
|
|
Returns: PR_OK continue processing next line
|
|
|
|
PR_SKIP skip to a blank line
|
|
|
|
PR_ABEND abort the pcre2test run
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
process_data(void)
|
|
|
|
{
|
|
|
|
int len;
|
|
|
|
uint32_t c;
|
2014-04-30 18:55:24 +02:00
|
|
|
uint8_t *p, *pp, *start_dup;
|
|
|
|
size_t needlen;
|
|
|
|
BOOL utf;
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
uint8_t *q8;
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
uint16_t *q16;
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE32
|
|
|
|
uint32_t *q32;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
DATCTXCPY(dat_context, default_dat_context);
|
|
|
|
memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
utf = (pat_patctl.control & CTL_POSIX) == 0 &&
|
|
|
|
(FLD(compiled_code, compile_options) & PCRE2_UTF) != 0;
|
|
|
|
start_dup = NULL;
|
2014-04-18 17:37:56 +02:00
|
|
|
len = strlen((const char *)buffer);
|
|
|
|
while (len > 0 && isspace(buffer[len-1])) len--;
|
|
|
|
buffer[len] = 0;
|
|
|
|
p = buffer;
|
|
|
|
while (isspace(*p)) p++;
|
|
|
|
|
|
|
|
/* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
|
|
|
|
invalid input to pcre2_exec, you must use \x?? or \x{} sequences. */
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
if (utf)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
|
|
|
uint8_t *q;
|
|
|
|
uint32_t cc;
|
|
|
|
int n = 1;
|
|
|
|
for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
|
|
|
|
if (n <= 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
|
|
|
|
"in UTF mode\n");
|
|
|
|
return PR_OK;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef SUPPORT_VALGRIND
|
|
|
|
/* Mark the dbuffer as addressable but undefined again. */
|
|
|
|
if (dbuffer != NULL)
|
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Allocate a buffer to hold the data line; len+1 is an upper bound on
|
2014-04-30 18:55:24 +02:00
|
|
|
the number of code units that will be needed. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
needlen = (size_t)(len * code_unit_size);
|
|
|
|
while (dbuffer == NULL || needlen >= dbuffer_size)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
|
|
|
dbuffer_size *= 2;
|
2014-04-30 18:55:24 +02:00
|
|
|
dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
|
2014-04-18 17:37:56 +02:00
|
|
|
if (dbuffer == NULL)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
2014-04-30 18:55:24 +02:00
|
|
|
SETCASTPTR(q, dbuffer);
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
/* Scan the data line, interpreting data escapes, and put the result into a
|
2014-04-30 18:55:24 +02:00
|
|
|
buffer of the appropriate width. In UTF mode, input can be UTF-8. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
while ((c = *p++) != 0)
|
|
|
|
{
|
|
|
|
int i = 0;
|
2014-04-30 18:55:24 +02:00
|
|
|
size_t duplen;
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* ] may mark the end of a duplicated sequence */
|
|
|
|
|
|
|
|
if (c == ']' && start_dup != NULL)
|
|
|
|
{
|
|
|
|
size_t qoffset = CAST8VAR(q) - (uint8_t *)dbuffer;
|
|
|
|
|
|
|
|
if (*p++ != '{')
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Expected '{' after \\[....]\n");
|
|
|
|
return PR_OK;
|
|
|
|
}
|
|
|
|
while (isdigit(*p)) i = i * 10 + *p++ - '0';
|
|
|
|
if (*p++ != '}')
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Expected '}' after \\[...]{...\n");
|
|
|
|
return PR_OK;
|
|
|
|
}
|
|
|
|
if (i-- == 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Zero repeat not allowed\n");
|
|
|
|
return PR_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
duplen = CAST8VAR(q) - start_dup;
|
|
|
|
needlen += duplen * (i - 1);
|
|
|
|
while (needlen >= dbuffer_size)
|
|
|
|
{
|
|
|
|
dbuffer_size *= 2;
|
|
|
|
dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
|
|
|
|
if (dbuffer == NULL)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
SETCASTPTR(q, dbuffer + qoffset);
|
|
|
|
}
|
|
|
|
|
|
|
|
while (i-- > 0)
|
|
|
|
{
|
|
|
|
memcpy(CAST8VAR(q), start_dup, duplen);
|
|
|
|
SETPLUS(q, duplen/code_unit_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
start_dup = NULL;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle a non-escaped character */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
if (c != '\\')
|
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
if (utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle backslash escapes */
|
|
|
|
|
|
|
|
else switch ((c = *p++))
|
|
|
|
{
|
|
|
|
case '\\': break;
|
|
|
|
case 'a': c = 7; break;
|
|
|
|
case 'b': c = '\b'; break;
|
|
|
|
case 'e': c = 27; break;
|
|
|
|
case 'f': c = '\f'; break;
|
|
|
|
case 'n': c = '\n'; break;
|
|
|
|
case 'r': c = '\r'; break;
|
|
|
|
case 't': c = '\t'; break;
|
|
|
|
case 'v': c = '\v'; break;
|
|
|
|
|
|
|
|
case '0': case '1': case '2': case '3':
|
|
|
|
case '4': case '5': case '6': case '7':
|
|
|
|
c -= '0';
|
|
|
|
while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
|
|
|
|
c = c * 8 + *p++ - '0';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'o':
|
|
|
|
if (*p == '{')
|
|
|
|
{
|
|
|
|
uint8_t *pt = p;
|
|
|
|
c = 0;
|
|
|
|
for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
|
|
|
|
{
|
|
|
|
if (++i == 12)
|
|
|
|
fprintf(outfile, "** Too many octal digits in \\o{...} item; "
|
|
|
|
"using only the first twelve.\n");
|
|
|
|
else c = c * 8 + *pt - '0';
|
|
|
|
}
|
|
|
|
if (*pt == '}') p = pt + 1;
|
|
|
|
else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'x':
|
|
|
|
if (*p == '{')
|
|
|
|
{
|
|
|
|
uint8_t *pt = p;
|
|
|
|
c = 0;
|
|
|
|
|
|
|
|
/* We used to have "while (isxdigit(*(++pt)))" here, but it fails
|
|
|
|
when isxdigit() is a macro that refers to its argument more than
|
|
|
|
once. This is banned by the C Standard, but apparently happens in at
|
|
|
|
least one MacOS environment. */
|
|
|
|
|
|
|
|
for (pt++; isxdigit(*pt); pt++)
|
|
|
|
{
|
|
|
|
if (++i == 9)
|
|
|
|
fprintf(outfile, "** Too many hex digits in \\x{...} item; "
|
|
|
|
"using only the first eight.\n");
|
|
|
|
else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
|
|
|
|
}
|
|
|
|
if (*pt == '}')
|
|
|
|
{
|
|
|
|
p = pt + 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* Not correct form for \x{...}; fall through */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* \x without {} always defines just one byte in 8-bit mode. This
|
|
|
|
allows UTF-8 characters to be constructed byte by byte, and also allows
|
2014-04-30 18:55:24 +02:00
|
|
|
invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
|
|
|
|
Otherwise, pass it down as data. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
c = 0;
|
|
|
|
while (i++ < 2 && isxdigit(*p))
|
|
|
|
{
|
|
|
|
c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
#if defined SUPPORT_PCRE8
|
2014-04-30 18:55:24 +02:00
|
|
|
if (utf && (test_mode == PCRE8_MODE))
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
|
|
|
*q8++ = c;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0: /* \ followed by EOF allows for an empty line */
|
|
|
|
p--;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
case '=': /* \= terminates the data, starts modifiers */
|
|
|
|
goto ENDSTRING;
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
case '[': /* \[ introduces a duplicated character sequence */
|
|
|
|
if (start_dup != NULL)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Nested duplication is not supported\n");
|
|
|
|
return PR_OK;
|
|
|
|
}
|
|
|
|
start_dup = CAST8VAR(q);
|
|
|
|
continue;
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
default:
|
|
|
|
fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
|
2014-04-30 18:55:24 +02:00
|
|
|
return PR_OK;
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* We now have a character value in c that may be greater than 255.
|
|
|
|
In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
|
|
|
|
than 127 in UTF mode must have come from \x{...} or octal constructs
|
|
|
|
because values from \x.. get this far only in non-UTF mode. */
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
if (test_mode == PCRE8_MODE)
|
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
if (utf)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
|
|
|
if (c > 0x7fffffff)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
|
|
|
|
"and so cannot be converted to UTF-8\n", c);
|
|
|
|
return PR_OK;
|
|
|
|
}
|
|
|
|
q8 += ord2utf8(c, q8);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (c > 0xffu)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Character \\x{%x} is greater than 255 "
|
|
|
|
"and UTF-8 mode is not enabled.\n", c);
|
|
|
|
fprintf(outfile, "** Truncation will probably give the wrong "
|
|
|
|
"result.\n");
|
|
|
|
}
|
|
|
|
*q8++ = c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
if (test_mode == PCRE16_MODE)
|
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
if (utf)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
|
|
|
if (c > 0x10ffffu)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Failed: character \\x{%x} is greater than "
|
|
|
|
"0x10ffff and so cannot be converted to UTF-16\n", c);
|
|
|
|
return PR_OK;
|
|
|
|
}
|
|
|
|
else if (c >= 0x10000u)
|
|
|
|
{
|
|
|
|
c-= 0x10000u;
|
|
|
|
*q16++ = 0xD800 | (c >> 10);
|
|
|
|
*q16++ = 0xDC00 | (c & 0x3ff);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
*q16++ = c;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (c > 0xffffu)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
|
|
|
|
"and UTF-16 mode is not enabled.\n", c);
|
|
|
|
fprintf(outfile, "** Truncation will probably give the wrong "
|
|
|
|
"result.\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
*q16++ = c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE32
|
|
|
|
if (test_mode == PCRE32_MODE)
|
|
|
|
{
|
|
|
|
*q32++ = c;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
ENDSTRING:
|
2014-04-30 18:55:24 +02:00
|
|
|
SET(*q, 0);
|
|
|
|
len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* If we have explicit valgrind support, mark the data from after its end to
|
|
|
|
the end of the buffer as unaddressable, so that a read over the end of the
|
|
|
|
buffer will be seen by valgrind, even if it doesn't cause a crash. If we're not
|
|
|
|
building with valgrind support, at least move the data to the end of the buffer
|
|
|
|
so that it might at least cause a crash. If we are using the POSIX interface,
|
|
|
|
we must include the terminating zero. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
pp = dbuffer;
|
|
|
|
c = code_unit_size * ((pat_patctl.control & CTL_POSIX) != 0)? 1:0;
|
|
|
|
|
|
|
|
#ifdef SUPPORT_VALGRIND
|
|
|
|
VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + c, dbuffer_size - (len + c));
|
|
|
|
#else
|
|
|
|
pp = memmove(pp + dbuffer_size - len - c, pp, len + c);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* If the string was terminated by \= we must now interpret modifiers. */
|
|
|
|
|
|
|
|
if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
|
|
|
|
return PR_OK;
|
|
|
|
|
|
|
|
if ((dat_datctl.control & (CTL_DFA|CTL_LIMITS)) == (CTL_DFA|CTL_LIMITS))
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
printf("** Find match limits not relevant for DFA matching: ignored\n");
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
2014-04-30 18:55:24 +02:00
|
|
|
|
|
|
|
/* Now run the pattern match: len contains the byte length and pp points to the
|
|
|
|
subject string. POSIX matching is only possible in 8-bit mode, and it does not
|
|
|
|
support timing or other fancy features. Some were checked at compile time, but
|
|
|
|
we need to check the match-time settings here. */
|
|
|
|
|
|
|
|
if ((pat_patctl.control & CTL_POSIX) != 0)
|
2014-04-18 17:37:56 +02:00
|
|
|
{
|
2014-04-30 18:55:24 +02:00
|
|
|
int rc;
|
|
|
|
int eflags = 0;
|
|
|
|
regmatch_t *pmatch = NULL;
|
|
|
|
const char *msg = "** Ignored with POSIX interface:";
|
|
|
|
|
|
|
|
if (dat_datctl.cfail[0] != 0 || dat_datctl.cfail[1] != 0)
|
|
|
|
prmsg(&msg, "callout_fail");
|
|
|
|
if (dat_datctl.copy_numbers[0] != 0 || dat_datctl.copy_names[0] != 0)
|
|
|
|
prmsg(&msg, "copy");
|
|
|
|
if (dat_datctl.get_numbers[0] != 0 || dat_datctl.get_names[0] != 0)
|
|
|
|
prmsg(&msg, "get");
|
|
|
|
if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
|
|
|
|
|
|
|
|
if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%s", msg);
|
|
|
|
show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
|
|
|
|
msg = "";
|
|
|
|
}
|
|
|
|
if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%s", msg);
|
|
|
|
show_match_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS);
|
|
|
|
msg = "";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (msg[0] == 0) fprintf(outfile, "\n");
|
|
|
|
|
|
|
|
if (dat_datctl.oveccount > 0)
|
|
|
|
pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
|
|
|
|
if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
|
|
|
|
if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
|
|
|
|
if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
|
|
|
|
|
|
|
|
rc = regexec(&preg, (const char *)pp + dat_datctl.offset,
|
|
|
|
dat_datctl.oveccount, pmatch, eflags);
|
|
|
|
if (rc != 0)
|
|
|
|
{
|
|
|
|
(void)regerror(rc, &preg, (char *)buffer, buffer_size);
|
|
|
|
fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
|
|
|
|
}
|
|
|
|
else if ((pat_patctl.options & PCRE2_NO_AUTO_CAPTURE) != 0)
|
|
|
|
fprintf(outfile, "Matched with REG_NOSUB\n");
|
|
|
|
else if (dat_datctl.oveccount == 0)
|
|
|
|
fprintf(outfile, "Matched without capture\n");
|
|
|
|
else
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
|
|
|
|
{
|
|
|
|
if (pmatch[i].rm_so >= 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%2d: ", (int)i);
|
|
|
|
PCHARSV(dbuffer, pmatch[i].rm_so,
|
|
|
|
pmatch[i].rm_eo - pmatch[i].rm_so, FALSE, outfile);
|
|
|
|
fprintf(outfile, "\n");
|
|
|
|
if ((i == 0 && (pat_patctl.control & CTL_AFTERTEXT) != 0) ||
|
|
|
|
(pat_patctl.control & CTL_ALLAFTERTEXT) != 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%2d+ ", (int)i);
|
|
|
|
PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
|
|
|
|
FALSE, outfile);
|
|
|
|
fprintf(outfile, "\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(pmatch);
|
|
|
|
return PR_OK;
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
2014-04-30 18:55:24 +02:00
|
|
|
|
|
|
|
|
|
|
|
#ifdef FIXME
|
|
|
|
|
|
|
|
/* Handle matching via the native interface - repeats for /g and /G */
|
|
|
|
|
|
|
|
/* Ensure that there is a JIT callback if we want to verify that JIT was
|
|
|
|
actually used. If jit_stack == NULL, no stack has yet been assigned. */
|
|
|
|
|
|
|
|
if (verify_jit && jit_stack == NULL && extra != NULL)
|
|
|
|
{ PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
|
|
|
|
|
|
|
|
for (;; gmatched++) /* Loop for /g or /G */
|
|
|
|
{
|
|
|
|
markptr = NULL;
|
|
|
|
jit_was_used = FALSE;
|
|
|
|
|
|
|
|
if (timeitm > 0)
|
|
|
|
{
|
|
|
|
register int i;
|
|
|
|
clock_t time_taken;
|
|
|
|
clock_t start_time = clock();
|
|
|
|
|
|
|
|
#if !defined NODFA
|
|
|
|
if (all_use_dfa || use_dfa)
|
|
|
|
{
|
|
|
|
if ((options & PCRE_DFA_RESTART) != 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Timing DFA restarts is not supported\n");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (dfa_workspace == NULL)
|
|
|
|
dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
|
|
|
|
for (i = 0; i < timeitm; i++)
|
|
|
|
{
|
|
|
|
PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
|
|
|
|
(options | g_notempty), use_offsets, use_size_offsets,
|
|
|
|
dfa_workspace, DFA_WS_DIMENSION);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
|
|
|
|
for (i = 0; i < timeitm; i++)
|
|
|
|
{
|
|
|
|
PCRE_EXEC(count, re, extra, bptr, len, start_offset,
|
|
|
|
(options | g_notempty), use_offsets, use_size_offsets);
|
|
|
|
}
|
|
|
|
total_match_time += (time_taken = clock() - start_time);
|
|
|
|
fprintf(outfile, "Execute time %.4f milliseconds\n",
|
|
|
|
(((double)time_taken * 1000.0) / (double)timeitm) /
|
|
|
|
(double)CLOCKS_PER_SEC);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If find_match_limit is set, we want to do repeated matches with
|
|
|
|
varying limits in order to find the minimum value for the match limit and
|
|
|
|
for the recursion limit. The match limits are relevant only to the normal
|
|
|
|
running of pcre_exec(), so disable the JIT optimization. This makes it
|
|
|
|
possible to run the same set of tests with and without JIT externally
|
|
|
|
requested. */
|
|
|
|
|
|
|
|
if (find_match_limit)
|
|
|
|
{
|
|
|
|
if (extra != NULL) { PCRE_FREE_STUDY(extra); }
|
|
|
|
extra = (pcre_extra *)malloc(sizeof(pcre_extra));
|
|
|
|
extra->flags = 0;
|
|
|
|
|
|
|
|
(void)check_match_limit(re, extra, bptr, len, start_offset,
|
|
|
|
options|g_notempty, use_offsets, use_size_offsets,
|
|
|
|
PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
|
|
|
|
PCRE_ERROR_MATCHLIMIT, "match()");
|
|
|
|
|
|
|
|
count = check_match_limit(re, extra, bptr, len, start_offset,
|
|
|
|
options|g_notempty, use_offsets, use_size_offsets,
|
|
|
|
PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
|
|
|
|
PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If callout_data is set, use the interface with additional data */
|
|
|
|
|
|
|
|
else if (callout_data_set)
|
|
|
|
{
|
|
|
|
if (extra == NULL)
|
|
|
|
{
|
|
|
|
extra = (pcre_extra *)malloc(sizeof(pcre_extra));
|
|
|
|
extra->flags = 0;
|
|
|
|
}
|
|
|
|
extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
|
|
|
|
extra->callout_data = &callout_data;
|
|
|
|
PCRE_EXEC(count, re, extra, bptr, len, start_offset,
|
|
|
|
options | g_notempty, use_offsets, use_size_offsets);
|
|
|
|
extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The normal case is just to do the match once, with the default
|
|
|
|
value of match_limit. */
|
|
|
|
|
|
|
|
#if !defined NODFA
|
|
|
|
else if (all_use_dfa || use_dfa)
|
|
|
|
{
|
|
|
|
if (dfa_workspace == NULL)
|
|
|
|
dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
|
|
|
|
if (dfa_matched++ == 0)
|
|
|
|
dfa_workspace[0] = -1; /* To catch bad restart */
|
|
|
|
PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
|
|
|
|
(options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
|
|
|
|
DFA_WS_DIMENSION);
|
|
|
|
if (count == 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
|
|
|
|
count = use_size_offsets/2;
|
|
|
|
}
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
#endif
|
2014-04-30 18:55:24 +02:00
|
|
|
|
|
|
|
else
|
|
|
|
{
|
|
|
|
PCRE_EXEC(count, re, extra, bptr, len, start_offset,
|
|
|
|
options | g_notempty, use_offsets, use_size_offsets);
|
|
|
|
if (count == 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Matched, but too many substrings\n");
|
|
|
|
/* 2 is a special case; match can be returned */
|
|
|
|
count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Matched */
|
|
|
|
|
|
|
|
if (count >= 0)
|
|
|
|
{
|
|
|
|
int i, maxcount;
|
|
|
|
void *cnptr, *gnptr;
|
|
|
|
|
|
|
|
#if !defined NODFA
|
|
|
|
if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
|
|
|
|
#endif
|
|
|
|
/* 2 is a special case; match can be returned */
|
|
|
|
maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
|
|
|
|
|
|
|
|
/* This is a check against a lunatic return value. */
|
|
|
|
|
|
|
|
if (count > maxcount)
|
|
|
|
{
|
|
|
|
fprintf(outfile,
|
|
|
|
"** PCRE error: returned count %d is too big for offset size %d\n",
|
|
|
|
count, use_size_offsets);
|
|
|
|
count = use_size_offsets/3;
|
|
|
|
if (do_g || do_G)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
|
|
|
|
do_g = do_G = FALSE; /* Break g/G loop */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* do_allcaps requests showing of all captures in the pattern, to check
|
|
|
|
unset ones at the end. */
|
|
|
|
|
|
|
|
if (do_allcaps)
|
|
|
|
{
|
|
|
|
if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
|
|
|
|
goto SKIP_DATA;
|
|
|
|
count++; /* Allow for full match */
|
|
|
|
if (count * 2 > use_size_offsets) count = use_size_offsets/2;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Output the captured substrings. Note that, for the matched string,
|
|
|
|
the use of \K in an assertion can make the start later than the end. */
|
|
|
|
|
|
|
|
for (i = 0; i < count * 2; i += 2)
|
|
|
|
{
|
|
|
|
if (use_offsets[i] < 0)
|
|
|
|
{
|
|
|
|
if (use_offsets[i] != -1)
|
|
|
|
fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
|
|
|
|
use_offsets[i], i);
|
|
|
|
if (use_offsets[i+1] != -1)
|
|
|
|
fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
|
|
|
|
use_offsets[i+1], i+1);
|
|
|
|
fprintf(outfile, "%2d: <unset>\n", i/2);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int start = use_offsets[i];
|
|
|
|
int end = use_offsets[i+1];
|
|
|
|
|
|
|
|
if (start > end)
|
|
|
|
{
|
|
|
|
start = use_offsets[i+1];
|
|
|
|
end = use_offsets[i];
|
|
|
|
fprintf(outfile, "Start of matched string is beyond its end - "
|
|
|
|
"displaying from end to start.\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(outfile, "%2d: ", i/2);
|
|
|
|
PCHARSV(bptr, start, end - start, outfile);
|
|
|
|
if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
|
|
|
|
fprintf(outfile, "\n");
|
|
|
|
|
|
|
|
/* Note: don't use the start/end variables here because we want to
|
|
|
|
show the text from what is reported as the end. */
|
|
|
|
|
|
|
|
if (do_showcaprest || (i == 0 && do_showrest))
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%2d+ ", i/2);
|
|
|
|
PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
|
|
|
|
outfile);
|
|
|
|
fprintf(outfile, "\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (markptr != NULL)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "MK: ");
|
|
|
|
PCHARSV(markptr, 0, -1, outfile);
|
|
|
|
fprintf(outfile, "\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < 32; i++)
|
|
|
|
{
|
|
|
|
if ((copystrings & (1 << i)) != 0)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
char copybuffer[256];
|
|
|
|
PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
|
|
|
|
copybuffer, sizeof(copybuffer));
|
|
|
|
if (rc < 0)
|
|
|
|
fprintf(outfile, "copy substring %d failed %d\n", i, rc);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%2dC ", i);
|
|
|
|
PCHARSV(copybuffer, 0, rc, outfile);
|
|
|
|
fprintf(outfile, " (%d)\n", rc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cnptr = copynames;
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
char copybuffer[256];
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
#ifdef SUPPORT_PCRE32
|
2014-04-30 18:55:24 +02:00
|
|
|
if (pcre_mode == PCRE32_MODE)
|
|
|
|
{
|
|
|
|
if (*(pcre_uint32 *)cnptr == 0) break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
if (pcre_mode == PCRE16_MODE)
|
|
|
|
{
|
|
|
|
if (*(pcre_uint16 *)cnptr == 0) break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
if (pcre_mode == PCRE8_MODE)
|
|
|
|
{
|
|
|
|
if (*(pcre_uint8 *)cnptr == 0) break;
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
#endif
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
|
|
|
|
cnptr, copybuffer, sizeof(copybuffer));
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
if (rc < 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "copy substring ");
|
|
|
|
PCHARSV(cnptr, 0, -1, outfile);
|
|
|
|
fprintf(outfile, " failed %d\n", rc);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(outfile, " C ");
|
|
|
|
PCHARSV(copybuffer, 0, rc, outfile);
|
|
|
|
fprintf(outfile, " (%d) ", rc);
|
|
|
|
PCHARSV(cnptr, 0, -1, outfile);
|
|
|
|
putc('\n', outfile);
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < 32; i++)
|
|
|
|
{
|
|
|
|
if ((getstrings & (1 << i)) != 0)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
const char *substring;
|
|
|
|
PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
|
|
|
|
if (rc < 0)
|
|
|
|
fprintf(outfile, "get substring %d failed %d\n", i, rc);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%2dG ", i);
|
|
|
|
PCHARSV(substring, 0, rc, outfile);
|
|
|
|
fprintf(outfile, " (%d)\n", rc);
|
|
|
|
PCRE_FREE_SUBSTRING(substring);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
gnptr = getnames;
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
const char *substring;
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE32
|
|
|
|
if (pcre_mode == PCRE32_MODE)
|
|
|
|
{
|
|
|
|
if (*(pcre_uint32 *)gnptr == 0) break;
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
#endif
|
2014-04-30 18:55:24 +02:00
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
if (pcre_mode == PCRE16_MODE)
|
|
|
|
{
|
|
|
|
if (*(pcre_uint16 *)gnptr == 0) break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
if (pcre_mode == PCRE8_MODE)
|
|
|
|
{
|
|
|
|
if (*(pcre_uint8 *)gnptr == 0) break;
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
#endif
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
|
|
|
|
gnptr, &substring);
|
|
|
|
if (rc < 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "get substring ");
|
|
|
|
PCHARSV(gnptr, 0, -1, outfile);
|
|
|
|
fprintf(outfile, " failed %d\n", rc);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(outfile, " G ");
|
|
|
|
PCHARSV(substring, 0, rc, outfile);
|
|
|
|
fprintf(outfile, " (%d) ", rc);
|
|
|
|
PCHARSV(gnptr, 0, -1, outfile);
|
|
|
|
PCRE_FREE_SUBSTRING(substring);
|
|
|
|
putc('\n', outfile);
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
if (getlist)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
const char **stringlist;
|
|
|
|
PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
|
|
|
|
if (rc < 0)
|
|
|
|
fprintf(outfile, "get substring list failed %d\n", rc);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (i = 0; i < count; i++)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "%2dL ", i);
|
|
|
|
PCHARSV(stringlist[i], 0, -1, outfile);
|
|
|
|
putc('\n', outfile);
|
|
|
|
}
|
|
|
|
if (stringlist[i] != NULL)
|
|
|
|
fprintf(outfile, "string list not terminated by NULL\n");
|
|
|
|
PCRE_FREE_SUBSTRING_LIST(stringlist);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* There was a partial match. If the bumpalong point is not the same as
|
|
|
|
the first inspected character, show the offset explicitly. */
|
|
|
|
|
|
|
|
else if (count == PCRE_ERROR_PARTIAL)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Partial match");
|
|
|
|
if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
|
|
|
|
fprintf(outfile, " at offset %d", use_offsets[2]);
|
|
|
|
if (markptr != NULL)
|
|
|
|
{
|
|
|
|
fprintf(outfile, ", mark=");
|
|
|
|
PCHARSV(markptr, 0, -1, outfile);
|
|
|
|
}
|
|
|
|
if (use_size_offsets > 1)
|
|
|
|
{
|
|
|
|
fprintf(outfile, ": ");
|
|
|
|
PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
|
|
|
|
outfile);
|
|
|
|
}
|
|
|
|
if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
|
|
|
|
fprintf(outfile, "\n");
|
|
|
|
break; /* Out of the /g loop */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Failed to match. If this is a /g or /G loop and we previously set
|
|
|
|
g_notempty after a null match, this is not necessarily the end. We want
|
|
|
|
to advance the start offset, and continue. We won't be at the end of the
|
|
|
|
string - that was checked before setting g_notempty.
|
|
|
|
|
|
|
|
Complication arises in the case when the newline convention is "any",
|
|
|
|
"crlf", or "anycrlf". If the previous match was at the end of a line
|
|
|
|
terminated by CRLF, an advance of one character just passes the \r,
|
|
|
|
whereas we should prefer the longer newline sequence, as does the code in
|
|
|
|
pcre_exec(). Fudge the offset value to achieve this. We check for a
|
|
|
|
newline setting in the pattern; if none was set, use PCRE_CONFIG() to
|
|
|
|
find the default.
|
|
|
|
|
|
|
|
Otherwise, in the case of UTF-8 matching, the advance must be one
|
|
|
|
character, not one byte. */
|
|
|
|
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (g_notempty != 0)
|
|
|
|
{
|
|
|
|
int onechar = 1;
|
|
|
|
unsigned int obits = REAL_PCRE_OPTIONS(re);
|
|
|
|
use_offsets[0] = start_offset;
|
|
|
|
if ((obits & PCRE_NEWLINE_BITS) == 0)
|
|
|
|
{
|
|
|
|
int d;
|
|
|
|
(void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
|
|
|
|
/* Note that these values are always the ASCII ones, even in
|
|
|
|
EBCDIC environments. CR = 13, NL = 10. */
|
|
|
|
obits = (d == 13)? PCRE_NEWLINE_CR :
|
|
|
|
(d == 10)? PCRE_NEWLINE_LF :
|
|
|
|
(d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
|
|
|
|
(d == -2)? PCRE_NEWLINE_ANYCRLF :
|
|
|
|
(d == -1)? PCRE_NEWLINE_ANY : 0;
|
|
|
|
}
|
|
|
|
if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
|
|
|
|
(obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
|
|
|
|
(obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
|
|
|
|
&&
|
|
|
|
start_offset < len - 1 && (
|
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
(pcre_mode == PCRE8_MODE &&
|
|
|
|
bptr[start_offset] == '\r' &&
|
|
|
|
bptr[start_offset + 1] == '\n') ||
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
(pcre_mode == PCRE16_MODE &&
|
|
|
|
((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
|
|
|
|
((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
|
2014-04-18 17:37:56 +02:00
|
|
|
#endif
|
2014-04-30 18:55:24 +02:00
|
|
|
#ifdef SUPPORT_PCRE32
|
|
|
|
(pcre_mode == PCRE32_MODE &&
|
|
|
|
((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
|
|
|
|
((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
|
|
|
|
#endif
|
|
|
|
0))
|
|
|
|
onechar++;
|
|
|
|
else if (utf)
|
|
|
|
{
|
|
|
|
while (start_offset + onechar < len)
|
|
|
|
{
|
|
|
|
if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
|
|
|
|
onechar++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
use_offsets[1] = start_offset + onechar;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
switch(count)
|
|
|
|
{
|
|
|
|
case PCRE_ERROR_NOMATCH:
|
|
|
|
if (gmatched == 0)
|
|
|
|
{
|
|
|
|
if (markptr == NULL)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "No match");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(outfile, "No match, mark = ");
|
|
|
|
PCHARSV(markptr, 0, -1, outfile);
|
|
|
|
}
|
|
|
|
if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
|
|
|
|
putc('\n', outfile);
|
|
|
|
}
|
|
|
|
break;
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
case PCRE_ERROR_BADUTF8:
|
|
|
|
case PCRE_ERROR_SHORTUTF8:
|
|
|
|
fprintf(outfile, "Error %d (%s UTF-%d string)", count,
|
|
|
|
(count == PCRE_ERROR_BADUTF8)? "bad" : "short",
|
|
|
|
8 * CHAR_SIZE);
|
|
|
|
if (use_size_offsets >= 2)
|
|
|
|
fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
|
|
|
|
use_offsets[1]);
|
|
|
|
fprintf(outfile, "\n");
|
|
|
|
break;
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
case PCRE_ERROR_BADUTF8_OFFSET:
|
|
|
|
fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
|
|
|
|
8 * CHAR_SIZE);
|
|
|
|
break;
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
default:
|
|
|
|
if (count < 0 &&
|
|
|
|
(-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
|
|
|
|
fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
|
|
|
|
else
|
|
|
|
fprintf(outfile, "Error %d (Unexpected value)\n", count);
|
|
|
|
break;
|
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
break; /* Out of the /g loop */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If not /g or /G we are done */
|
|
|
|
|
|
|
|
if (!do_g && !do_G) break;
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* If we have matched an empty string, first check to see if we are at
|
|
|
|
the end of the subject. If so, the /g loop is over. Otherwise, mimic what
|
|
|
|
Perl's /g option does. This turns out to be rather cunning. First we set
|
|
|
|
PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
|
|
|
|
same point. If this fails (picked up above) we advance to the next
|
|
|
|
character. */
|
2014-04-18 17:37:56 +02:00
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
g_notempty = 0;
|
|
|
|
|
|
|
|
if (use_offsets[0] == use_offsets[1])
|
|
|
|
{
|
|
|
|
if (use_offsets[0] == len) break;
|
|
|
|
g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* For /g, update the start offset, leaving the rest alone */
|
|
|
|
|
|
|
|
if (do_g) start_offset = use_offsets[1];
|
|
|
|
|
|
|
|
/* For /G, update the pointer and length */
|
|
|
|
|
|
|
|
else
|
|
|
|
{
|
|
|
|
bptr += use_offsets[1] * CHAR_SIZE;
|
|
|
|
len -= use_offsets[1];
|
|
|
|
}
|
|
|
|
} /* End of loop for /g and /G */
|
|
|
|
|
|
|
|
NEXT_DATA: continue;
|
|
|
|
} /* End of loop for data lines */
|
|
|
|
|
|
|
|
#endif
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
return PR_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Print PCRE version *
|
|
|
|
*************************************************/
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* The version string was read into 'version' at the start of execution. */
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
static void
|
|
|
|
print_version(FILE *f)
|
|
|
|
{
|
|
|
|
VERSION_TYPE *vp;
|
|
|
|
fprintf(f, "PCRE version ");
|
|
|
|
for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
|
2014-04-30 18:55:24 +02:00
|
|
|
fprintf(f, "\n");
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Print newline configuration *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* Output is always to stdout.
|
|
|
|
|
|
|
|
Arguments:
|
|
|
|
rc the return code from PCRE_CONFIG_NEWLINE
|
|
|
|
isc TRUE if called from "-C newline"
|
|
|
|
Returns: nothing
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void
|
|
|
|
print_newline_config(unsigned int rc, BOOL isc)
|
|
|
|
{
|
|
|
|
if (!isc) printf(" Newline sequence is ");
|
|
|
|
if (rc < sizeof(newlines)/sizeof(char *))
|
|
|
|
printf("%s\n", newlines[rc]);
|
|
|
|
else
|
|
|
|
printf("a non-standard value: %d\n", rc);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Usage function *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
static void
usage(void)
{
/* Synopsis. */

printf("Usage: pcre2test [options] [<input file> [<output file>]]\n\n"
       "Input and output default to stdin and stdout.\n");

/* Say whether this build can use line editing for terminal input. */

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcre2test is not linked with readline().\n");
#endif

printf("\nOptions:\n");

/* The library-width selectors are listed only for the widths that were
compiled in. */

#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif

/* The remaining options are unconditional, so they are emitted as a single
concatenated string. */

printf(" -b set default pattern control 'fullbytecode'\n"
       " -C show PCRE2 compile-time options and exit\n"
       " -C arg show a specific compile-time option and exit\n"
       " with its value if numeric (else 0). The arg can be:\n"
       " linksize internal link size [2, 3, 4]\n"
       " pcre8 8 bit library support enabled [0, 1]\n"
       " pcre16 16 bit library support enabled [0, 1]\n"
       " pcre32 32 bit library support enabled [0, 1]\n"
       " utf Unicode Transformation Format supported [0, 1]\n"
       " jit Just-in-time compiler supported [0, 1]\n"
       " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n"
       " bsr \\R type [ANYCRLF, ANY]\n"
       " -d set default pattern control 'debug'\n"
       " -data <s> set default data control fields\n"
       " -help show usage information\n"
       " -i set default pattern control 'info'\n"
       " -q quiet: do not output PCRE version number at start\n"
       " -pattern <s> set default pattern control fields\n"
       " -S <n> set stack size to <n> megabytes\n"
       " -t [<n>] time compilation and execution, repeating <n> times\n"
       " -tm [<n>] time execution (matching) only, repeating <n> times\n"
       " -T same as -t, but show total times at the end\n"
       " -TM same as -tm, but show total time at the end\n");
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Handle -C option *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* This option outputs configuration options and sets an appropriate return
|
|
|
|
code when asked for a single option. The code is abstracted into a separate
|
|
|
|
function because of its size. Use whichever pcre2_config() function is
|
|
|
|
available.
|
|
|
|
|
|
|
|
Argument: an option name or NULL
|
|
|
|
Returns: the return code
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
c_option(const char *arg)
|
|
|
|
{
|
|
|
|
unsigned long int lrc;
|
|
|
|
int rc;
|
|
|
|
int yield = 0;
|
|
|
|
|
|
|
|
if (arg != NULL)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; i < COPTLISTCOUNT; i++)
|
|
|
|
if (strcmp(arg, coptlist[i].name) == 0) break;
|
|
|
|
|
|
|
|
if (i >= COPTLISTCOUNT)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "** Unknown -C option '%s'\n", arg);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (coptlist[i].type)
|
|
|
|
{
|
|
|
|
case CONF_BSR:
|
|
|
|
(void)PCRE2_CONFIG(coptlist[i].value, &rc);
|
|
|
|
printf("%s\n", rc? "ANYCRLF" : "ANY");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CONF_FIX:
|
|
|
|
yield = coptlist[i].value;
|
|
|
|
printf("%d\n", yield);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CONF_FIZ:
|
|
|
|
rc = coptlist[i].value;
|
|
|
|
printf("%d\n", rc);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CONF_INT:
|
|
|
|
(void)PCRE2_CONFIG(coptlist[i].value, &yield);
|
|
|
|
printf("%d\n", yield);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CONF_NL:
|
|
|
|
(void)PCRE2_CONFIG(coptlist[i].value, &rc);
|
|
|
|
print_newline_config(rc, TRUE);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* For VMS, return the value by setting a symbol, for certain values only. */
|
|
|
|
|
|
|
|
#ifdef __VMS
|
|
|
|
if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
|
|
|
|
{
|
|
|
|
char ucname[16];
|
|
|
|
strcpy(ucname, coptlist[i].name);
|
|
|
|
for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i];
|
|
|
|
vms_setsymbol(ucname, 0, rc);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return yield;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* No argument for -C: output all configuration information. */
|
|
|
|
|
|
|
|
print_version(stdout);
|
|
|
|
printf("\nCompiled with\n");
|
|
|
|
|
|
|
|
#ifdef EBCDIC
|
|
|
|
printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
printf(" 8-bit support\n");
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
printf(" 16-bit support\n");
|
|
|
|
#endif
|
|
|
|
#ifdef SUPPORT_PCRE32
|
|
|
|
printf(" 32-bit support\n");
|
|
|
|
#endif
|
|
|
|
|
|
|
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_UTF, &rc);
|
|
|
|
printf (" %sUTF support\n", rc ? "" : "No ");
|
|
|
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &rc);
|
|
|
|
if (rc != 0)
|
|
|
|
{
|
|
|
|
const char *arch;
|
|
|
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, (void *)(&arch));
|
|
|
|
printf(" Just-in-time compiler support: %s\n", arch);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
printf(" No just-in-time compiler support\n");
|
|
|
|
}
|
|
|
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &rc);
|
|
|
|
print_newline_config(rc, FALSE);
|
|
|
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &rc);
|
|
|
|
printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
|
|
|
|
"all Unicode newlines");
|
|
|
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_LINK_SIZE, &rc);
|
|
|
|
printf(" Internal link size = %d\n", rc);
|
|
|
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENS_LIMIT, &lrc);
|
|
|
|
printf(" Parentheses nest limit = %ld\n", lrc);
|
|
|
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCH_LIMIT, &lrc);
|
|
|
|
printf(" Default match limit = %ld\n", lrc);
|
|
|
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
|
|
|
|
printf(" Default recursion depth limit = %ld\n", lrc);
|
|
|
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &rc);
|
|
|
|
printf(" Match recursion uses %s", rc? "stack" : "heap");
|
|
|
|
|
|
|
|
#ifdef FIXME
|
|
|
|
if (showstore)
|
|
|
|
{
|
|
|
|
PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
|
|
|
|
printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
printf("\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Main Program *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
int
|
|
|
|
main(int argc, char **argv)
|
|
|
|
{
|
|
|
|
uint32_t yield = 0;
|
|
|
|
uint32_t op = 1;
|
|
|
|
uint32_t stack_size;
|
|
|
|
BOOL notdone = TRUE;
|
|
|
|
BOOL quiet = FALSE;
|
|
|
|
BOOL showtotaltimes = FALSE;
|
|
|
|
BOOL skipping = FALSE;
|
|
|
|
char *arg_data = NULL;
|
|
|
|
char *arg_pattern = NULL;
|
|
|
|
|
|
|
|
PCRE2_JIT_STACK *jit_stack = NULL;
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* The offsets to the options and control bits fields of the pattern and data
|
|
|
|
control blocks must be the same so that common options and controls such as
|
|
|
|
"anchored" or "memory" can work for either of them from a single table entry.
|
|
|
|
We cannot test this till runtime because "offsetof" does not work in the
|
|
|
|
preprocessor. */
|
|
|
|
|
|
|
|
if (PO(options) != DO(options) || PO(control) != DO(control))
|
|
|
|
{
|
|
|
|
fprintf(stderr, "** Coding error: "
|
|
|
|
"options and control offsets for pattern and data must be the same.\n");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
/* Get the PCRE version number. */
|
|
|
|
|
|
|
|
PCRE2_VERSION(version, VERSION_SIZE);
|
|
|
|
|
|
|
|
/* Get buffers from malloc() so that valgrind will check their misuse when
|
|
|
|
debugging. They grow automatically when very long lines are read. The 16-
|
|
|
|
and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
|
|
|
|
|
|
|
|
buffer = (uint8_t *)malloc(buffer_size);
|
|
|
|
pbuffer = pbuffer8 = (uint8_t *)malloc(buffer_size);
|
|
|
|
|
|
|
|
/* The following _setmode() stuff is some Windows magic that tells its runtime
|
|
|
|
library to translate CRLF into a single LF character. At least, that's what
|
|
|
|
I've been told: never having used Windows I take this all on trust. Originally
|
|
|
|
it set 0x8000, but then I was advised that _O_BINARY was better. */
|
|
|
|
|
|
|
|
#if defined(_WIN32) || defined(WIN32)
|
|
|
|
_setmode( _fileno( stdout ), _O_BINARY );
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Initialization that does not depend on the running mode. */
|
|
|
|
|
|
|
|
memset(&def_patctl, sizeof(patctl), 0);
|
|
|
|
memset(&def_datctl, sizeof(datctl), 0);
|
2014-04-30 18:55:24 +02:00
|
|
|
def_datctl.oveccount = DEFAULT_OVECCOUNT;
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
/* Scan command line options. */
|
|
|
|
|
|
|
|
while (argc > 1 && argv[op][0] == '-')
|
|
|
|
{
|
|
|
|
const char *endptr;
|
|
|
|
char *arg = argv[op];
|
|
|
|
|
|
|
|
/* Display and/or set return code for configuration options. */
|
|
|
|
|
|
|
|
if (strcmp(arg, "-C") == 0)
|
|
|
|
{
|
|
|
|
yield = c_option(argv[op + 1]);
|
|
|
|
goto EXIT;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Select operating mode */
|
|
|
|
|
|
|
|
if (strcmp(arg, "-8") == 0)
|
|
|
|
{
|
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
test_mode = PCRE8_MODE;
|
|
|
|
#else
|
|
|
|
fprintf(stderr,
|
|
|
|
"** This version of PCRE was built without 8-bit support\n");
|
|
|
|
exit(1);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
else if (strcmp(arg, "-16") == 0)
|
|
|
|
{
|
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
test_mode = PCRE16_MODE;
|
|
|
|
#else
|
|
|
|
fprintf(stderr,
|
|
|
|
"** This version of PCRE was built without 16-bit support\n");
|
|
|
|
exit(1);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
else if (strcmp(arg, "-32") == 0)
|
|
|
|
{
|
|
|
|
#ifdef SUPPORT_PCRE32
|
|
|
|
test_mode = PCRE32_MODE;
|
|
|
|
#else
|
|
|
|
fprintf(stderr,
|
|
|
|
"** This version of PCRE was built without 32-bit support\n");
|
|
|
|
exit(1);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Set quiet (no version verification) */
|
|
|
|
|
|
|
|
else if (strcmp(arg, "-q") == 0) quiet = TRUE;
|
|
|
|
|
|
|
|
/* Set system stack size */
|
|
|
|
|
|
|
|
else if (strcmp(arg, "-S") == 0 && argc > 2 &&
|
|
|
|
((stack_size = get_value(argv[op+1], &endptr)), *endptr == 0))
|
|
|
|
{
|
|
|
|
#if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
|
|
|
|
fprintf(stderr, "PCRE: -S is not supported on this OS\n");
|
|
|
|
exit(1);
|
|
|
|
#else
|
|
|
|
int rc;
|
|
|
|
struct rlimit rlim;
|
|
|
|
getrlimit(RLIMIT_STACK, &rlim);
|
|
|
|
rlim.rlim_cur = stack_size * 1024 * 1024;
|
|
|
|
rc = setrlimit(RLIMIT_STACK, &rlim);
|
|
|
|
if (rc != 0)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "PCRE: setrlimit() failed with error %d\n", rc);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
op++;
|
|
|
|
argc--;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
/* Set some common pattern controls */
|
|
|
|
|
|
|
|
else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBYTECODE;
|
|
|
|
else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
|
|
|
|
else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
/* Set timing parameters */
|
|
|
|
|
|
|
|
else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
|
|
|
|
strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
|
|
|
|
{
|
|
|
|
int temp;
|
|
|
|
int both = arg[2] == 0;
|
|
|
|
showtotaltimes = arg[1] == 'T';
|
|
|
|
if (argc > 2 && (temp = get_value(argv[op+1], &endptr), *endptr == 0))
|
|
|
|
{
|
|
|
|
timeitm = temp;
|
|
|
|
op++;
|
|
|
|
argc--;
|
|
|
|
}
|
|
|
|
else timeitm = LOOPREPEAT;
|
|
|
|
if (both) timeit = timeitm;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Give help */
|
|
|
|
|
|
|
|
else if (strcmp(arg, "-help") == 0 ||
|
|
|
|
strcmp(arg, "--help") == 0)
|
|
|
|
{
|
|
|
|
usage();
|
|
|
|
goto EXIT;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The following options save their data for processing once we know what
|
|
|
|
the running mode is. */
|
|
|
|
|
|
|
|
else if (strcmp(arg, "-data") == 0)
|
|
|
|
{
|
|
|
|
arg_data = argv[op+1];
|
|
|
|
goto CHECK_VALUE_EXISTS;
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (strcmp(arg, "-pattern") == 0)
|
|
|
|
{
|
|
|
|
arg_pattern = argv[op+1];
|
|
|
|
CHECK_VALUE_EXISTS:
|
|
|
|
if (argc <= 2)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "** Missing value for %s\n", arg);
|
|
|
|
yield = 1;
|
|
|
|
goto EXIT;
|
|
|
|
}
|
|
|
|
op++;
|
|
|
|
argc--;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Unrecognized option */
|
|
|
|
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
|
|
|
|
usage();
|
|
|
|
yield = 1;
|
|
|
|
goto EXIT;
|
|
|
|
}
|
|
|
|
op++;
|
|
|
|
argc--;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialize things that cannot be done until we know which test mode we are
|
|
|
|
running in. */
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
code_unit_size = test_mode/8;
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
if (test_mode == PCRE8_MODE)
|
|
|
|
{
|
|
|
|
default_pat_context8 = pcre2_compile_context_create_8(NULL);
|
|
|
|
pat_context8 = pcre2_compile_context_create_8(NULL);
|
|
|
|
default_dat_context8 = pcre2_match_context_create_8(NULL);
|
|
|
|
dat_context8 = pcre2_match_context_create_8(NULL);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
if (test_mode == PCRE16_MODE)
|
|
|
|
{
|
|
|
|
default_pat_context16 = pcre2_compile_context_create_16(NULL);
|
|
|
|
pat_context16 = pcre2_compile_context_create_16(NULL);
|
|
|
|
default_dat_context16 = pcre2_match_context_create_16(NULL);
|
|
|
|
dat_context16 = pcre2_match_context_create_16(NULL);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE32
|
|
|
|
if (test_mode == PCRE32_MODE)
|
|
|
|
{
|
|
|
|
default_pat_context32 = pcre2_compile_context_create_32(NULL);
|
|
|
|
pat_context32 = pcre2_compile_context_create_32(NULL);
|
|
|
|
default_dat_context32 = pcre2_match_context_create_32(NULL);
|
|
|
|
dat_context32 = pcre2_match_context_create_32(NULL);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Handle command line modifier settings, sending any error messages to
|
|
|
|
stderr. We need to know the mode before modifying the context, and it is tidier
|
|
|
|
to do them all in the same way. */
|
|
|
|
|
|
|
|
outfile = stderr;
|
|
|
|
if ((arg_pattern != NULL &&
|
|
|
|
!decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
|
|
|
|
(arg_data != NULL &&
|
|
|
|
!decode_modifiers((uint8_t *)arg_data, CTX_DEFDAT, NULL, &def_datctl)))
|
|
|
|
{
|
|
|
|
yield = 1;
|
|
|
|
goto EXIT;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Sort out the input and output files, defaulting to stdin/stdout. */
|
|
|
|
|
|
|
|
infile = stdin;
|
|
|
|
outfile = stdout;
|
|
|
|
|
|
|
|
if (argc > 1)
|
|
|
|
{
|
|
|
|
infile = fopen(argv[op], INPUT_MODE);
|
|
|
|
if (infile == NULL)
|
|
|
|
{
|
|
|
|
printf("** Failed to open %s\n", argv[op]);
|
|
|
|
yield = 1;
|
|
|
|
goto EXIT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (argc > 2)
|
|
|
|
{
|
|
|
|
outfile = fopen(argv[op+1], OUTPUT_MODE);
|
|
|
|
if (outfile == NULL)
|
|
|
|
{
|
|
|
|
printf("** Failed to open %s\n", argv[op+1]);
|
|
|
|
yield = 1;
|
|
|
|
goto EXIT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Output a heading line unless quiet, then process input lines. */
|
|
|
|
|
|
|
|
if (!quiet) print_version(outfile);
|
|
|
|
|
2014-04-30 18:55:24 +02:00
|
|
|
SET(compiled_code, NULL);
|
|
|
|
preg.re_pcre2_code = NULL;
|
|
|
|
preg.re_match_data = NULL;
|
|
|
|
|
2014-04-18 17:37:56 +02:00
|
|
|
while (notdone)
|
|
|
|
{
|
|
|
|
uint8_t *p;
|
|
|
|
int rc = PR_OK;
|
|
|
|
BOOL expectdata = TEST(compiled_code, !=, NULL) || preg.re_pcre2_code != NULL;
|
|
|
|
|
|
|
|
if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL)
|
|
|
|
break;
|
|
|
|
if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
|
|
|
|
fflush(outfile);
|
|
|
|
p = buffer;
|
|
|
|
|
|
|
|
/* If we have a pattern set up for testing, or we are skipping after a
|
|
|
|
compile failure, a blank line terminates this test; otherwise process the
|
|
|
|
line as a data line. */
|
|
|
|
|
|
|
|
if (expectdata || skipping)
|
|
|
|
{
|
|
|
|
while (isspace(*p)) p++;
|
|
|
|
if (*p == 0)
|
|
|
|
{
|
|
|
|
if (preg.re_pcre2_code != NULL)
|
2014-04-30 18:55:24 +02:00
|
|
|
{
|
2014-04-18 17:37:56 +02:00
|
|
|
regfree(&preg);
|
|
|
|
preg.re_pcre2_code = NULL;
|
2014-04-30 18:55:24 +02:00
|
|
|
preg.re_match_data = NULL;
|
2014-04-18 17:37:56 +02:00
|
|
|
}
|
2014-04-30 18:55:24 +02:00
|
|
|
if (TEST(compiled_code, !=, NULL))
|
|
|
|
{
|
2014-04-18 17:37:56 +02:00
|
|
|
SUB1(pcre2_code_free, compiled_code);
|
|
|
|
SET(compiled_code, NULL);
|
2014-04-30 18:55:24 +02:00
|
|
|
}
|
2014-04-18 17:37:56 +02:00
|
|
|
skipping = FALSE;
|
|
|
|
}
|
|
|
|
else if (!skipping) rc = process_data();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We do not have a pattern set up for testing. Lines starting with # are
|
|
|
|
either comments or special commands. Blank lines are ignored. Otherwise, the
|
|
|
|
line must start with a valid delimiter. It is then processed as a pattern
|
|
|
|
line. */
|
|
|
|
|
|
|
|
else if (*p == '#')
|
|
|
|
{
|
|
|
|
if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
|
|
|
|
rc = process_command();
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (strchr("\"/!'`-+=:;.,", *p) != NULL)
|
|
|
|
{
|
|
|
|
rc = process_pattern();
|
|
|
|
}
|
|
|
|
|
|
|
|
else
|
|
|
|
{
|
|
|
|
while (isspace(*p)) p++;
|
|
|
|
if (*p != 0)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "** Invalid pattern delimiter '%c'.\n", *buffer);
|
|
|
|
rc = PR_SKIP;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rc == PR_SKIP && infile != stdin) skipping = TRUE;
|
|
|
|
else if (rc == PR_ABEND) goto EXIT;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Finish off a normal run. */
|
|
|
|
|
|
|
|
if (infile == stdin) fprintf(outfile, "\n");
|
|
|
|
|
|
|
|
if (showtotaltimes)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "--------------------------------------\n");
|
|
|
|
if (timeit > 0)
|
|
|
|
{
|
|
|
|
fprintf(outfile, "Total compile time %.4f milliseconds\n",
|
|
|
|
(((double)total_compile_time * 1000.0) / (double)timeit) /
|
|
|
|
(double)CLOCKS_PER_SEC);
|
|
|
|
}
|
|
|
|
fprintf(outfile, "Total execute time %.4f milliseconds\n",
|
|
|
|
(((double)total_match_time * 1000.0) / (double)timeitm) /
|
|
|
|
(double)CLOCKS_PER_SEC);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
EXIT:
|
|
|
|
|
|
|
|
if (infile != NULL && infile != stdin) fclose(infile);
|
|
|
|
if (outfile != NULL && outfile != stdout) fclose(outfile);
|
|
|
|
|
|
|
|
free(buffer);
|
|
|
|
free(dbuffer);
|
|
|
|
free(pbuffer);
|
2014-04-30 18:55:24 +02:00
|
|
|
regfree(&preg);
|
|
|
|
SUB1(pcre2_code_free, compiled_code);
|
2014-04-18 17:37:56 +02:00
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE8
|
|
|
|
if (pat_context8 != NULL) pcre2_compile_context_free_8(pat_context8);
|
2014-04-30 18:55:24 +02:00
|
|
|
if (default_pat_context8 != NULL)
|
2014-04-18 17:37:56 +02:00
|
|
|
pcre2_compile_context_free_8(default_pat_context8);
|
|
|
|
if (dat_context8 != NULL) pcre2_match_context_free_8(dat_context8);
|
2014-04-30 18:55:24 +02:00
|
|
|
if (default_dat_context8 != NULL)
|
2014-04-18 17:37:56 +02:00
|
|
|
pcre2_match_context_free_8(default_dat_context8);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE16
|
|
|
|
if (pbuffer16 != NULL) free(pbuffer16);
|
|
|
|
if (pat_context16 != NULL) pcre2_compile_context_free_16(pat_context16);
|
2014-04-30 18:55:24 +02:00
|
|
|
if (default_pat_context16 != NULL)
|
2014-04-18 17:37:56 +02:00
|
|
|
pcre2_compile_context_free_16(default_pat_context16);
|
|
|
|
if (dat_context16 != NULL) pcre2_match_context_free_16(dat_context16);
|
2014-04-30 18:55:24 +02:00
|
|
|
if (default_dat_context16 != NULL)
|
2014-04-18 17:37:56 +02:00
|
|
|
pcre2_match_context_free_16(default_dat_context16);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef SUPPORT_PCRE32
|
|
|
|
if (pbuffer32 != NULL) free(pbuffer32);
|
|
|
|
if (pat_context32 != NULL) pcre2_compile_context_free_32(pat_context32);
|
2014-04-30 18:55:24 +02:00
|
|
|
if (default_pat_context32 != NULL)
|
2014-04-18 17:37:56 +02:00
|
|
|
pcre2_compile_context_free_32(default_pat_context32);
|
|
|
|
if (dat_context32 != NULL) pcre2_match_context_free_32(dat_context32);
|
2014-04-30 18:55:24 +02:00
|
|
|
if (default_dat_context32 != NULL)
|
2014-04-18 17:37:56 +02:00
|
|
|
pcre2_match_context_free_32(default_dat_context32);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(__VMS)
|
|
|
|
yield = SS$_NORMAL; /* Return values via DCL symbols */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* FIXME: temp avoid compiler warnings. */
|
|
|
|
|
|
|
|
(void)jit_stack;
|
|
|
|
|
|
|
|
return yield;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* End of pcre2test.c */
|