From 06ba06c763409fef9cabed6bdc7a07e8403478e8 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Fri, 18 Apr 2014 15:37:56 +0000 Subject: [PATCH] Refactor the framework for revised API. --- Makefile.am | 18 +- src/pcre2.h | 387 ++- src/pcre2.h.in | 387 ++- src/pcre2_compile.c | 37 +- src/pcre2_context.c | 334 +- src/{pcre2_dfa_exec.c => pcre2_dfa_match.c} | 12 +- src/pcre2_internal.h | 772 ++++- src/pcre2_intstructs.h | 102 + src/pcre2_jit_compile.c | 7 +- src/{pcre2_jit_exec.c => pcre2_jit_match.c} | 12 +- src/pcre2_jit_misc.c | 27 +- src/pcre2_maketables.c | 4 +- src/{pcre2_exec.c => pcre2_match.c} | 12 +- src/pcre2_match_data.c | 27 +- src/pcre2_pattern_info.c | 51 +- src/pcre2_substring.c | 137 +- src/pcre2_tables.c | 665 ++++ src/pcre2_ucd.c | 3297 ++++++++++++++++++ src/pcre2posix.c | 2 +- src/pcre2test.c | 3384 ++++++++++++++++++- 20 files changed, 8948 insertions(+), 726 deletions(-) rename src/{pcre2_dfa_exec.c => pcre2_dfa_match.c} (90%) create mode 100644 src/pcre2_intstructs.h rename src/{pcre2_jit_exec.c => pcre2_jit_match.c} (91%) rename src/{pcre2_exec.c => pcre2_match.c} (91%) create mode 100644 src/pcre2_tables.c create mode 100644 src/pcre2_ucd.c diff --git a/Makefile.am b/Makefile.am index 4889aa3..985c32c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -34,8 +34,8 @@ ACLOCAL_AMFLAGS = -I m4 # doc/html/pcre_config.html \ # doc/html/pcre_copy_named_substring.html \ # doc/html/pcre_copy_substring.html \ -# doc/html/pcre_dfa_exec.html \ -# doc/html/pcre_exec.html \ +# doc/html/pcre_dfa_match.html \ +# doc/html/pcre_match.html \ # doc/html/pcre_free_study.html \ # doc/html/pcre_free_substring.html \ # doc/html/pcre_free_substring_list.html \ @@ -45,7 +45,7 @@ ACLOCAL_AMFLAGS = -I m4 # doc/html/pcre_get_stringtable_entries.html \ # doc/html/pcre_get_substring.html \ # doc/html/pcre_get_substring_list.html \ -# doc/html/pcre_jit_exec.html \ +# doc/html/pcre_jit_match.html \ # doc/html/pcre_jit_stack_alloc.html \ # doc/html/pcre_jit_stack_free.html \ # 
doc/html/pcre_maketables.html \ @@ -87,8 +87,8 @@ ACLOCAL_AMFLAGS = -I m4 # doc/pcre2_config.3 \ # doc/pcre2_copy_named_substring.3 \ # doc/pcre2_copy_substring.3 \ -# doc/pcre2_dfa_exec.3 \ -# doc/pcre2_exec.3 \ +# doc/pcre2_dfa_match.3 \ +# doc/pcre2_match.3 \ # doc/pcre2_free_study.3 \ # doc/pcre2_free_substring.3 \ # doc/pcre2_free_substring_list.3 \ @@ -98,7 +98,7 @@ ACLOCAL_AMFLAGS = -I m4 # doc/pcre2_get_stringtable_entries.3 \ # doc/pcre2_get_substring.3 \ # doc/pcre2_get_substring_list.3 \ -# doc/pcre2_jit_exec.3 \ +# doc/pcre2_jit_match.3 \ # doc/pcre2_jit_stack_alloc.3 \ # doc/pcre2_jit_stack_free.3 \ # doc/pcre2_maketables.3 \ @@ -265,12 +265,12 @@ COMMON_SOURCES = \ src/pcre2_compile.c \ src/pcre2_config.c \ src/pcre2_context.c \ - src/pcre2_dfa_exec.c \ + src/pcre2_dfa_match.c \ src/pcre2_error.c \ - src/pcre2_exec.c \ + src/pcre2_match.c \ src/pcre2_internal.h \ src/pcre2_jit_compile.c \ - src/pcre2_jit_exec.c \ + src/pcre2_jit_match.c \ src/pcre2_jit_misc.c \ src/pcre2_maketables.c \ src/pcre2_match_data.c \ diff --git a/src/pcre2.h b/src/pcre2.h index 53ef296..88735c7 100644 --- a/src/pcre2.h +++ b/src/pcre2.h @@ -79,28 +79,7 @@ are defined. */ extern "C" { #endif -/* Public options. Those in the context may affect compilation, JIT -compilation, and/or interpretive execution. The following tags indicate which: - -C alters what is compiled -J alters what JIT compiles -E affects pcre2_exec() execution -D affects pcre2_dfa_exec() execution -*/ - -/* The first group of options are those that are set in the context. 
*/ - -#define PCRE2_ALT_BSUX 0x00000001 /* C */ -#define PCRE2_DOLLAR_ENDONLY 0x00000002 /* J E D */ -#define PCRE2_DUPNAMES 0x00000004 /* C */ -#define PCRE2_ALLOW_EMPTY_CLASS 0x00000008 /* C */ -#define PCRE2_MATCH_UNSET_BACKREF 0x00000010 /* C J E */ -#define PCRE2_NEVER_UTF 0x00000020 /* C */ -#define PCRE2_NEVER_UCP 0x00000040 /* C */ -#define PCRE2_UTF 0x00000080 /* C J E D */ -#define PCRE2_UCP 0x00000100 /* C J E D */ - -/* The following can be passed to pcre2_compile(), pcre2_exec(), or +/* The following options can be passed to pcre2_compile(), pcre2_exec(), or pcre2_dfa_exec(). PCRE2_NO_UTF_CHECK affects only the function to which it is passed. */ @@ -108,44 +87,56 @@ passed. */ #define PCRE2_NO_START_OPTIMIZE 0x00000002 #define PCRE2_NO_UTF_CHECK 0x00000004 -/* These are for pcre2_compile() only, affecting what is compiled, but not -otherwise affecting execution. */ +/* Other options that can be passed to pcre2_compile(). They may affect +compilation, JIT compilation, and/or interpretive execution. The following tags +indicate which: -#define PCRE2_AUTO_CALLOUT 0x00000100 -#define PCRE2_CASELESS 0x00000200 -#define PCRE2_DOTALL 0x00000400 -#define PCRE2_EXTENDED 0x00000800 -#define PCRE2_MULTILINE 0x00001000 -#define PCRE2_NO_AUTO_CAPTURE 0x00002000 -#define PCRE2_NO_AUTO_POSSESS 0x00004000 -#define PCRE2_UNGREEDY 0x00008000 +C alters what is compiled +J alters what JIT compiles +E is inspected during pcre2_match() execution +D is inspected during pcre2_dfa_match() execution +*/ -/* This pcre2_compile() option affects JIT compilation and interpretive -execution. 
*/ - -#define PCRE2_FIRSTLINE 0x00010000 +#define PCRE2_ALLOW_EMPTY_CLASS 0x00000008 /* C */ +#define PCRE2_ALT_BSUX 0x00000010 /* C */ +#define PCRE2_AUTO_CALLOUT 0x00000020 /* C */ +#define PCRE2_CASELESS 0x00000040 /* C */ +#define PCRE2_DOLLAR_ENDONLY 0x00000080 /* J E D */ +#define PCRE2_DOTALL 0x00000100 /* C */ +#define PCRE2_DUPNAMES 0x00000200 /* C */ +#define PCRE2_EXTENDED 0x00000400 /* C */ +#define PCRE2_FIRSTLINE 0x00000800 /* J E D */ +#define PCRE2_MATCH_UNSET_BACKREF 0x00001000 /* C J E */ +#define PCRE2_MULTILINE 0x00002000 /* C */ +#define PCRE2_NEVER_UCP 0x00004000 /* C */ +#define PCRE2_NEVER_UTF 0x00008000 /* C */ +#define PCRE2_NO_AUTO_CAPTURE 0x00010000 /* C */ +#define PCRE2_NO_AUTO_POSSESS 0x00020000 /* C */ +#define PCRE2_UCP 0x00040000 /* C J E D */ +#define PCRE2_UNGREEDY 0x00080000 /* C */ +#define PCRE2_UTF 0x00100000 /* C J E D */ /* These are for pcre2_jit_compile(). */ -#define PCRE2_JIT 0x00020000 -#define PCRE2_JIT_PARTIAL_SOFT 0x00040000 -#define PCRE2_JIT_PARTIAL_HARD 0x00080000 +#define PCRE2_JIT 0x00000001 /* For full matching */ +#define PCRE2_JIT_PARTIAL_SOFT 0x00000002 +#define PCRE2_JIT_PARTIAL_HARD 0x00000004 /* These are for pcre2_exec() and pcre2_dfa_exec(). */ -#define PCRE2_NOTBOL 0x00000100 -#define PCRE2_NOTEOL 0x00000200 -#define PCRE2_NOTEMPTY 0x00000400 -#define PCRE2_NOTEMPTY_ATSTART 0x00000800 -#define PCRE2_PARTIAL_SOFT 0x00001000 -#define PCRE2_PARTIAL_HARD 0x00002000 +#define PCRE2_NOTBOL 0x00000001 +#define PCRE2_NOTEOL 0x00000002 +#define PCRE2_NOTEMPTY 0x00000004 +#define PCRE2_NOTEMPTY_ATSTART 0x00000008 +#define PCRE2_PARTIAL_SOFT 0x00000010 +#define PCRE2_PARTIAL_HARD 0x00000020 /* These are additional options for pcre2_dfa_exec(). */ -#define PCRE2_DFA_RESTART 0x00010000 -#define PCRE2_DFA_SHORTEST 0x00020000 +#define PCRE2_DFA_RESTART 0x00000040 +#define PCRE2_DFA_SHORTEST 0x00000080 -/* Newline and \R settings, for use in the context. */ +/* Newline and \R settings, for use in the compile context. 
*/ #define PCRE2_NEWLINE_CR 0 #define PCRE2_NEWLINE_LF 1 @@ -238,14 +229,14 @@ execution. */ #define PCRE2_INFO_LASTCODEUNIT 10 #define PCRE2_INFO_LASTCODETYPE 11 #define PCRE2_INFO_MATCH_EMPTY 12 -#define PCRE2_INFO_MATCHLIMIT 13 +#define PCRE2_INFO_MATCH_LIMIT 13 #define PCRE2_INFO_MAXLOOKBEHIND 14 #define PCRE2_INFO_MINLENGTH 15 #define PCRE2_INFO_NAMECOUNT 16 #define PCRE2_INFO_NAMEENTRYSIZE 17 #define PCRE2_INFO_NAMETABLE 18 #define PCRE2_INFO_PATTERN_OPTIONS 19 -#define PCRE2_INFO_RECURSIONLIMIT 20 +#define PCRE2_INFO_RECURSION_LIMIT 20 #define PCRE2_INFO_SIZE 21 /* Request types for pcre2_config(). */ @@ -262,6 +253,10 @@ execution. */ #define PCRE2_CONFIG_STACKRECURSE 9 #define PCRE2_CONFIG_UTF 10 +/* A value that is used to indicate 'unset' in unsigned size_t fields. */ + +#define PCRE2_UNSET (~(size_t)0) + /* Types for patterns and subject strings. */ typedef uint8_t PCRE2_UCHAR8; @@ -277,8 +272,14 @@ typedef const PCRE2_UCHAR32 *PCRE2_SPTR32; declarations are defined in a macro that is expanded for each width later. */ #define PCRE2_TYPES_LIST \ -struct pcre2_real_context; \ -typedef struct pcre2_real_context pcre2_context; \ +struct pcre2_real_general_context; \ +typedef struct pcre2_real_general_context pcre2_general_context; \ +\ +struct pcre2_real_compile_context; \ +typedef struct pcre2_real_compile_context pcre2_compile_context; \ +\ +struct pcre2_real_match_context; \ +typedef struct pcre2_real_match_context pcre2_match_context; \ \ struct pcre2_real_code; \ typedef struct pcre2_real_code pcre2_code; \ @@ -335,104 +336,93 @@ information. */ #define PCRE2_GENERAL_INFO_FUNCTIONS \ PCRE2_EXP_DECL int pcre2_config(int, void *); \ -PCRE2_EXP_DECL size_t pcre2_get_exec_frame_size(void); \ +PCRE2_EXP_DECL size_t pcre2_get_match_frame_size(void); \ PCRE2_EXP_DECL int pcre2_version(PCRE2_UCHAR *, size_t); /* Functions for manipulating contexts. 
*/ -#define PCRE2_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL size_t pcre2_context_size(void); \ +#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL \ - pcre2_context *pcre2_copy_context(pcre2_context *); \ -PCRE2_EXP_DECL void pcre2_free_context(pcre2_context *); \ + pcre2_general_context *pcre2_general_context_copy(pcre2_general_context *); \ PCRE2_EXP_DECL \ - pcre2_context *pcre2_init_context(pcre2_context *); \ -\ -PCRE2_EXP_DECL uint32_t pcre2_get_bsr_convention(pcre2_context *); \ -PCRE2_EXP_DECL int (*pcre2_get_callout(pcre2_context *)) \ - (pcre2_callout_block *, void *); \ + pcre2_general_context *pcre2_general_context_create( \ + void *(*)(size_t, void *), \ + void (*)(void *, void *), void *); \ +PCRE2_EXP_DECL void pcre2_general_context_free(pcre2_general_context *); + +#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL \ - const unsigned char *pcre2_get_character_tables(pcre2_context *); \ -PCRE2_EXP_DECL uint32_t pcre2_get_context_options(pcre2_context *); \ -PCRE2_EXP_DECL uint32_t pcre2_get_match_limit(pcre2_context *); \ -PCRE2_EXP_DECL void pcre2_get_memory_management(pcre2_context *, \ - void *(**)(size_t, void *), \ - void (**)(void *, void *)); \ -PCRE2_EXP_DECL uint32_t pcre2_get_newline_convention(pcre2_context *); \ -PCRE2_EXP_DECL uint32_t pcre2_get_parens_nest_limit(pcre2_context *); \ -PCRE2_EXP_DECL int (*pcre2_get_recursion_guard(pcre2_context *)) \ - (uint32_t, void *); \ -PCRE2_EXP_DECL uint32_t pcre2_get_recursion_limit(pcre2_context *); \ -PCRE2_EXP_DECL void pcre2_get_recursion_memory_management(\ - pcre2_context *, \ - void *(**)(size_t, void *), \ - void (**)(void *, void *)); \ -PCRE2_EXP_DECL void *pcre2_get_user_data(pcre2_context *); \ -\ -PCRE2_EXP_DECL int pcre2_set_bsr_convention(pcre2_context *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_callout(pcre2_context *, \ - int (*)(pcre2_callout_block *, void *)); \ -PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_context *, \ + pcre2_compile_context 
*pcre2_compile_context_copy(pcre2_compile_context *); \ +PCRE2_EXP_DECL \ + pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\ +PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \ +PCRE2_EXP_DECL int pcre2_set_bsr_convention(pcre2_compile_context *, \ + uint32_t); \ +PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \ const unsigned char *); \ -PCRE2_EXP_DECL int pcre2_set_context_options(pcre2_context *, uint32_t, \ +PCRE2_EXP_DECL int pcre2_set_newline_convention(pcre2_compile_context *, \ uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_context *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_memory_management(pcre2_context *, \ - void * (*)(size_t, void *), \ - void (*)(void *, void *)); \ -PCRE2_EXP_DECL int pcre2_set_newline_convention(pcre2_context *, \ +PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \ uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_context *, \ +PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\ + pcre2_compile_context *, int (*)(uint32_t)); \ + +#define PCRE2_MATCH_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL \ + pcre2_match_context *pcre2_match_context_copy(pcre2_match_context *); \ +PCRE2_EXP_DECL \ + pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \ +PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \ + int (*)(pcre2_callout_block *, void *)); \ +PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_recursion_guard(pcre2_context *, \ - int (*)(uint32_t, void *)); \ -PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_context *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_recursion_memory_management(\ - pcre2_context *, void * (*)(size_t, void *), \ - void (*)(void *, void *)); \ -PCRE2_EXP_DECL int pcre2_set_user_data(pcre2_context *, void *); +PCRE2_EXP_DECL 
int pcre2_set_recursion_limit(pcre2_match_context *, \ + uint32_t); \ +PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \ + pcre2_match_context *, void *(*)(size_t, void *), \ + void (*)(void *, void *)); /* Functions concerned with compiling a pattern to PCRE internal code. */ #define PCRE2_COMPILE_FUNCTIONS \ PCRE2_EXP_DECL \ - pcre2_code *pcre2_compile(pcre2_context *, PCRE2_SPTR, int, \ - uint32_t, int *, size_t *); \ -PCRE2_EXP_DECL void pcre2_free_compiled_code(pcre2_context *, \ - pcre2_code *); + pcre2_code *pcre2_compile(PCRE2_SPTR, int, uint32_t, \ + int *, size_t *, pcre2_compile_context *); \ +PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *); /* Functions that give information about a compiled pattern. */ #define PCRE2_PATTERN_INFO_FUNCTIONS \ PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \ - void *); \ -PCRE2_EXP_DECL int pcre2_get_stringtable_entries(const pcre2_code *, \ - PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_UCHAR **); \ -PCRE2_EXP_DECL int pcre2_get_substring_number(const pcre2_code *, \ - PCRE2_SPTR); + void *); /* Functions for running a match and inspecting the result. 
*/ #define PCRE2_MATCH_FUNCTIONS \ PCRE2_EXP_DECL \ - pcre2_match_data *pcre2_create_match_data(pcre2_context *, size_t); \ -PCRE2_EXP_DECL int pcre2_dfa_exec(pcre2_context *, const pcre2_code *, \ + pcre2_match_data *pcre2_match_data_create(size_t, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL \ + pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, \ PCRE2_SPTR, int, size_t, uint32_t, \ - pcre2_match_data *, int *, size_t); \ -PCRE2_EXP_DECL int pcre2_exec(pcre2_context *, const pcre2_code *, \ + pcre2_match_data *, pcre2_match_context *, int *, \ + size_t); \ +PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \ PCRE2_SPTR, int, size_t, uint32_t, \ - pcre2_match_data *); \ -PCRE2_EXP_DECL void pcre2_free_match_data(pcre2_context *, \ - pcre2_match_data *); \ + pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \ PCRE2_EXP_DECL size_t pcre2_get_leftchar(pcre2_match_data *); \ PCRE2_EXP_DECL \ PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \ -PCRE2_EXP_DECL size_t *pcre2_get_ovector(pcre2_match_data *); \ -PCRE2_EXP_DECL size_t pcre2_get_ovector_slots(pcre2_match_data *); \ +PCRE2_EXP_DECL size_t pcre2_get_ovector_count(pcre2_match_data *); \ +PCRE2_EXP_DECL size_t *pcre2_get_ovector_pointer(pcre2_match_data *); \ PCRE2_EXP_DECL size_t pcre2_get_rightchar(pcre2_match_data *); \ PCRE2_EXP_DECL size_t pcre2_get_startchar(pcre2_match_data *); @@ -440,48 +430,51 @@ PCRE2_EXP_DECL size_t pcre2_get_startchar(pcre2_match_data *); /* Convenience functions for handling matched substrings. 
*/ #define PCRE2_SUBSTRING_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_copy_named_substring(pcre2_match_data *, \ +PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \ PCRE2_SPTR, PCRE2_UCHAR *, size_t); \ -PCRE2_EXP_DECL int pcre2_copy_substring(pcre2_match_data *, int, \ - PCRE2_UCHAR *, size_t); \ -PCRE2_EXP_DECL void pcre2_free_substring(pcre2_context *, PCRE2_UCHAR *); \ -PCRE2_EXP_DECL void pcre2_free_substring_list(pcre2_context *, \ - PCRE2_SPTR *); \ -PCRE2_EXP_DECL int pcre2_get_named_substring(pcre2_context *, \ - pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **); \ -PCRE2_EXP_DECL int pcre2_get_named_substring_length(pcre2_match_data *, \ +PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \ + int, PCRE2_UCHAR *, size_t); \ +PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \ +PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \ + PCRE2_SPTR, PCRE2_UCHAR **); \ +PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \ + int, PCRE2_UCHAR **); \ +PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \ PCRE2_SPTR); \ -PCRE2_EXP_DECL int pcre2_get_substring(pcre2_context *, \ - pcre2_match_data *, int, PCRE2_UCHAR **); \ -PCRE2_EXP_DECL int pcre2_get_substring_length(pcre2_match_data *, int); \ -PCRE2_EXP_DECL int pcre2_get_substring_list(pcre2_context *, \ - pcre2_match_data *, PCRE2_UCHAR ***); +PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \ + int); \ +PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \ + PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_UCHAR **); \ +PCRE2_EXP_DECL int pcre2_substring_number_from_name(\ + const pcre2_code *, PCRE2_SPTR); \ +PCRE2_EXP_DECL void pcre2_substring_list_free(PCRE2_SPTR *); \ +PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \ + PCRE2_UCHAR ***, size_t **); /* Functions for JIT processing */ #define PCRE2_JIT_FUNCTIONS \ -PCRE2_EXP_DECL void pcre2_jit_compile(pcre2_context *, pcre2_code *, \ 
- uint32_t); \ -PCRE2_EXP_DECL int pcre2_jit_exec(pcre2_context *, const pcre2_code *, \ +PCRE2_EXP_DECL void pcre2_jit_compile(pcre2_code *, uint32_t); \ +PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \ PCRE2_SPTR, int, size_t, uint32_t, \ pcre2_match_data *, pcre2_jit_stack *); \ -PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_context *); \ +PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\ PCRE2_EXP_DECL \ - pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_context *, size_t, \ - size_t); \ -PCRE2_EXP_DECL void pcre2_jit_stack_assign(pcre2_context *, \ - const pcre2_code *, pcre2_jit_callback, void *); \ -PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_context *, \ - pcre2_jit_stack *); + pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *, \ + size_t, size_t); \ +PCRE2_EXP_DECL void pcre2_jit_stack_assign(const pcre2_code *, \ + pcre2_jit_callback, void *); \ +PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *); /* Other miscellaneous functions. */ #define PCRE2_OTHER_FUNCTIONS \ PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, size_t); \ +PCRE2_EXP_DECL size_t pcre2_get_match_frame_size(void); \ PCRE2_EXP_DECL \ - const unsigned char *pcre2_maketables(pcre2_context *); \ + const unsigned char *pcre2_maketables(pcre2_general_context *); \ PCRE2_EXP_DECL int pcre2_pattern_to_host_byte_order(pcre2_code *); @@ -499,90 +492,86 @@ pcre2_compile are called by application code. 
*/ /* Data types */ -#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR) -#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR) +#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR) +#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR) -#define pcre2_code PCRE2_SUFFIX(pcre2_code_) -#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_) -#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_) +#define pcre2_code PCRE2_SUFFIX(pcre2_code_) +#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_) +#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_) -#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_) -#define pcre2_real_context PCRE2_SUFFIX(pcre2_real_context_) -#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_) -#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_) +#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_) +#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_) +#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_) +#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_) +#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_) +#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_) /* Data blocks */ -#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_) -#define pcre2_context PCRE2_SUFFIX(pcre2_context_) -#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_) +#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_) +#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_) +#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_) +#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_) +#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_) /* Functions: the complete list in alphabetical order */ +#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_) #define pcre2_compile PCRE2_SUFFIX(pcre2_compile_) +#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_) +#define pcre2_compile_context_create 
PCRE2_SUFFIX(pcre2_compile_context_create_) +#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_) #define pcre2_config PCRE2_SUFFIX(pcre2_config_) -#define pcre2_context_size PCRE2_SUFFIX(pcre2_context_size_) -#define pcre2_copy_context PCRE2_SUFFIX(pcre2_copy_context_) -#define pcre2_copy_named_substring PCRE2_SUFFIX(pcre2_copy_named_substring_) -#define pcre2_copy_substring PCRE2_SUFFIX(pcre2_copy_substring_) -#define pcre2_create_match_data PCRE2_SUFFIX(pcre2_create_match_data_) -#define pcre2_dfa_exec PCRE2_SUFFIX(pcre2_dfa_exec_) -#define pcre2_exec PCRE2_SUFFIX(pcre2_exec_) -#define pcre2_free_compiled_code PCRE2_SUFFIX(pcre2_free_compiled_code_) -#define pcre2_free_context PCRE2_SUFFIX(pcre2_free_context_) -#define pcre2_free_match_data PCRE2_SUFFIX(pcre2_free_match_data_) -#define pcre2_free_substring PCRE2_SUFFIX(pcre2_free_substring_) -#define pcre2_free_substring_list PCRE2_SUFFIX(pcre2_free_substring_list_) -#define pcre2_get_bsr_convention PCRE2_SUFFIX(pcre2_get_bsr_convention_) -#define pcre2_get_callout PCRE2_SUFFIX(pcre2_get_callout_) -#define pcre2_get_character_tables PCRE2_SUFFIX(pcre2_get_character_tables_) -#define pcre2_get_context_options PCRE2_SUFFIX(pcre2_get_context_options_) +#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_) +#define pcre2_match PCRE2_SUFFIX(pcre2_match_) +#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_) +#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_) +#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_) #define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_) #define pcre2_get_leftchar PCRE2_SUFFIX(pcre2_get_leftchar_) #define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_) -#define pcre2_get_match_limit PCRE2_SUFFIX(pcre2_get_match_limit_) -#define pcre2_get_memory_management PCRE2_SUFFIX(pcre2_get_memory_management_) -#define pcre2_get_named_substring PCRE2_SUFFIX(pcre2_get_named_substring_) 
-#define pcre2_get_named_substring_length PCRE2_SUFFIX(pcre2_get_named_substring_length_) -#define pcre2_get_newline_convention PCRE2_SUFFIX(pcre2_get_newline_convention_) -#define pcre2_get_ovector PCRE2_SUFFIX(pcre2_get_ovector_) -#define pcre2_get_ovector_slots PCRE2_SUFFIX(pcre2_get_ovector_slots_) -#define pcre2_get_parens_nest_limit PCRE2_SUFFIX(pcre2_get_parens_nest_limit_) -#define pcre2_get_recursion_guard PCRE2_SUFFIX(pcre2_get_recursion_guard_) -#define pcre2_get_recursion_limit PCRE2_SUFFIX(pcre2_get_recursion_limit_) -#define pcre2_get_recursion_memory_management PCRE2_SUFFIX(pcre2_get_recursion_memory_management_) +#define pcre2_get_match_frame_size PCRE2_SUFFIX(pcre2_get_match_frame_size_) +#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_) +#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_) #define pcre2_get_rightchar PCRE2_SUFFIX(pcre2_get_rightchar_) #define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_) -#define pcre2_get_stringtable_entries PCRE2_SUFFIX(pcre2_get_stringtable_entries_) -#define pcre2_get_substring PCRE2_SUFFIX(pcre2_get_substring_) -#define pcre2_get_substring_length PCRE2_SUFFIX(pcre2_get_substring_length_) -#define pcre2_get_substring_list PCRE2_SUFFIX(pcre2_get_substring_list_) -#define pcre2_get_substring_number PCRE2_SUFFIX(pcre2_get_substring_number_) -#define pcre2_get_user_data PCRE2_SUFFIX(pcre2_get_user_data_) -#define pcre2_init_context PCRE2_SUFFIX(pcre2_init_context_) #define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_) -#define pcre2_jit_exec PCRE2_SUFFIX(pcre2_jit_exec_) +#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_) #define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_) #define pcre2_jit_stack_alloc PCRE2_SUFFIX(pcre2_jit_stack_alloc_) #define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_) #define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_) #define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_) 
+#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_) +#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_) +#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_) +#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_) +#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_) +#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) #define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_) #define pcre2_pattern_to_host_byte_order PCRE2_SUFFIX(pcre2_pattern_to_host_byte_order_) #define pcre2_set_bsr_convention PCRE2_SUFFIX(pcre2_set_bsr_convention_) #define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_) #define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) -#define pcre2_set_context_options PCRE2_SUFFIX(pcre2_set_context_options_) +#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) -#define pcre2_set_memory_management PCRE2_SUFFIX(pcre2_set_memory_management_) #define pcre2_set_newline_convention PCRE2_SUFFIX(pcre2_set_newline_convention_) -#define pcre2_set_recursion_guard PCRE2_SUFFIX(pcre2_set_recursion_guard_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) #define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) -#define pcre2_set_user_data PCRE2_SUFFIX(pcre2_set_user_data_) +#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) +#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) +#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_) +#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_) +#define pcre2_substring_get_bynumber 
PCRE2_SUFFIX(pcre2_substring_get_bynumber_) +#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_) +#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_) +#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_) +#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_) +#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_) +#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_) #define pcre2_version PCRE2_SUFFIX(pcre2_version_) @@ -593,7 +582,9 @@ prototypes. */ PCRE2_TYPES_LIST PCRE2_STRUCTURE_LIST PCRE2_GENERAL_INFO_FUNCTIONS -PCRE2_CONTEXT_FUNCTIONS +PCRE2_GENERAL_CONTEXT_FUNCTIONS +PCRE2_COMPILE_CONTEXT_FUNCTIONS +PCRE2_MATCH_CONTEXT_FUNCTIONS PCRE2_COMPILE_FUNCTIONS PCRE2_PATTERN_INFO_FUNCTIONS PCRE2_MATCH_FUNCTIONS @@ -606,7 +597,9 @@ PCRE2_OTHER_FUNCTIONS PCRE2_TYPES_LIST PCRE2_STRUCTURE_LIST PCRE2_GENERAL_INFO_FUNCTIONS -PCRE2_CONTEXT_FUNCTIONS +PCRE2_GENERAL_CONTEXT_FUNCTIONS +PCRE2_COMPILE_CONTEXT_FUNCTIONS +PCRE2_MATCH_CONTEXT_FUNCTIONS PCRE2_COMPILE_FUNCTIONS PCRE2_PATTERN_INFO_FUNCTIONS PCRE2_MATCH_FUNCTIONS @@ -619,7 +612,9 @@ PCRE2_OTHER_FUNCTIONS PCRE2_TYPES_LIST PCRE2_STRUCTURE_LIST PCRE2_GENERAL_INFO_FUNCTIONS -PCRE2_CONTEXT_FUNCTIONS +PCRE2_GENERAL_CONTEXT_FUNCTIONS +PCRE2_COMPILE_CONTEXT_FUNCTIONS +PCRE2_MATCH_CONTEXT_FUNCTIONS PCRE2_COMPILE_FUNCTIONS PCRE2_PATTERN_INFO_FUNCTIONS PCRE2_MATCH_FUNCTIONS @@ -633,7 +628,9 @@ PCRE2_OTHER_FUNCTIONS #undef PCRE2_TYPES_LIST #undef PCRE2_STRUCTURE_LIST #undef PCRE2_GENERAL_INFO_FUNCTIONS -#undef PCRE2_CONTEXT_FUNCTIONS +#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS +#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS +#undef PCRE2_MATCH_CONTEXT_FUNCTIONS #undef PCRE2_COMPILE_FUNCTIONS #undef PCRE2_PATTERN_INFO_FUNCTIONS #undef PCRE2_MATCH_FUNCTIONS diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 7487ff3..153b37f 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ 
-79,28 +79,7 @@ are defined. */ extern "C" { #endif -/* Public options. Those in the context may affect compilation, JIT -compilation, and/or interpretive execution. The following tags indicate which: - -C alters what is compiled -J alters what JIT compiles -E affects pcre2_exec() execution -D affects pcre2_dfa_exec() execution -*/ - -/* The first group of options are those that are set in the context. */ - -#define PCRE2_ALT_BSUX 0x00000001 /* C */ -#define PCRE2_DOLLAR_ENDONLY 0x00000002 /* J E D */ -#define PCRE2_DUPNAMES 0x00000004 /* C */ -#define PCRE2_ALLOW_EMPTY_CLASS 0x00000008 /* C */ -#define PCRE2_MATCH_UNSET_BACKREF 0x00000010 /* C J E */ -#define PCRE2_NEVER_UTF 0x00000020 /* C */ -#define PCRE2_NEVER_UCP 0x00000040 /* C */ -#define PCRE2_UTF 0x00000080 /* C J E D */ -#define PCRE2_UCP 0x00000100 /* C J E D */ - -/* The following can be passed to pcre2_compile(), pcre2_exec(), or +/* The following options can be passed to pcre2_compile(), pcre2_exec(), or pcre2_dfa_exec(). PCRE2_NO_UTF_CHECK affects only the function to which it is passed. */ @@ -108,44 +87,56 @@ passed. */ #define PCRE2_NO_START_OPTIMIZE 0x00000002 #define PCRE2_NO_UTF_CHECK 0x00000004 -/* These are for pcre2_compile() only, affecting what is compiled, but not -otherwise affecting execution. */ +/* Other options that can be passed to pcre2_compile(). They may affect +compilation, JIT compilation, and/or interpretive execution. 
The following tags +indicate which: -#define PCRE2_AUTO_CALLOUT 0x00000100 -#define PCRE2_CASELESS 0x00000200 -#define PCRE2_DOTALL 0x00000400 -#define PCRE2_EXTENDED 0x00000800 -#define PCRE2_MULTILINE 0x00001000 -#define PCRE2_NO_AUTO_CAPTURE 0x00002000 -#define PCRE2_NO_AUTO_POSSESS 0x00004000 -#define PCRE2_UNGREEDY 0x00008000 +C alters what is compiled +J alters what JIT compiles +E is inspected during pcre2_match() execution +D is inspected during pcre2_dfa_match() execution +*/ -/* This pcre2_compile() option affects JIT compilation and interpretive -execution. */ - -#define PCRE2_FIRSTLINE 0x00010000 +#define PCRE2_ALLOW_EMPTY_CLASS 0x00000008 /* C */ +#define PCRE2_ALT_BSUX 0x00000010 /* C */ +#define PCRE2_AUTO_CALLOUT 0x00000020 /* C */ +#define PCRE2_CASELESS 0x00000040 /* C */ +#define PCRE2_DOLLAR_ENDONLY 0x00000080 /* J E D */ +#define PCRE2_DOTALL 0x00000100 /* C */ +#define PCRE2_DUPNAMES 0x00000200 /* C */ +#define PCRE2_EXTENDED 0x00000400 /* C */ +#define PCRE2_FIRSTLINE 0x00000800 /* J E D */ +#define PCRE2_MATCH_UNSET_BACKREF 0x00001000 /* C J E */ +#define PCRE2_MULTILINE 0x00002000 /* C */ +#define PCRE2_NEVER_UCP 0x00004000 /* C */ +#define PCRE2_NEVER_UTF 0x00008000 /* C */ +#define PCRE2_NO_AUTO_CAPTURE 0x00010000 /* C */ +#define PCRE2_NO_AUTO_POSSESS 0x00020000 /* C */ +#define PCRE2_UCP 0x00040000 /* C J E D */ +#define PCRE2_UNGREEDY 0x00080000 /* C */ +#define PCRE2_UTF 0x00100000 /* C J E D */ /* These are for pcre2_jit_compile(). */ -#define PCRE2_JIT 0x00020000 -#define PCRE2_JIT_PARTIAL_SOFT 0x00040000 -#define PCRE2_JIT_PARTIAL_HARD 0x00080000 +#define PCRE2_JIT 0x00000001 /* For full matching */ +#define PCRE2_JIT_PARTIAL_SOFT 0x00000002 +#define PCRE2_JIT_PARTIAL_HARD 0x00000004 /* These are for pcre2_exec() and pcre2_dfa_exec(). 
*/ -#define PCRE2_NOTBOL 0x00000100 -#define PCRE2_NOTEOL 0x00000200 -#define PCRE2_NOTEMPTY 0x00000400 -#define PCRE2_NOTEMPTY_ATSTART 0x00000800 -#define PCRE2_PARTIAL_SOFT 0x00001000 -#define PCRE2_PARTIAL_HARD 0x00002000 +#define PCRE2_NOTBOL 0x00000001 +#define PCRE2_NOTEOL 0x00000002 +#define PCRE2_NOTEMPTY 0x00000004 +#define PCRE2_NOTEMPTY_ATSTART 0x00000008 +#define PCRE2_PARTIAL_SOFT 0x00000010 +#define PCRE2_PARTIAL_HARD 0x00000020 /* These are additional options for pcre2_dfa_exec(). */ -#define PCRE2_DFA_RESTART 0x00010000 -#define PCRE2_DFA_SHORTEST 0x00020000 +#define PCRE2_DFA_RESTART 0x00000040 +#define PCRE2_DFA_SHORTEST 0x00000080 -/* Newline and \R settings, for use in the context. */ +/* Newline and \R settings, for use in the compile context. */ #define PCRE2_NEWLINE_CR 0 #define PCRE2_NEWLINE_LF 1 @@ -238,14 +229,14 @@ execution. */ #define PCRE2_INFO_LASTCODEUNIT 10 #define PCRE2_INFO_LASTCODETYPE 11 #define PCRE2_INFO_MATCH_EMPTY 12 -#define PCRE2_INFO_MATCHLIMIT 13 +#define PCRE2_INFO_MATCH_LIMIT 13 #define PCRE2_INFO_MAXLOOKBEHIND 14 #define PCRE2_INFO_MINLENGTH 15 #define PCRE2_INFO_NAMECOUNT 16 #define PCRE2_INFO_NAMEENTRYSIZE 17 #define PCRE2_INFO_NAMETABLE 18 #define PCRE2_INFO_PATTERN_OPTIONS 19 -#define PCRE2_INFO_RECURSIONLIMIT 20 +#define PCRE2_INFO_RECURSION_LIMIT 20 #define PCRE2_INFO_SIZE 21 /* Request types for pcre2_config(). */ @@ -262,6 +253,10 @@ execution. */ #define PCRE2_CONFIG_STACKRECURSE 9 #define PCRE2_CONFIG_UTF 10 +/* A value that is used to indicate 'unset' in unsigned size_t fields. */ + +#define PCRE2_UNSET (~(size_t)0) + /* Types for patterns and subject strings. */ typedef uint8_t PCRE2_UCHAR8; @@ -277,8 +272,14 @@ typedef const PCRE2_UCHAR32 *PCRE2_SPTR32; declarations are defined in a macro that is expanded for each width later. 
*/ #define PCRE2_TYPES_LIST \ -struct pcre2_real_context; \ -typedef struct pcre2_real_context pcre2_context; \ +struct pcre2_real_general_context; \ +typedef struct pcre2_real_general_context pcre2_general_context; \ +\ +struct pcre2_real_compile_context; \ +typedef struct pcre2_real_compile_context pcre2_compile_context; \ +\ +struct pcre2_real_match_context; \ +typedef struct pcre2_real_match_context pcre2_match_context; \ \ struct pcre2_real_code; \ typedef struct pcre2_real_code pcre2_code; \ @@ -335,104 +336,93 @@ information. */ #define PCRE2_GENERAL_INFO_FUNCTIONS \ PCRE2_EXP_DECL int pcre2_config(int, void *); \ -PCRE2_EXP_DECL size_t pcre2_get_exec_frame_size(void); \ +PCRE2_EXP_DECL size_t pcre2_get_match_frame_size(void); \ PCRE2_EXP_DECL int pcre2_version(PCRE2_UCHAR *, size_t); /* Functions for manipulating contexts. */ -#define PCRE2_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL size_t pcre2_context_size(void); \ +#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL \ - pcre2_context *pcre2_copy_context(pcre2_context *); \ -PCRE2_EXP_DECL void pcre2_free_context(pcre2_context *); \ + pcre2_general_context *pcre2_general_context_copy(pcre2_general_context *); \ PCRE2_EXP_DECL \ - pcre2_context *pcre2_init_context(pcre2_context *); \ -\ -PCRE2_EXP_DECL uint32_t pcre2_get_bsr_convention(pcre2_context *); \ -PCRE2_EXP_DECL int (*pcre2_get_callout(pcre2_context *)) \ - (pcre2_callout_block *, void *); \ + pcre2_general_context *pcre2_general_context_create( \ + void *(*)(size_t, void *), \ + void (*)(void *, void *), void *); \ +PCRE2_EXP_DECL void pcre2_general_context_free(pcre2_general_context *); + +#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL \ - const unsigned char *pcre2_get_character_tables(pcre2_context *); \ -PCRE2_EXP_DECL uint32_t pcre2_get_context_options(pcre2_context *); \ -PCRE2_EXP_DECL uint32_t pcre2_get_match_limit(pcre2_context *); \ -PCRE2_EXP_DECL void pcre2_get_memory_management(pcre2_context *, \ - void *(**)(size_t, void *), 
\ - void (**)(void *, void *)); \ -PCRE2_EXP_DECL uint32_t pcre2_get_newline_convention(pcre2_context *); \ -PCRE2_EXP_DECL uint32_t pcre2_get_parens_nest_limit(pcre2_context *); \ -PCRE2_EXP_DECL int (*pcre2_get_recursion_guard(pcre2_context *)) \ - (uint32_t, void *); \ -PCRE2_EXP_DECL uint32_t pcre2_get_recursion_limit(pcre2_context *); \ -PCRE2_EXP_DECL void pcre2_get_recursion_memory_management(\ - pcre2_context *, \ - void *(**)(size_t, void *), \ - void (**)(void *, void *)); \ -PCRE2_EXP_DECL void *pcre2_get_user_data(pcre2_context *); \ -\ -PCRE2_EXP_DECL int pcre2_set_bsr_convention(pcre2_context *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_callout(pcre2_context *, \ - int (*)(pcre2_callout_block *, void *)); \ -PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_context *, \ + pcre2_compile_context *pcre2_compile_context_copy(pcre2_compile_context *); \ +PCRE2_EXP_DECL \ + pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\ +PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \ +PCRE2_EXP_DECL int pcre2_set_bsr_convention(pcre2_compile_context *, \ + uint32_t); \ +PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \ const unsigned char *); \ -PCRE2_EXP_DECL int pcre2_set_context_options(pcre2_context *, uint32_t, \ +PCRE2_EXP_DECL int pcre2_set_newline_convention(pcre2_compile_context *, \ uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_context *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_memory_management(pcre2_context *, \ - void * (*)(size_t, void *), \ - void (*)(void *, void *)); \ -PCRE2_EXP_DECL int pcre2_set_newline_convention(pcre2_context *, \ +PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \ uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_context *, \ +PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\ + pcre2_compile_context *, int (*)(uint32_t)); \ + +#define PCRE2_MATCH_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL \ + 
pcre2_match_context *pcre2_match_context_copy(pcre2_match_context *); \ +PCRE2_EXP_DECL \ + pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \ +PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \ + int (*)(pcre2_callout_block *, void *)); \ +PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_recursion_guard(pcre2_context *, \ - int (*)(uint32_t, void *)); \ -PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_context *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_recursion_memory_management(\ - pcre2_context *, void * (*)(size_t, void *), \ - void (*)(void *, void *)); \ -PCRE2_EXP_DECL int pcre2_set_user_data(pcre2_context *, void *); +PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \ + uint32_t); \ +PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \ + pcre2_match_context *, void *(*)(size_t, void *), \ + void (*)(void *, void *)); /* Functions concerned with compiling a pattern to PCRE internal code. */ #define PCRE2_COMPILE_FUNCTIONS \ PCRE2_EXP_DECL \ - pcre2_code *pcre2_compile(pcre2_context *, PCRE2_SPTR, int, \ - uint32_t, int *, size_t *); \ -PCRE2_EXP_DECL void pcre2_free_compiled_code(pcre2_context *, \ - pcre2_code *); + pcre2_code *pcre2_compile(PCRE2_SPTR, int, uint32_t, \ + int *, size_t *, pcre2_compile_context *); \ +PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *); /* Functions that give information about a compiled pattern. */ #define PCRE2_PATTERN_INFO_FUNCTIONS \ PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \ - void *); \ -PCRE2_EXP_DECL int pcre2_get_stringtable_entries(const pcre2_code *, \ - PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_UCHAR **); \ -PCRE2_EXP_DECL int pcre2_get_substring_number(const pcre2_code *, \ - PCRE2_SPTR); + void *); /* Functions for running a match and inspecting the result. 
*/ #define PCRE2_MATCH_FUNCTIONS \ PCRE2_EXP_DECL \ - pcre2_match_data *pcre2_create_match_data(pcre2_context *, size_t); \ -PCRE2_EXP_DECL int pcre2_dfa_exec(pcre2_context *, const pcre2_code *, \ + pcre2_match_data *pcre2_match_data_create(size_t, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL \ + pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, \ PCRE2_SPTR, int, size_t, uint32_t, \ - pcre2_match_data *, int *, size_t); \ -PCRE2_EXP_DECL int pcre2_exec(pcre2_context *, const pcre2_code *, \ + pcre2_match_data *, pcre2_match_context *, int *, \ + size_t); \ +PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \ PCRE2_SPTR, int, size_t, uint32_t, \ - pcre2_match_data *); \ -PCRE2_EXP_DECL void pcre2_free_match_data(pcre2_context *, \ - pcre2_match_data *); \ + pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \ PCRE2_EXP_DECL size_t pcre2_get_leftchar(pcre2_match_data *); \ PCRE2_EXP_DECL \ PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \ -PCRE2_EXP_DECL size_t *pcre2_get_ovector(pcre2_match_data *); \ -PCRE2_EXP_DECL size_t pcre2_get_ovector_slots(pcre2_match_data *); \ +PCRE2_EXP_DECL size_t pcre2_get_ovector_count(pcre2_match_data *); \ +PCRE2_EXP_DECL size_t *pcre2_get_ovector_pointer(pcre2_match_data *); \ PCRE2_EXP_DECL size_t pcre2_get_rightchar(pcre2_match_data *); \ PCRE2_EXP_DECL size_t pcre2_get_startchar(pcre2_match_data *); @@ -440,48 +430,51 @@ PCRE2_EXP_DECL size_t pcre2_get_startchar(pcre2_match_data *); /* Convenience functions for handling matched substrings. 
*/ #define PCRE2_SUBSTRING_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_copy_named_substring(pcre2_match_data *, \ +PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \ PCRE2_SPTR, PCRE2_UCHAR *, size_t); \ -PCRE2_EXP_DECL int pcre2_copy_substring(pcre2_match_data *, int, \ - PCRE2_UCHAR *, size_t); \ -PCRE2_EXP_DECL void pcre2_free_substring(pcre2_context *, PCRE2_UCHAR *); \ -PCRE2_EXP_DECL void pcre2_free_substring_list(pcre2_context *, \ - PCRE2_SPTR *); \ -PCRE2_EXP_DECL int pcre2_get_named_substring(pcre2_context *, \ - pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **); \ -PCRE2_EXP_DECL int pcre2_get_named_substring_length(pcre2_match_data *, \ +PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \ + int, PCRE2_UCHAR *, size_t); \ +PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \ +PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \ + PCRE2_SPTR, PCRE2_UCHAR **); \ +PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \ + int, PCRE2_UCHAR **); \ +PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \ PCRE2_SPTR); \ -PCRE2_EXP_DECL int pcre2_get_substring(pcre2_context *, \ - pcre2_match_data *, int, PCRE2_UCHAR **); \ -PCRE2_EXP_DECL int pcre2_get_substring_length(pcre2_match_data *, int); \ -PCRE2_EXP_DECL int pcre2_get_substring_list(pcre2_context *, \ - pcre2_match_data *, PCRE2_UCHAR ***); +PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \ + int); \ +PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \ + PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_UCHAR **); \ +PCRE2_EXP_DECL int pcre2_substring_number_from_name(\ + const pcre2_code *, PCRE2_SPTR); \ +PCRE2_EXP_DECL void pcre2_substring_list_free(PCRE2_SPTR *); \ +PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \ + PCRE2_UCHAR ***, size_t **); /* Functions for JIT processing */ #define PCRE2_JIT_FUNCTIONS \ -PCRE2_EXP_DECL void pcre2_jit_compile(pcre2_context *, pcre2_code *, \ 
- uint32_t); \ -PCRE2_EXP_DECL int pcre2_jit_exec(pcre2_context *, const pcre2_code *, \ +PCRE2_EXP_DECL void pcre2_jit_compile(pcre2_code *, uint32_t); \ +PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \ PCRE2_SPTR, int, size_t, uint32_t, \ pcre2_match_data *, pcre2_jit_stack *); \ -PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_context *); \ +PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\ PCRE2_EXP_DECL \ - pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_context *, size_t, \ - size_t); \ -PCRE2_EXP_DECL void pcre2_jit_stack_assign(pcre2_context *, \ - const pcre2_code *, pcre2_jit_callback, void *); \ -PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_context *, \ - pcre2_jit_stack *); + pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *, \ + size_t, size_t); \ +PCRE2_EXP_DECL void pcre2_jit_stack_assign(const pcre2_code *, \ + pcre2_jit_callback, void *); \ +PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *); /* Other miscellaneous functions. */ #define PCRE2_OTHER_FUNCTIONS \ PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, size_t); \ +PCRE2_EXP_DECL size_t pcre2_get_match_frame_size(void); \ PCRE2_EXP_DECL \ - const unsigned char *pcre2_maketables(pcre2_context *); \ + const unsigned char *pcre2_maketables(pcre2_general_context *); \ PCRE2_EXP_DECL int pcre2_pattern_to_host_byte_order(pcre2_code *); @@ -499,90 +492,86 @@ pcre2_compile are called by application code. 
*/ /* Data types */ -#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR) -#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR) +#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR) +#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR) -#define pcre2_code PCRE2_SUFFIX(pcre2_code_) -#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_) -#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_) +#define pcre2_code PCRE2_SUFFIX(pcre2_code_) +#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_) +#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_) -#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_) -#define pcre2_real_context PCRE2_SUFFIX(pcre2_real_context_) -#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_) -#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_) +#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_) +#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_) +#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_) +#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_) +#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_) +#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_) /* Data blocks */ -#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_) -#define pcre2_context PCRE2_SUFFIX(pcre2_context_) -#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_) +#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_) +#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_) +#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_) +#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_) +#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_) /* Functions: the complete list in alphabetical order */ +#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_) #define pcre2_compile PCRE2_SUFFIX(pcre2_compile_) +#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_) +#define pcre2_compile_context_create 
PCRE2_SUFFIX(pcre2_compile_context_create_) +#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_) #define pcre2_config PCRE2_SUFFIX(pcre2_config_) -#define pcre2_context_size PCRE2_SUFFIX(pcre2_context_size_) -#define pcre2_copy_context PCRE2_SUFFIX(pcre2_copy_context_) -#define pcre2_copy_named_substring PCRE2_SUFFIX(pcre2_copy_named_substring_) -#define pcre2_copy_substring PCRE2_SUFFIX(pcre2_copy_substring_) -#define pcre2_create_match_data PCRE2_SUFFIX(pcre2_create_match_data_) -#define pcre2_dfa_exec PCRE2_SUFFIX(pcre2_dfa_exec_) -#define pcre2_exec PCRE2_SUFFIX(pcre2_exec_) -#define pcre2_free_compiled_code PCRE2_SUFFIX(pcre2_free_compiled_code_) -#define pcre2_free_context PCRE2_SUFFIX(pcre2_free_context_) -#define pcre2_free_match_data PCRE2_SUFFIX(pcre2_free_match_data_) -#define pcre2_free_substring PCRE2_SUFFIX(pcre2_free_substring_) -#define pcre2_free_substring_list PCRE2_SUFFIX(pcre2_free_substring_list_) -#define pcre2_get_bsr_convention PCRE2_SUFFIX(pcre2_get_bsr_convention_) -#define pcre2_get_callout PCRE2_SUFFIX(pcre2_get_callout_) -#define pcre2_get_character_tables PCRE2_SUFFIX(pcre2_get_character_tables_) -#define pcre2_get_context_options PCRE2_SUFFIX(pcre2_get_context_options_) +#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_) +#define pcre2_match PCRE2_SUFFIX(pcre2_match_) +#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_) +#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_) +#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_) #define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_) #define pcre2_get_leftchar PCRE2_SUFFIX(pcre2_get_leftchar_) #define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_) -#define pcre2_get_match_limit PCRE2_SUFFIX(pcre2_get_match_limit_) -#define pcre2_get_memory_management PCRE2_SUFFIX(pcre2_get_memory_management_) -#define pcre2_get_named_substring PCRE2_SUFFIX(pcre2_get_named_substring_) 
-#define pcre2_get_named_substring_length PCRE2_SUFFIX(pcre2_get_named_substring_length_) -#define pcre2_get_newline_convention PCRE2_SUFFIX(pcre2_get_newline_convention_) -#define pcre2_get_ovector PCRE2_SUFFIX(pcre2_get_ovector_) -#define pcre2_get_ovector_slots PCRE2_SUFFIX(pcre2_get_ovector_slots_) -#define pcre2_get_parens_nest_limit PCRE2_SUFFIX(pcre2_get_parens_nest_limit_) -#define pcre2_get_recursion_guard PCRE2_SUFFIX(pcre2_get_recursion_guard_) -#define pcre2_get_recursion_limit PCRE2_SUFFIX(pcre2_get_recursion_limit_) -#define pcre2_get_recursion_memory_management PCRE2_SUFFIX(pcre2_get_recursion_memory_management_) +#define pcre2_get_match_frame_size PCRE2_SUFFIX(pcre2_get_match_frame_size_) +#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_) +#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_) #define pcre2_get_rightchar PCRE2_SUFFIX(pcre2_get_rightchar_) #define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_) -#define pcre2_get_stringtable_entries PCRE2_SUFFIX(pcre2_get_stringtable_entries_) -#define pcre2_get_substring PCRE2_SUFFIX(pcre2_get_substring_) -#define pcre2_get_substring_length PCRE2_SUFFIX(pcre2_get_substring_length_) -#define pcre2_get_substring_list PCRE2_SUFFIX(pcre2_get_substring_list_) -#define pcre2_get_substring_number PCRE2_SUFFIX(pcre2_get_substring_number_) -#define pcre2_get_user_data PCRE2_SUFFIX(pcre2_get_user_data_) -#define pcre2_init_context PCRE2_SUFFIX(pcre2_init_context_) #define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_) -#define pcre2_jit_exec PCRE2_SUFFIX(pcre2_jit_exec_) +#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_) #define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_) #define pcre2_jit_stack_alloc PCRE2_SUFFIX(pcre2_jit_stack_alloc_) #define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_) #define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_) #define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_) 
+#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_) +#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_) +#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_) +#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_) +#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_) +#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) #define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_) #define pcre2_pattern_to_host_byte_order PCRE2_SUFFIX(pcre2_pattern_to_host_byte_order_) #define pcre2_set_bsr_convention PCRE2_SUFFIX(pcre2_set_bsr_convention_) #define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_) #define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) -#define pcre2_set_context_options PCRE2_SUFFIX(pcre2_set_context_options_) +#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) -#define pcre2_set_memory_management PCRE2_SUFFIX(pcre2_set_memory_management_) #define pcre2_set_newline_convention PCRE2_SUFFIX(pcre2_set_newline_convention_) -#define pcre2_set_recursion_guard PCRE2_SUFFIX(pcre2_set_recursion_guard_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) #define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) -#define pcre2_set_user_data PCRE2_SUFFIX(pcre2_set_user_data_) +#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) +#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) +#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_) +#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_) +#define pcre2_substring_get_bynumber 
PCRE2_SUFFIX(pcre2_substring_get_bynumber_) +#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_) +#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_) +#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_) +#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_) +#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_) +#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_) #define pcre2_version PCRE2_SUFFIX(pcre2_version_) @@ -593,7 +582,9 @@ prototypes. */ PCRE2_TYPES_LIST PCRE2_STRUCTURE_LIST PCRE2_GENERAL_INFO_FUNCTIONS -PCRE2_CONTEXT_FUNCTIONS +PCRE2_GENERAL_CONTEXT_FUNCTIONS +PCRE2_COMPILE_CONTEXT_FUNCTIONS +PCRE2_MATCH_CONTEXT_FUNCTIONS PCRE2_COMPILE_FUNCTIONS PCRE2_PATTERN_INFO_FUNCTIONS PCRE2_MATCH_FUNCTIONS @@ -606,7 +597,9 @@ PCRE2_OTHER_FUNCTIONS PCRE2_TYPES_LIST PCRE2_STRUCTURE_LIST PCRE2_GENERAL_INFO_FUNCTIONS -PCRE2_CONTEXT_FUNCTIONS +PCRE2_GENERAL_CONTEXT_FUNCTIONS +PCRE2_COMPILE_CONTEXT_FUNCTIONS +PCRE2_MATCH_CONTEXT_FUNCTIONS PCRE2_COMPILE_FUNCTIONS PCRE2_PATTERN_INFO_FUNCTIONS PCRE2_MATCH_FUNCTIONS @@ -619,7 +612,9 @@ PCRE2_OTHER_FUNCTIONS PCRE2_TYPES_LIST PCRE2_STRUCTURE_LIST PCRE2_GENERAL_INFO_FUNCTIONS -PCRE2_CONTEXT_FUNCTIONS +PCRE2_GENERAL_CONTEXT_FUNCTIONS +PCRE2_COMPILE_CONTEXT_FUNCTIONS +PCRE2_MATCH_CONTEXT_FUNCTIONS PCRE2_COMPILE_FUNCTIONS PCRE2_PATTERN_INFO_FUNCTIONS PCRE2_MATCH_FUNCTIONS @@ -633,7 +628,9 @@ PCRE2_OTHER_FUNCTIONS #undef PCRE2_TYPES_LIST #undef PCRE2_STRUCTURE_LIST #undef PCRE2_GENERAL_INFO_FUNCTIONS -#undef PCRE2_CONTEXT_FUNCTIONS +#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS +#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS +#undef PCRE2_MATCH_CONTEXT_FUNCTIONS #undef PCRE2_COMPILE_FUNCTIONS #undef PCRE2_PATTERN_INFO_FUNCTIONS #undef PCRE2_MATCH_FUNCTIONS diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 9190d4a..0587adb 100644 --- a/src/pcre2_compile.c +++ 
b/src/pcre2_compile.c @@ -53,9 +53,9 @@ POSSIBILITY OF SUCH DAMAGE. *************************************************/ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION -pcre2_free_compiled_code(pcre2_context *context, pcre2_code *code) +pcre2_code_free(pcre2_code *code) { -context=context;code=code; +code=code; return; } @@ -69,12 +69,12 @@ return; a pointer to a block of store holding a compiled version of the expression. Arguments: - context points to a PCRE2 context pattern the regular expression patlen the length of the pattern, or < 0 for zero-terminated options option bits errorcode pointer to error code variable (positive error code) erroroffset pointer for offset in pattern where error was detected + ccontext points to a compile context or is NULL Returns: pointer to compiled data block, or NULL on error, with errorcode and erroroffset set @@ -83,14 +83,35 @@ Returns: pointer to compiled data block, or NULL on error, /* FIXME: this is currently a placeholder function */ PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION -pcre2_compile(pcre2_context *context, PCRE2_SPTR pattern, int patlen, - uint32_t options, int *errorcode, size_t *erroroffset) +pcre2_compile(PCRE2_SPTR pattern, int patlen, uint32_t options, int *errorcode, + size_t *erroroffset, pcre2_compile_context *ccontext) { +pcre2_code *c = NULL; + +patlen = patlen; options = options; + +/* Fudge while testing pcre2test. 
*/ -context = context; pattern = pattern; patlen = patlen; options = options; -*errorcode = 1; *erroroffset = 0; -return NULL; + +if (pattern[0] == 'Y') + { + c = ccontext->malloc(sizeof(pcre2_real_code), NULL); + c->magic_number = MAGIC_NUMBER; + c->size = sizeof(pcre2_real_code); + c->name_table_offset = sizeof(pcre2_real_code); + c->compile_options = options; + c->flags = PCRE2_CODE_UNIT_WIDTH/8; + c->name_count = 0; + c->name_entry_size = 0; + } + +else + { + *errorcode = 1; + } + +return c; } /* End of pcre2_compile.c */ diff --git a/src/pcre2_context.c b/src/pcre2_context.c index 4464cd4..3187de0 100644 --- a/src/pcre2_context.c +++ b/src/pcre2_context.c @@ -45,7 +45,6 @@ POSSIBILITY OF SUCH DAMAGE. #include "pcre2_internal.h" -#define CSIZE sizeof(struct pcre2_real_context) /************************************************* @@ -56,171 +55,176 @@ POSSIBILITY OF SUCH DAMAGE. static void *default_malloc(size_t size, void *data) { -data = data; +(void)data; return malloc(size); } static void default_free(void *block, void *data) { -data = data; +(void)data; free(block); } /************************************************* -* Create/initialize, copy, free a context * +* Create contexts * *************************************************/ -PCRE2_EXP_DEFN size_t PCRE2_CALL_CONVENTION -pcre2_context_size(void) +PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION +pcre2_general_context_create(void *(*private_malloc)(size_t, void *), + void (*private_free)(void *, void *), void *memory_data) { -return CSIZE; +pcre2_general_context *gcontext; +if (private_malloc == NULL) private_malloc = default_malloc; +if (private_free == NULL) private_free = default_free; +gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data); +gcontext->malloc = private_malloc; +gcontext->free = private_free; +gcontext->memory_data = memory_data; +return gcontext; } -PCRE2_EXP_DEFN pcre2_context * PCRE2_CALL_CONVENTION -pcre2_init_context(pcre2_context *context) 
+PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION +pcre2_compile_context_create(pcre2_general_context *gcontext) { -if (context == NULL) +pcre2_compile_context *ccontext; +void *(*compile_malloc)(size_t, void *); +void (*compile_free)(void *, void *); +void *memory_data; +if (gcontext == NULL) { - context = malloc(CSIZE); - if (context == NULL) return NULL; + compile_malloc = default_malloc; + compile_free = default_free; + memory_data = NULL; } -context->callout = NULL; -context->malloc = default_malloc; -context->free = default_free; -context->stack_malloc = default_malloc; -context->stack_free = default_free; -context->stack_guard = NULL; -context->user_data = NULL; -context->tables = PRIV(default_tables); +else + { + compile_malloc = gcontext->malloc; + compile_free = gcontext->free; + memory_data = gcontext->memory_data; + } +ccontext = compile_malloc(sizeof(pcre2_real_compile_context), memory_data); +if (ccontext == NULL) return NULL; +ccontext->malloc = compile_malloc; +ccontext->free = compile_free; +ccontext->memory_data = memory_data; +ccontext->stack_guard = NULL; +ccontext->tables = PRIV(default_tables); #ifdef BSR_ANYCRLF -context->bsr_convention = PCRE2_BSR_ANYCRLF; +ccontext->bsr_convention = PCRE2_BSR_ANYCRLF; #else -context->bsr_convention = PCRE2_BSR_UNICODE; +ccontext->bsr_convention = PCRE2_BSR_UNICODE; #endif -context->newline_convention = NEWLINE; -context->options = 0; -context->match_limit = MATCH_LIMIT; -context->parens_nest_limit = PARENS_NEST_LIMIT; -context->recursion_limit = MATCH_LIMIT_RECURSION; -return context; +ccontext->newline_convention = NEWLINE; +ccontext->parens_nest_limit = PARENS_NEST_LIMIT; +return ccontext; } -PCRE2_EXP_DEFN pcre2_context * PCRE2_CALL_CONVENTION -pcre2_copy_context(pcre2_context *context) +PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION +pcre2_match_context_create(pcre2_general_context *gcontext) { -pcre2_context *new = context->malloc(CSIZE, context->user_data); -if (new == NULL) 
return NULL; -memcpy(new, context, CSIZE); -return new; -} - - -PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION -pcre2_free_context(pcre2_context *context) -{ -context->free(context, context->user_data); +pcre2_match_context *mcontext; +void *(*match_malloc)(size_t, void *); +void (*match_free)(void *, void *); +void *memory_data; +if (gcontext == NULL) + { + match_malloc = default_malloc; + match_free = default_free; + memory_data = NULL; + } +else + { + match_malloc = gcontext->malloc; + match_free = gcontext->free; + memory_data = gcontext->memory_data; + } +mcontext = match_malloc(sizeof(pcre2_real_match_context), memory_data); +if (mcontext == NULL) return NULL; +mcontext->malloc = match_malloc; +mcontext->free = match_free; +mcontext->memory_data = memory_data; +#ifdef NO_RECURSE +mcontext->stack_malloc = match_malloc; +mcontext->stack_free = match_free; +#endif +mcontext->callout = NULL; +mcontext->match_limit = MATCH_LIMIT; +mcontext->recursion_limit = MATCH_LIMIT_RECURSION; +return mcontext; } /************************************************* -* Extract settings from a context * +* Context copy functions * *************************************************/ -PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION -pcre2_get_bsr_convention(pcre2_context *context) +PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION +pcre2_general_context_copy(pcre2_general_context *gcontext) { -return context->bsr_convention; +pcre2_general_context *new = + gcontext->malloc(sizeof(pcre2_real_general_context), gcontext->memory_data); +if (new == NULL) return NULL; +memcpy(new, gcontext, sizeof(pcre2_real_general_context)); +return new; } -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -(*pcre2_get_callout(pcre2_context *context))(pcre2_callout_block *, void *) +PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION +pcre2_compile_context_copy(pcre2_compile_context *ccontext) { -return context->callout; -} - - -PCRE2_EXP_DEFN const unsigned char * PCRE2_CALL_CONVENTION 
-pcre2_get_character_tables(pcre2_context *context) -{ -return context->tables; +pcre2_compile_context *new = + ccontext->malloc(sizeof(pcre2_real_compile_context), ccontext->memory_data); +if (new == NULL) return NULL; +memcpy(new, ccontext, sizeof(pcre2_real_compile_context)); +return new; } -PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION -pcre2_get_context_options(pcre2_context *context) +PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION +pcre2_match_context_copy(pcre2_match_context *mcontext) { -return context->options; +pcre2_match_context *new = + mcontext->malloc(sizeof(pcre2_real_match_context), mcontext->memory_data); +if (new == NULL) return NULL; +memcpy(new, mcontext, sizeof(pcre2_real_match_context)); +return new; } -PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION -pcre2_get_match_limit(pcre2_context *context) + +/************************************************* +* Context free functions * +*************************************************/ + + +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_general_context_free(pcre2_general_context *gcontext) { -return context->match_limit; -} - - -PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION -pcre2_get_memory_management(pcre2_context *context, - void *(**mptr)(size_t, void *), - void (**fptr)(void *, void *)) -{ -*mptr = context->malloc; -*fptr = context->free; -} - - -PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION -pcre2_get_newline_convention(pcre2_context *context) -{ -return context->newline_convention; -} - - -PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION -pcre2_get_parens_nest_limit(pcre2_context *context) -{ -return context->parens_nest_limit; -} - - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -(*pcre2_get_recursion_guard(pcre2_context *context))(uint32_t, void *) -{ -return context->stack_guard; -} - - -PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION -pcre2_get_recursion_limit(pcre2_context *context) -{ -return context->recursion_limit; +gcontext->free(gcontext, gcontext->memory_data); } PCRE2_EXP_DEFN void 
PCRE2_CALL_CONVENTION -pcre2_get_recursion_memory_management(pcre2_context *context, - void *(**mptr)(size_t, void *), - void (**fptr)(void *, void *)) +pcre2_compile_context_free(pcre2_compile_context *ccontext) { -*mptr = context->stack_malloc; -*fptr = context->stack_free; +ccontext->free(ccontext, ccontext->memory_data); } - -PCRE2_EXP_DEFN void * PCRE2_CALL_CONVENTION -pcre2_get_user_data(pcre2_context *context) + +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_match_context_free(pcre2_match_context *mcontext) { -return context->user_data; +mcontext->free(mcontext, mcontext->memory_data); } + + /************************************************* * Set values in contexts * *************************************************/ @@ -228,15 +232,14 @@ return context->user_data; /* All these functions return 1 for success or 0 if invalid data is given. Only some of the functions are able to test the validity of the data. */ - PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_bsr_convention(pcre2_context *context, uint32_t value) +pcre2_set_bsr_convention(pcre2_compile_context *ccontext, uint32_t value) { switch(value) { case PCRE2_BSR_ANYCRLF: case PCRE2_BSR_UNICODE: - context->bsr_convention = value; + ccontext->bsr_convention = value; return 1; default: @@ -245,54 +248,17 @@ switch(value) } +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_character_tables(pcre2_compile_context *ccontext, + const unsigned char *tables) +{ +ccontext->tables = tables; +return 1; +} + + PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_callout(pcre2_context *context, - int (*callout)(pcre2_callout_block *, void *)) -{ -context->callout = callout; -return 1; -} - - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_character_tables(pcre2_context *context, const unsigned char *tables) -{ -context->tables = tables; -return 1; -} - - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_context_options(pcre2_context *context, uint32_t unset_bits, - uint32_t set_bits) -{ -if 
((set_bits & ~PCRE2_CONTEXT_OPTIONS) != 0) return 0; -context->options = (context->options & ~unset_bits) | set_bits; -return 1; -} - - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_match_limit(pcre2_context *context, uint32_t limit) -{ -context->match_limit = limit; -return 1; -} - - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_memory_management(pcre2_context *context, - void * (*mymalloc)(size_t, void*), - void (*myfree)(void *, void *)) -{ -context->malloc = context->stack_malloc = mymalloc; -context->free = context->stack_free = myfree; -return 1; -} - - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_newline_convention(pcre2_context *context, uint32_t newline) +pcre2_set_newline_convention(pcre2_compile_context *ccontext, uint32_t newline) { switch(newline) { @@ -301,7 +267,7 @@ switch(newline) case PCRE2_NEWLINE_CRLF: case PCRE2_NEWLINE_ANY: case PCRE2_NEWLINE_ANYCRLF: - context->newline_convention = newline; + ccontext->newline_convention = newline; return 1; default: @@ -311,47 +277,61 @@ switch(newline) PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_parens_nest_limit(pcre2_context *context, uint32_t limit) +pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit) { -context->parens_nest_limit = limit; +ccontext->parens_nest_limit = limit; return 1; } PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_recursion_guard(pcre2_context *context, - int (*guard)(uint32_t, void *)) +pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, + int (*guard)(uint32_t)) { -context->stack_guard = guard; +ccontext->stack_guard = guard; +return 1; +} + + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_callout(pcre2_match_context *mcontext, + int (*callout)(pcre2_callout_block *, void *)) +{ +mcontext->callout = callout; return 1; } PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_recursion_limit(pcre2_context *context, uint32_t limit) +pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit) { 
-context->recursion_limit = limit; +mcontext->match_limit = limit; +return 1; +} + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit) +{ +mcontext->recursion_limit = limit; return 1; } PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_recursion_memory_management(pcre2_context *context, +pcre2_set_recursion_memory_management(pcre2_match_context *mcontext, void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *)) { -context->stack_malloc = mymalloc; -context->stack_free = myfree; +#ifdef NO_RECURSE +mcontext->stack_malloc = mymalloc; +mcontext->stack_free = myfree; +#else +(void)mcontext; +(void)mymalloc; +(void)myfree; +#endif return 1; } - - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_user_data(pcre2_context *context, void *data) -{ -context->user_data = data; -return 1; -} /* End of pcre2_context.c */ diff --git a/src/pcre2_dfa_exec.c b/src/pcre2_dfa_match.c similarity index 90% rename from src/pcre2_dfa_exec.c rename to src/pcre2_dfa_match.c index b4d9374..9048b21 100644 --- a/src/pcre2_dfa_exec.c +++ b/src/pcre2_dfa_match.c @@ -54,13 +54,13 @@ POSSIBILITY OF SUCH DAMAGE. alternate matching algorithm that finds all matches at once.
Arguments: - context points to a PCRE2 context code points to the compiled pattern subject subject string length length of subject string startoffset where to start matching in the subject options option bits match_data points to a match data structure + mcontext points to a match context workspace pointer to workspace wscount size of workspace @@ -73,16 +73,16 @@ Returns: > 0 => number of match offset pairs placed in offsets /* FIXME: this is currently a placeholder function */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_dfa_exec(pcre2_context *context, const pcre2_code *code, - PCRE2_SPTR subject, int length, size_t startoffset, uint32_t options, - pcre2_match_data *match_data, int *workspace, size_t wscount) +pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, int length, + size_t startoffset, uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, int *workspace, size_t wscount) { -context = context; code = code; subject = subject; length = length; +mcontext = mcontext; code = code; subject = subject; length = length; startoffset = startoffset; options = options; match_data = match_data; workspace = workspace; wscount = wscount; return PCRE2_ERROR_NOMATCH; } -/* End of pcre2_dfa_exec.c */ +/* End of pcre2_dfa_match.c */ diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index dae20fb..eef2e60 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -12,7 +12,10 @@ with some of the new PCRE2 context stuff added. */ #include "pcre2.h" #define PUBL(name) pcre2_##name + +#ifndef PRIV #define PRIV(name) _pcre2_##name +#endif #define PCRE2_CALL_CONVENTION @@ -21,30 +24,761 @@ extern const uint8_t PRIV(default_tables)[]; /* What follows is "real" code for PCRE2.
*/ + +typedef int BOOL; + +#ifndef FALSE +#define FALSE 0 +#define TRUE 1 +#endif + + +/* Valgrind (memcheck) support */ + +#ifdef SUPPORT_VALGRIND +#include <valgrind/memcheck.h> +#endif + +/* When UTF encoding is being used, a character is no longer just a single +byte in 8-bit mode or a single short in 16-bit mode. The macros for character +handling generate simple sequences when used in the basic mode, and more +complicated ones for UTF characters. GETCHARLENTEST and other macros are not +used when UTF is not supported. To make sure they can never even appear when +UTF support is omitted, we don't even define them. */ + +#ifndef SUPPORT_UTF + +/* #define MAX_VALUE_FOR_SINGLE_CHAR */ +/* #define HAS_EXTRALEN(c) */ +/* #define GET_EXTRALEN(c) */ +/* #define NOT_FIRSTCHAR(c) */ +#define GETCHAR(c, eptr) c = *eptr; +#define GETCHARTEST(c, eptr) c = *eptr; +#define GETCHARINC(c, eptr) c = *eptr++; +#define GETCHARINCTEST(c, eptr) c = *eptr++; +#define GETCHARLEN(c, eptr, len) c = *eptr; +/* #define GETCHARLENTEST(c, eptr, len) */ +/* #define BACKCHAR(eptr) */ +/* #define FORWARDCHAR(eptr) */ +/* #define ACROSSCHAR(condition, eptr, action) */ + +#else /* SUPPORT_UTF */ + +/* Tests whether a UTF-8 code point needs extra bytes to decode. */ + +#define HASUTF8EXTRALEN(c) ((c) >= 0xc0) + +/* Base macro to pick up the remaining bytes of a UTF-8 character, not +advancing the pointer.
*/ + +#define GETUTF8(c, eptr) \ + { \ + if ((c & 0x20) == 0) \ + c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \ + else if ((c & 0x10) == 0) \ + c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \ + else if ((c & 0x08) == 0) \ + c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \ + ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \ + else if ((c & 0x04) == 0) \ + c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \ + ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \ + (eptr[4] & 0x3f); \ + else \ + c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \ + ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \ + ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \ + } + +/* Base macro to pick up the remaining bytes of a UTF-8 character, advancing +the pointer. */ + +#define GETUTF8INC(c, eptr) \ + { \ + if ((c & 0x20) == 0) \ + c = ((c & 0x1f) << 6) | (*eptr++ & 0x3f); \ + else if ((c & 0x10) == 0) \ + { \ + c = ((c & 0x0f) << 12) | ((*eptr & 0x3f) << 6) | (eptr[1] & 0x3f); \ + eptr += 2; \ + } \ + else if ((c & 0x08) == 0) \ + { \ + c = ((c & 0x07) << 18) | ((*eptr & 0x3f) << 12) | \ + ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \ + eptr += 3; \ + } \ + else if ((c & 0x04) == 0) \ + { \ + c = ((c & 0x03) << 24) | ((*eptr & 0x3f) << 18) | \ + ((eptr[1] & 0x3f) << 12) | ((eptr[2] & 0x3f) << 6) | \ + (eptr[3] & 0x3f); \ + eptr += 4; \ + } \ + else \ + { \ + c = ((c & 0x01) << 30) | ((*eptr & 0x3f) << 24) | \ + ((eptr[1] & 0x3f) << 18) | ((eptr[2] & 0x3f) << 12) | \ + ((eptr[3] & 0x3f) << 6) | (eptr[4] & 0x3f); \ + eptr += 5; \ + } \ + } + +#endif /* SUPPORT_UTF */ + + +/* Private flags containing information about the compiled pattern. The first +three must not be changed, because whichever is set is actually the number of +bytes in a code unit in that mode. 
*/ + +#define PCRE2_MODE8 0x00000001 /* compiled in 8 bit mode */ +#define PCRE2_MODE16 0x00000002 /* compiled in 16 bit mode */ +#define PCRE2_MODE32 0x00000004 /* compiled in 32 bit mode */ +#define PCRE2_FIRSTSET 0x00000010 /* first_char is set */ +#define PCRE2_FCH_CASELESS 0x00000020 /* caseless first char */ +#define PCRE2_REQCHSET 0x00000040 /* req_byte is set */ +#define PCRE2_RCH_CASELESS 0x00000080 /* caseless requested char */ +#define PCRE2_STARTLINE 0x00000100 /* start after \n for multiline */ +#define PCRE2_JCHANGED 0x00000200 /* j option used in pattern */ +#define PCRE2_HASCRORLF 0x00000400 /* explicit \r or \n in pattern */ +#define PCRE2_HASTHEN 0x00000800 /* pattern contains (*THEN) */ +#define PCRE2_MLSET 0x00001000 /* match limit set by pattern */ +#define PCRE2_RLSET 0x00002000 /* recursion limit set by pattern */ +#define PCRE2_MATCH_EMPTY 0x00004000 /* pattern can match empty string */ + +#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) + + +/* Magic number to provide a small check against being handed junk. */ + +#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ + +/* This variable is used to detect a loaded regular expression +in different endianness. */ + +#define REVERSED_MAGIC_NUMBER 0x45524350UL /* 'ERCP' */ + + + +/* -------------------- Character and string names ------------------------ */ + +/* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal +character constants like '*' because the compiler would emit their EBCDIC code, +which is different from their ASCII/UTF-8 code. Instead we define macros for +the characters so that they always use the ASCII/UTF-8 code when UTF-8 support +is enabled. When UTF-8 support is not enabled, the definitions use character +literals. Both character and string versions of each character are needed, and +there are some longer strings as well. + +This means that, on EBCDIC platforms, the PCRE library can handle either +EBCDIC, or UTF-8, but not both. 
To support both in the same compiled library +would need different lookups depending on whether PCRE_UTF8 was set or not. +This would make it impossible to use characters in switch/case statements, +which would reduce performance. For a theoretical use (which nobody has asked +for) in a minority area (EBCDIC platforms), this is not sensible. Any +application that did need both could compile two versions of the library, using +macros to give the functions distinct names. */ + +#ifndef SUPPORT_UTF + +/* UTF-8 support is not enabled; use the platform-dependent character literals +so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF +mode. Newline characters are problematic in EBCDIC. Though it has CR and LF +characters, a common practice has been to use its NL (0x15) character as the +line terminator in C-like processing environments. However, sometimes the LF +(0x25) character is used instead, according to this Unicode document: + +http://unicode.org/standard/reports/tr13/tr13-5.html + +PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25 +instead. Whichever is *not* chosen is defined as NEL. + +In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the +same code point. */ + +#ifdef EBCDIC + +#ifndef EBCDIC_NL25 +#define CHAR_NL '\x15' +#define CHAR_NEL '\x25' +#define STR_NL "\x15" +#define STR_NEL "\x25" +#else +#define CHAR_NL '\x25' +#define CHAR_NEL '\x15' +#define STR_NL "\x25" +#define STR_NEL "\x15" +#endif + +#define CHAR_LF CHAR_NL +#define STR_LF STR_NL + +#define CHAR_ESC '\047' +#define CHAR_DEL '\007' +#define STR_ESC "\047" +#define STR_DEL "\007" + +#else /* Not EBCDIC */ + +/* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for +compatibility. NEL is the Unicode newline character; make sure it is +a positive value. 
*/ + +#define CHAR_LF '\n' +#define CHAR_NL CHAR_LF +#define CHAR_NEL ((unsigned char)'\x85') +#define CHAR_ESC '\033' +#define CHAR_DEL '\177' + +#define STR_LF "\n" +#define STR_NL STR_LF +#define STR_NEL "\x85" +#define STR_ESC "\033" +#define STR_DEL "\177" + +#endif /* EBCDIC */ + +/* The remaining definitions work in both environments. */ + +#define CHAR_NULL '\0' +#define CHAR_HT '\t' +#define CHAR_VT '\v' +#define CHAR_FF '\f' +#define CHAR_CR '\r' +#define CHAR_BS '\b' +#define CHAR_BEL '\a' + +#define CHAR_SPACE ' ' +#define CHAR_EXCLAMATION_MARK '!' +#define CHAR_QUOTATION_MARK '"' +#define CHAR_NUMBER_SIGN '#' +#define CHAR_DOLLAR_SIGN '$' +#define CHAR_PERCENT_SIGN '%' +#define CHAR_AMPERSAND '&' +#define CHAR_APOSTROPHE '\'' +#define CHAR_LEFT_PARENTHESIS '(' +#define CHAR_RIGHT_PARENTHESIS ')' +#define CHAR_ASTERISK '*' +#define CHAR_PLUS '+' +#define CHAR_COMMA ',' +#define CHAR_MINUS '-' +#define CHAR_DOT '.' +#define CHAR_SLASH '/' +#define CHAR_0 '0' +#define CHAR_1 '1' +#define CHAR_2 '2' +#define CHAR_3 '3' +#define CHAR_4 '4' +#define CHAR_5 '5' +#define CHAR_6 '6' +#define CHAR_7 '7' +#define CHAR_8 '8' +#define CHAR_9 '9' +#define CHAR_COLON ':' +#define CHAR_SEMICOLON ';' +#define CHAR_LESS_THAN_SIGN '<' +#define CHAR_EQUALS_SIGN '=' +#define CHAR_GREATER_THAN_SIGN '>' +#define CHAR_QUESTION_MARK '?' 
+#define CHAR_COMMERCIAL_AT '@' +#define CHAR_A 'A' +#define CHAR_B 'B' +#define CHAR_C 'C' +#define CHAR_D 'D' +#define CHAR_E 'E' +#define CHAR_F 'F' +#define CHAR_G 'G' +#define CHAR_H 'H' +#define CHAR_I 'I' +#define CHAR_J 'J' +#define CHAR_K 'K' +#define CHAR_L 'L' +#define CHAR_M 'M' +#define CHAR_N 'N' +#define CHAR_O 'O' +#define CHAR_P 'P' +#define CHAR_Q 'Q' +#define CHAR_R 'R' +#define CHAR_S 'S' +#define CHAR_T 'T' +#define CHAR_U 'U' +#define CHAR_V 'V' +#define CHAR_W 'W' +#define CHAR_X 'X' +#define CHAR_Y 'Y' +#define CHAR_Z 'Z' +#define CHAR_LEFT_SQUARE_BRACKET '[' +#define CHAR_BACKSLASH '\\' +#define CHAR_RIGHT_SQUARE_BRACKET ']' +#define CHAR_CIRCUMFLEX_ACCENT '^' +#define CHAR_UNDERSCORE '_' +#define CHAR_GRAVE_ACCENT '`' +#define CHAR_a 'a' +#define CHAR_b 'b' +#define CHAR_c 'c' +#define CHAR_d 'd' +#define CHAR_e 'e' +#define CHAR_f 'f' +#define CHAR_g 'g' +#define CHAR_h 'h' +#define CHAR_i 'i' +#define CHAR_j 'j' +#define CHAR_k 'k' +#define CHAR_l 'l' +#define CHAR_m 'm' +#define CHAR_n 'n' +#define CHAR_o 'o' +#define CHAR_p 'p' +#define CHAR_q 'q' +#define CHAR_r 'r' +#define CHAR_s 's' +#define CHAR_t 't' +#define CHAR_u 'u' +#define CHAR_v 'v' +#define CHAR_w 'w' +#define CHAR_x 'x' +#define CHAR_y 'y' +#define CHAR_z 'z' +#define CHAR_LEFT_CURLY_BRACKET '{' +#define CHAR_VERTICAL_LINE '|' +#define CHAR_RIGHT_CURLY_BRACKET '}' +#define CHAR_TILDE '~' + +#define STR_HT "\t" +#define STR_VT "\v" +#define STR_FF "\f" +#define STR_CR "\r" +#define STR_BS "\b" +#define STR_BEL "\a" + +#define STR_SPACE " " +#define STR_EXCLAMATION_MARK "!" +#define STR_QUOTATION_MARK "\"" +#define STR_NUMBER_SIGN "#" +#define STR_DOLLAR_SIGN "$" +#define STR_PERCENT_SIGN "%" +#define STR_AMPERSAND "&" +#define STR_APOSTROPHE "'" +#define STR_LEFT_PARENTHESIS "(" +#define STR_RIGHT_PARENTHESIS ")" +#define STR_ASTERISK "*" +#define STR_PLUS "+" +#define STR_COMMA "," +#define STR_MINUS "-" +#define STR_DOT "." 
+#define STR_SLASH "/" +#define STR_0 "0" +#define STR_1 "1" +#define STR_2 "2" +#define STR_3 "3" +#define STR_4 "4" +#define STR_5 "5" +#define STR_6 "6" +#define STR_7 "7" +#define STR_8 "8" +#define STR_9 "9" +#define STR_COLON ":" +#define STR_SEMICOLON ";" +#define STR_LESS_THAN_SIGN "<" +#define STR_EQUALS_SIGN "=" +#define STR_GREATER_THAN_SIGN ">" +#define STR_QUESTION_MARK "?" +#define STR_COMMERCIAL_AT "@" +#define STR_A "A" +#define STR_B "B" +#define STR_C "C" +#define STR_D "D" +#define STR_E "E" +#define STR_F "F" +#define STR_G "G" +#define STR_H "H" +#define STR_I "I" +#define STR_J "J" +#define STR_K "K" +#define STR_L "L" +#define STR_M "M" +#define STR_N "N" +#define STR_O "O" +#define STR_P "P" +#define STR_Q "Q" +#define STR_R "R" +#define STR_S "S" +#define STR_T "T" +#define STR_U "U" +#define STR_V "V" +#define STR_W "W" +#define STR_X "X" +#define STR_Y "Y" +#define STR_Z "Z" +#define STR_LEFT_SQUARE_BRACKET "[" +#define STR_BACKSLASH "\\" +#define STR_RIGHT_SQUARE_BRACKET "]" +#define STR_CIRCUMFLEX_ACCENT "^" +#define STR_UNDERSCORE "_" +#define STR_GRAVE_ACCENT "`" +#define STR_a "a" +#define STR_b "b" +#define STR_c "c" +#define STR_d "d" +#define STR_e "e" +#define STR_f "f" +#define STR_g "g" +#define STR_h "h" +#define STR_i "i" +#define STR_j "j" +#define STR_k "k" +#define STR_l "l" +#define STR_m "m" +#define STR_n "n" +#define STR_o "o" +#define STR_p "p" +#define STR_q "q" +#define STR_r "r" +#define STR_s "s" +#define STR_t "t" +#define STR_u "u" +#define STR_v "v" +#define STR_w "w" +#define STR_x "x" +#define STR_y "y" +#define STR_z "z" +#define STR_LEFT_CURLY_BRACKET "{" +#define STR_VERTICAL_LINE "|" +#define STR_RIGHT_CURLY_BRACKET "}" +#define STR_TILDE "~" + +#define STRING_ACCEPT0 "ACCEPT\0" +#define STRING_COMMIT0 "COMMIT\0" +#define STRING_F0 "F\0" +#define STRING_FAIL0 "FAIL\0" +#define STRING_MARK0 "MARK\0" +#define STRING_PRUNE0 "PRUNE\0" +#define STRING_SKIP0 "SKIP\0" +#define STRING_THEN "THEN" + +#define 
STRING_alpha0 "alpha\0" +#define STRING_lower0 "lower\0" +#define STRING_upper0 "upper\0" +#define STRING_alnum0 "alnum\0" +#define STRING_ascii0 "ascii\0" +#define STRING_blank0 "blank\0" +#define STRING_cntrl0 "cntrl\0" +#define STRING_digit0 "digit\0" +#define STRING_graph0 "graph\0" +#define STRING_print0 "print\0" +#define STRING_punct0 "punct\0" +#define STRING_space0 "space\0" +#define STRING_word0 "word\0" +#define STRING_xdigit "xdigit" + +#define STRING_DEFINE "DEFINE" +#define STRING_WEIRD_STARTWORD "[:<:]]" +#define STRING_WEIRD_ENDWORD "[:>:]]" + +#define STRING_CR_RIGHTPAR "CR)" +#define STRING_LF_RIGHTPAR "LF)" +#define STRING_CRLF_RIGHTPAR "CRLF)" +#define STRING_ANY_RIGHTPAR "ANY)" +#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)" +#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)" +#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)" +#define STRING_UTF8_RIGHTPAR "UTF8)" +#define STRING_UTF16_RIGHTPAR "UTF16)" +#define STRING_UTF32_RIGHTPAR "UTF32)" +#define STRING_UTF_RIGHTPAR "UTF)" +#define STRING_UCP_RIGHTPAR "UCP)" +#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)" +#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" +#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" +#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" + +#else /* SUPPORT_UTF */ + +/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This +works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode +only.
*/ + +#define CHAR_HT '\011' +#define CHAR_VT '\013' +#define CHAR_FF '\014' +#define CHAR_CR '\015' +#define CHAR_LF '\012' +#define CHAR_NL CHAR_LF +#define CHAR_NEL ((unsigned char)'\x85') +#define CHAR_BS '\010' +#define CHAR_BEL '\007' +#define CHAR_ESC '\033' +#define CHAR_DEL '\177' + +#define CHAR_NULL '\0' +#define CHAR_SPACE '\040' +#define CHAR_EXCLAMATION_MARK '\041' +#define CHAR_QUOTATION_MARK '\042' +#define CHAR_NUMBER_SIGN '\043' +#define CHAR_DOLLAR_SIGN '\044' +#define CHAR_PERCENT_SIGN '\045' +#define CHAR_AMPERSAND '\046' +#define CHAR_APOSTROPHE '\047' +#define CHAR_LEFT_PARENTHESIS '\050' +#define CHAR_RIGHT_PARENTHESIS '\051' +#define CHAR_ASTERISK '\052' +#define CHAR_PLUS '\053' +#define CHAR_COMMA '\054' +#define CHAR_MINUS '\055' +#define CHAR_DOT '\056' +#define CHAR_SLASH '\057' +#define CHAR_0 '\060' +#define CHAR_1 '\061' +#define CHAR_2 '\062' +#define CHAR_3 '\063' +#define CHAR_4 '\064' +#define CHAR_5 '\065' +#define CHAR_6 '\066' +#define CHAR_7 '\067' +#define CHAR_8 '\070' +#define CHAR_9 '\071' +#define CHAR_COLON '\072' +#define CHAR_SEMICOLON '\073' +#define CHAR_LESS_THAN_SIGN '\074' +#define CHAR_EQUALS_SIGN '\075' +#define CHAR_GREATER_THAN_SIGN '\076' +#define CHAR_QUESTION_MARK '\077' +#define CHAR_COMMERCIAL_AT '\100' +#define CHAR_A '\101' +#define CHAR_B '\102' +#define CHAR_C '\103' +#define CHAR_D '\104' +#define CHAR_E '\105' +#define CHAR_F '\106' +#define CHAR_G '\107' +#define CHAR_H '\110' +#define CHAR_I '\111' +#define CHAR_J '\112' +#define CHAR_K '\113' +#define CHAR_L '\114' +#define CHAR_M '\115' +#define CHAR_N '\116' +#define CHAR_O '\117' +#define CHAR_P '\120' +#define CHAR_Q '\121' +#define CHAR_R '\122' +#define CHAR_S '\123' +#define CHAR_T '\124' +#define CHAR_U '\125' +#define CHAR_V '\126' +#define CHAR_W '\127' +#define CHAR_X '\130' +#define CHAR_Y '\131' +#define CHAR_Z '\132' +#define CHAR_LEFT_SQUARE_BRACKET '\133' +#define CHAR_BACKSLASH '\134' +#define CHAR_RIGHT_SQUARE_BRACKET '\135' 
+#define CHAR_CIRCUMFLEX_ACCENT '\136' +#define CHAR_UNDERSCORE '\137' +#define CHAR_GRAVE_ACCENT '\140' +#define CHAR_a '\141' +#define CHAR_b '\142' +#define CHAR_c '\143' +#define CHAR_d '\144' +#define CHAR_e '\145' +#define CHAR_f '\146' +#define CHAR_g '\147' +#define CHAR_h '\150' +#define CHAR_i '\151' +#define CHAR_j '\152' +#define CHAR_k '\153' +#define CHAR_l '\154' +#define CHAR_m '\155' +#define CHAR_n '\156' +#define CHAR_o '\157' +#define CHAR_p '\160' +#define CHAR_q '\161' +#define CHAR_r '\162' +#define CHAR_s '\163' +#define CHAR_t '\164' +#define CHAR_u '\165' +#define CHAR_v '\166' +#define CHAR_w '\167' +#define CHAR_x '\170' +#define CHAR_y '\171' +#define CHAR_z '\172' +#define CHAR_LEFT_CURLY_BRACKET '\173' +#define CHAR_VERTICAL_LINE '\174' +#define CHAR_RIGHT_CURLY_BRACKET '\175' +#define CHAR_TILDE '\176' + +#define STR_HT "\011" +#define STR_VT "\013" +#define STR_FF "\014" +#define STR_CR "\015" +#define STR_NL "\012" +#define STR_BS "\010" +#define STR_BEL "\007" +#define STR_ESC "\033" +#define STR_DEL "\177" + +#define STR_SPACE "\040" +#define STR_EXCLAMATION_MARK "\041" +#define STR_QUOTATION_MARK "\042" +#define STR_NUMBER_SIGN "\043" +#define STR_DOLLAR_SIGN "\044" +#define STR_PERCENT_SIGN "\045" +#define STR_AMPERSAND "\046" +#define STR_APOSTROPHE "\047" +#define STR_LEFT_PARENTHESIS "\050" +#define STR_RIGHT_PARENTHESIS "\051" +#define STR_ASTERISK "\052" +#define STR_PLUS "\053" +#define STR_COMMA "\054" +#define STR_MINUS "\055" +#define STR_DOT "\056" +#define STR_SLASH "\057" +#define STR_0 "\060" +#define STR_1 "\061" +#define STR_2 "\062" +#define STR_3 "\063" +#define STR_4 "\064" +#define STR_5 "\065" +#define STR_6 "\066" +#define STR_7 "\067" +#define STR_8 "\070" +#define STR_9 "\071" +#define STR_COLON "\072" +#define STR_SEMICOLON "\073" +#define STR_LESS_THAN_SIGN "\074" +#define STR_EQUALS_SIGN "\075" +#define STR_GREATER_THAN_SIGN "\076" +#define STR_QUESTION_MARK "\077" +#define STR_COMMERCIAL_AT "\100" 
+#define STR_A "\101" +#define STR_B "\102" +#define STR_C "\103" +#define STR_D "\104" +#define STR_E "\105" +#define STR_F "\106" +#define STR_G "\107" +#define STR_H "\110" +#define STR_I "\111" +#define STR_J "\112" +#define STR_K "\113" +#define STR_L "\114" +#define STR_M "\115" +#define STR_N "\116" +#define STR_O "\117" +#define STR_P "\120" +#define STR_Q "\121" +#define STR_R "\122" +#define STR_S "\123" +#define STR_T "\124" +#define STR_U "\125" +#define STR_V "\126" +#define STR_W "\127" +#define STR_X "\130" +#define STR_Y "\131" +#define STR_Z "\132" +#define STR_LEFT_SQUARE_BRACKET "\133" +#define STR_BACKSLASH "\134" +#define STR_RIGHT_SQUARE_BRACKET "\135" +#define STR_CIRCUMFLEX_ACCENT "\136" +#define STR_UNDERSCORE "\137" +#define STR_GRAVE_ACCENT "\140" +#define STR_a "\141" +#define STR_b "\142" +#define STR_c "\143" +#define STR_d "\144" +#define STR_e "\145" +#define STR_f "\146" +#define STR_g "\147" +#define STR_h "\150" +#define STR_i "\151" +#define STR_j "\152" +#define STR_k "\153" +#define STR_l "\154" +#define STR_m "\155" +#define STR_n "\156" +#define STR_o "\157" +#define STR_p "\160" +#define STR_q "\161" +#define STR_r "\162" +#define STR_s "\163" +#define STR_t "\164" +#define STR_u "\165" +#define STR_v "\166" +#define STR_w "\167" +#define STR_x "\170" +#define STR_y "\171" +#define STR_z "\172" +#define STR_LEFT_CURLY_BRACKET "\173" +#define STR_VERTICAL_LINE "\174" +#define STR_RIGHT_CURLY_BRACKET "\175" +#define STR_TILDE "\176" + +#define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0" +#define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0" +#define STRING_F0 STR_F "\0" +#define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0" +#define STRING_MARK0 STR_M STR_A STR_R STR_K "\0" +#define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0" +#define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0" +#define STRING_THEN STR_T STR_H STR_E STR_N + +#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0" +#define STRING_lower0 
STR_l STR_o STR_w STR_e STR_r "\0" +#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0" +#define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0" +#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0" +#define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0" +#define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0" +#define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0" +#define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0" +#define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0" +#define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0" +#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0" +#define STRING_word0 STR_w STR_o STR_r STR_d "\0" +#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t + +#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E +#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET +#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET + +#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS +#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS +#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS +#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS +#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS +#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS +#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS +#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS +#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS +#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS +#define STRING_UTF_RIGHTPAR STR_U STR_T 
STR_F STR_RIGHT_PARENTHESIS +#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS +#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS +#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS +#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN +#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN + +#endif /* SUPPORT_UTF */ + +/* -------------------- End of character and string names -------------------*/ + + + /* Only these bits are allowed when setting context options. */ #define PCRE2_CONTEXT_OPTIONS (\ PCRE2_ALT_BSUX|PCRE2_DOLLAR_ENDONLY|PCRE2_DUPNAMES|PCRE2_ALLOW_EMPTY_CLASS|\ PCRE2_MATCH_UNSET_BACKREF|PCRE2_NEVER_UTF|PCRE2_NEVER_UCP|PCRE2_UTF|\ PCRE2_UCP) - -/* The real context structure */ - -struct pcre2_real_context { - const unsigned char *tables; - int (*callout)(pcre2_callout_block *, void *); - void * (*malloc)(size_t, void *); - void (*free)(void *, void *); - void * (*stack_malloc)(size_t, void *); - void (*stack_free)(void *, void *); - int (*stack_guard)(uint32_t, void *); - void *user_data; - uint16_t bsr_convention; - uint16_t newline_convention; - uint32_t options; - uint32_t match_limit; - uint32_t parens_nest_limit; - uint32_t recursion_limit; -}; + +/* The private structures used by PCRE are defined in a separate file. When +compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, so we include +them at the appropriate width. When compiling pcretest, however, that macro is +not set at this point because pcretest needs to include them at all supported +widths. 
*/ + +#ifdef PCRE2_CODE_UNIT_WIDTH +#include "pcre2_intstructs.h" +#endif /* End of pcre2_internal.h */ diff --git a/src/pcre2_intstructs.h b/src/pcre2_intstructs.h new file mode 100644 index 0000000..f177228 --- /dev/null +++ b/src/pcre2_intstructs.h @@ -0,0 +1,102 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2014 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This module contains the private structures needed by pcre2_internal.h. They +are kept separate so that they can be #included multiple times for different +code unit widths by pcre2test. */ + + +/* The real general context structure */ + +typedef struct pcre2_real_general_context { + void * (*malloc)(size_t, void *); + void (*free)(void *, void *); + void *memory_data; +} pcre2_real_general_context; + +/* The real compile context structure */ + +typedef struct pcre2_real_compile_context { + void * (*malloc)(size_t, void *); + void (*free)(void *, void *); + void * memory_data; + int (*stack_guard)(uint32_t); + const unsigned char *tables; + uint16_t bsr_convention; + uint16_t newline_convention; + uint32_t parens_nest_limit; +} pcre2_real_compile_context; + +/* The real match context structure. 
*/ + +typedef struct pcre2_real_match_context { + void * (*malloc)(size_t, void *); + void (*free)(void *, void *); + void * memory_data; +#ifdef NO_RECURSE + void * (*stack_malloc)(size_t, void *); + void (*stack_free)(void *, void *); +#endif + int (*callout)(pcre2_callout_block *, void *); + uint32_t match_limit; + uint32_t recursion_limit; +} pcre2_real_match_context; + +/* The real compiled code structure */ + +typedef struct pcre2_real_code { + uint32_t magic_number; + uint32_t size; /* Total that was malloc-ed */ + uint32_t compile_options; /* Options passed to pcre2_compile() */ + uint32_t pattern_options; /* Options taken from the pattern */ + uint32_t flags; /* Various state flags */ + uint32_t limit_match; /* Limit set in the pattern */ + uint32_t limit_recursion; /* Limit set in the pattern */ + uint32_t first_char; /* Starting character */ + uint32_t req_char; /* This character must be seen */ + uint16_t max_lookbehind; /* Longest lookbehind (characters) */ + uint16_t top_bracket; /* Highest numbered group */ + uint16_t top_backref; /* Highest numbered back reference */ + uint16_t name_table_offset; /* Offset to name table that follows */ + uint16_t name_entry_size; /* Size of name items in the table */ + uint16_t name_count; /* Number of name entries in the table */ +} pcre2_real_code; + +/* End of pcre2_intstructs.h */ diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 9f723c3..bb8dfe1 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -54,7 +54,6 @@ POSSIBILITY OF SUCH DAMAGE. code. 
Arguments: - context points to a PCRE2 context code a compiled pattern options JIT option bits @@ -64,17 +63,15 @@ Returns: nothing /* FIXME: this is currently a placeholder function */ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION -pcre2_jit_compile(pcre2_context *context, pcre2_code *code, uint32_t options) +pcre2_jit_compile(pcre2_code *code, uint32_t options) { #ifndef SUPPORT_JIT -(void)context; (void)code; (void)options; #else /* SUPPORT_JIT */ -context = context; code=code; options = options; /* Dummy.... */ - +code=code; options = options; /* Dummy.... */ #endif /* SUPPORT_JIT */ } diff --git a/src/pcre2_jit_exec.c b/src/pcre2_jit_match.c similarity index 91% rename from src/pcre2_jit_exec.c rename to src/pcre2_jit_match.c index 1dd208a..5a74ad8 100644 --- a/src/pcre2_jit_exec.c +++ b/src/pcre2_jit_match.c @@ -53,7 +53,6 @@ POSSIBILITY OF SUCH DAMAGE. /* This function runs a JIT pattern match. Arguments: - context points a PCRE2 context code points to the compiled expression subject points to the subject string length length of subject string (may contain binary zeros) @@ -71,12 +70,11 @@ Returns: > 0 => success; value is the number of ovector pairs filled /* FIXME: this is currently a placeholder function */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_jit_exec(pcre2_context *context, const pcre2_code *code, - PCRE2_SPTR subject, int length, size_t start_offset, uint32_t options, - pcre2_match_data *match_data, pcre2_jit_stack *jit_stack) +pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, int length, + size_t start_offset, uint32_t options, pcre2_match_data *match_data, + pcre2_jit_stack *jit_stack) { #ifndef SUPPORT_JIT -(void)context; (void)code; (void)subject; (void)length; @@ -89,7 +87,7 @@ return PCRE2_ERROR_NOMATCH; /* Dummy code */ -context=context;code=code;subject=subject;length=length; +code=code;subject=subject;length=length; start_offset=start_offset; options=options; match_data=match_data; jit_stack=jit_stack; return 
PCRE2_ERROR_NOMATCH; @@ -97,4 +95,4 @@ return PCRE2_ERROR_NOMATCH; #endif /* SUPPORT_JIT */ } -/* End of pcre2_jit_exec.c */ +/* End of pcre2_jit_match.c */ diff --git a/src/pcre2_jit_misc.c b/src/pcre2_jit_misc.c index b4e365a..ee4ea88 100644 --- a/src/pcre2_jit_misc.c +++ b/src/pcre2_jit_misc.c @@ -55,13 +55,13 @@ POSSIBILITY OF SUCH DAMAGE. PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION -pcre2_jit_free_unused_memory(pcre2_context *context) +pcre2_jit_free_unused_memory(pcre2_general_context *gcontext) { #ifndef SUPPORT_JIT -(void)context; /* Suppress warning */ +(void)gcontext; /* Suppress warning */ #else /* SUPPORT_JIT */ -context=context; /* Dummy */ +gcontext=gcontext; /* Dummy */ #endif /* SUPPORT_JIT */ } @@ -72,16 +72,17 @@ context=context; /* Dummy */ *************************************************/ PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION -pcre2_jit_stack_alloc(pcre2_context *context, size_t startsize, size_t maxsize) +pcre2_jit_stack_alloc(pcre2_general_context *gcontext, size_t startsize, + size_t maxsize) { #ifndef SUPPORT_JIT -(void)context; +(void)gcontext; (void)startsize; (void)maxsize; return NULL; #else /* SUPPORT_JIT */ -context=context;startsize=startsize;maxsize=maxsize; +gcontext=gcontext;startsize=startsize;maxsize=maxsize; return NULL; #endif } @@ -92,18 +93,16 @@ return NULL; *************************************************/ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION -pcre2_jit_stack_assign(pcre2_context *context, const pcre2_code *code, - pcre2_jit_callback callback, void *callback_data) +pcre2_jit_stack_assign(const pcre2_code *code, pcre2_jit_callback callback, + void *callback_data) { #ifndef SUPPORT_JIT -(void)context; (void)code; (void)callback; (void)callback_data; #else /* SUPPORT_JIT */ - -context=context;code=code;callback=callback;callback_data=callback_data; +code=code;callback=callback;callback_data=callback_data; #endif /* SUPPORT_JIT */ } @@ -113,15 +112,13 @@ 
context=context;code=code;callback=callback;callback_data=callback_data; *************************************************/ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION -pcre2_jit_stack_free(pcre2_context *context, pcre2_jit_stack *jit_stack) +pcre2_jit_stack_free(pcre2_jit_stack *jit_stack) { #ifndef SUPPORT_JIT -(void)context; (void)jit_stack; #else /* SUPPORT_JIT */ - -context=context;jit_stack=jit_stack; +jit_stack=jit_stack; #endif /* SUPPORT_JIT */ } diff --git a/src/pcre2_maketables.c b/src/pcre2_maketables.c index d97b01c..2df7819 100644 --- a/src/pcre2_maketables.c +++ b/src/pcre2_maketables.c @@ -70,9 +70,9 @@ Returns: pointer to the contiguous block of data /* FIXME: temporarily a dummy, until pcre2_internal is complete. */ PCRE2_EXP_DEFN const unsigned char * PCRE2_CALL_CONVENTION -pcre2_maketables(pcre2_context *context) +pcre2_maketables(pcre2_general_context *gcontext) { -context=context; +gcontext=gcontext; return NULL; #ifdef NEVER diff --git a/src/pcre2_exec.c b/src/pcre2_match.c similarity index 91% rename from src/pcre2_exec.c rename to src/pcre2_match.c index 9e868b8..75f45aa 100644 --- a/src/pcre2_exec.c +++ b/src/pcre2_match.c @@ -47,7 +47,7 @@ POSSIBILITY OF SUCH DAMAGE. 
/************************************************* -* Execute a Regular Expression * +* Match a Regular Expression * *************************************************/ /* This function applies a compiled re to a subject string and picks out @@ -72,13 +72,13 @@ Returns: > 0 => success; value is the number of ovector pairs filled /* FIXME: this is currently a placeholder function */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_exec(pcre2_context *context, const pcre2_code *code, - PCRE2_SPTR subject, int length, size_t start_offset, uint32_t options, - pcre2_match_data *match_data) +pcre2_match( const pcre2_code *code, PCRE2_SPTR subject, int length, + size_t start_offset, uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext) { -context=context;code=code;subject=subject;length=length; +mcontext=mcontext;code=code;subject=subject;length=length; start_offset=start_offset; options=options; match_data=match_data; return PCRE2_ERROR_NOMATCH; } -/* End of pcre2_exec.c */ +/* End of pcre2_match.c */ diff --git a/src/pcre2_match_data.c b/src/pcre2_match_data.c index 57d74b2..b1fe745 100644 --- a/src/pcre2_match_data.c +++ b/src/pcre2_match_data.c @@ -49,14 +49,27 @@ POSSIBILITY OF SUCH DAMAGE. 
/* FIXME: these are all dummy functions */ /************************************************* -* Create a match data block * +* Create a match data block given ovector size * *************************************************/ +PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION +pcre2_match_data_create(size_t ovecsize, pcre2_general_context *gcontext) +{ +gcontext=gcontext;ovecsize=ovecsize; +return NULL; +} + + + +/************************************************* +* Create a match data block using pattern data * +*************************************************/ PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION -pcre2_create_match_data(pcre2_context *context, size_t ovecsize) +pcre2_match_data_create_from_pattern(pcre2_code *code, + pcre2_general_context *gcontext) { -context=context;ovecsize=ovecsize; +code=code;gcontext=gcontext; return NULL; } @@ -67,9 +80,9 @@ return NULL; *************************************************/ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION -pcre2_free_match_data(pcre2_context *context, pcre2_match_data *match_data) +pcre2_match_data_free(pcre2_match_data *match_data) { -context=context;match_data=match_data; +match_data=match_data; return; } @@ -106,7 +119,7 @@ return NULL; *************************************************/ PCRE2_EXP_DEFN size_t * PCRE2_CALL_CONVENTION -pcre2_get_ovector(pcre2_match_data *match_data) +pcre2_get_ovector_pointer(pcre2_match_data *match_data) { match_data=match_data; return NULL; @@ -119,7 +132,7 @@ return NULL; *************************************************/ PCRE2_EXP_DEFN size_t PCRE2_CALL_CONVENTION -pcre2_get_ovector_slots(pcre2_match_data *match_data) +pcre2_get_ovector_count(pcre2_match_data *match_data) { match_data=match_data; return 0; diff --git a/src/pcre2_pattern_info.c b/src/pcre2_pattern_info.c index c8d04e1..be2c0d0 100644 --- a/src/pcre2_pattern_info.c +++ b/src/pcre2_pattern_info.c @@ -59,13 +59,58 @@ Arguments: Returns: 0 if data returned, negative on error */ -/* FIXME: 
this is currently a placeholder function */ +/* FIXME: this is currently incomplete */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where) { -code=code;what=what;where=where; -return -1; +const pcre2_real_code *re = (pcre2_real_code *)code; + +if (re == NULL || where == NULL) return PCRE2_ERROR_NULL; + +/* Check that the first field in the block is the magic number. If it is not, +return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to +REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which +means that the pattern is likely compiled with different endianness. */ + +if (re->magic_number != MAGIC_NUMBER) + return re->magic_number == REVERSED_MAGIC_NUMBER? + PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC; + +/* Check that this pattern was compiled in the correct bit mode */ + +if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) + return PCRE2_ERROR_BADMODE; + +switch(what) + { + case PCRE2_INFO_NAMEENTRYSIZE: + *((int *)where) = re->name_entry_size; + break; + + case PCRE2_INFO_NAMECOUNT: + *((int *)where) = re->name_count; + break; + + case PCRE2_INFO_SIZE: + *((size_t *)where) = re->size; + break; + + case PCRE2_INFO_JITSIZE: +#ifdef SUPPORT_JIT + *((size_t *)where) = + (re->flags & PCRE2_EXTRA_EXECUTABLE_JIT) != 0 && + re->executable_jit != NULL)? 
+ PRIV(jit_get_size)(re->executable_jit) : 0; +#else + *((size_t *)where) = 0; +#endif + break; + + default: return PCRE2_ERROR_BADOPTION; + } + +return 0; } /* End of pcre2_pattern_info.c */ diff --git a/src/pcre2_substring.c b/src/pcre2_substring.c index eec27d7..6b0ac7a 100644 --- a/src/pcre2_substring.c +++ b/src/pcre2_substring.c @@ -72,7 +72,7 @@ Returns: if successful: */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_copy_named_substring(pcre2_match_data *match_data, PCRE2_SPTR stringname, +pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname, PCRE2_UCHAR *buffer, size_t size) { match_data=match_data;stringname=stringname;buffer=buffer;size=size; @@ -103,7 +103,7 @@ Returns: if successful: */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_copy_substring(pcre2_match_data *match_data, int stringnumber, +pcre2_substring_copy_bynumber(pcre2_match_data *match_data, int stringnumber, PCRE2_UCHAR *buffer, size_t size) { match_data=match_data;stringnumber=stringnumber;buffer=buffer;size=size; @@ -127,9 +127,9 @@ Returns: nothing */ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION -pcre2_free_substring(pcre2_context *context, PCRE2_UCHAR *string) +pcre2_substring_free(PCRE2_UCHAR *string) { -context->free(string, context->user_data); +string=string; return; } @@ -151,9 +151,9 @@ Returns: nothing PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION -pcre2_free_substring_list(pcre2_context *context, PCRE2_SPTR *list) +pcre2_substring_list_free(PCRE2_SPTR *list) { -context->free(list, context->user_data); +list=list; return; } @@ -182,62 +182,10 @@ Returns: if successful: */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_get_named_substring(pcre2_context *context, pcre2_match_data *match_data, +pcre2_substring_get_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr) { -context=context;match_data=match_data;stringname=stringname;stringptr=stringptr; -return PCRE2_ERROR_NOSUBSTRING; -} - - - 
-/************************************************* -* Get length of a named substring * -*************************************************/ - -/* This function returns the length of a named captured substring. If the regex -permits duplicate names, the first substring that is set is chosen. - -Arguments: - match_data pointer to match data - stringname the name of the required substring - -Returns: a non-negative length if successful - a negative error code otherwise -*/ - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_get_named_substring_length(pcre2_match_data *match_data, - PCRE2_SPTR stringname) -{ -match_data=match_data;stringname=stringname; -return PCRE2_ERROR_NOSUBSTRING; -} - - - -/************************************************* -* Find (multiple) entries for named string * -*************************************************/ - -/* This is used by the local get_first_set() function, as well as being -generally available. It is used when duplicated names are permitted. - -Arguments: - code the compiled regex - stringname the name whose entries required - firstptr where to put the pointer to the first entry - lastptr where to put the pointer to the last entry - -Returns: the length of each entry, or a negative number - (PCRE2_ERROR_NOSUBSTRING) if not found -*/ - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_get_stringtable_entries(const pcre2_code *code, PCRE2_SPTR stringname, - PCRE2_UCHAR **firstptr, PCRE2_UCHAR **lastptr) -{ -code=code;stringname=stringname;firstptr=firstptr;lastptr=lastptr; +match_data=match_data;stringname=stringname;stringptr=stringptr; return PCRE2_ERROR_NOSUBSTRING; } @@ -265,16 +213,41 @@ Returns: if successful: */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_get_substring(pcre2_context *context, pcre2_match_data *match_data, - int stringnumber, PCRE2_UCHAR **stringptr) +pcre2_substring_get_bynumber(pcre2_match_data *match_data, int stringnumber, + PCRE2_UCHAR **stringptr) { 
-context=context;match_data=match_data;stringnumber=stringnumber; +match_data=match_data;stringnumber=stringnumber; stringptr=stringptr; return PCRE2_ERROR_NOSUBSTRING; } +/************************************************* +* Get length of a named substring * +*************************************************/ + +/* This function returns the length of a named captured substring. If the regex +permits duplicate names, the first substring that is set is chosen. + +Arguments: + match_data pointer to match data + stringname the name of the required substring + +Returns: a non-negative length if successful + a negative error code otherwise +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_substring_length_byname(pcre2_match_data *match_data, + PCRE2_SPTR stringname) +{ +match_data=match_data;stringname=stringname; +return PCRE2_ERROR_NOSUBSTRING; +} + + + /************************************************* * Get length of a numbered substring * *************************************************/ @@ -290,7 +263,7 @@ Returns: a non-negative length if successful */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_get_substring_length(pcre2_match_data *match_data, +pcre2_substring_length_bynumber(pcre2_match_data *match_data, int stringnumber) { match_data=match_data;stringnumber=stringnumber; @@ -317,10 +290,10 @@ Returns: if successful: 0 */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_get_substring_list(pcre2_context *context, pcre2_match_data *match_data, - PCRE2_UCHAR ***listptr) +pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr, + size_t **lengthsptr) { -context=context;match_data=match_data;listptr=listptr; +match_data=match_data;listptr=listptr;lengthsptr=lengthsptr; return PCRE2_ERROR_NOMEMORY; } @@ -342,11 +315,39 @@ Returns: the number of the named parentheses, or a negative number */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION - pcre2_get_substring_number(const pcre2_code *code, PCRE2_SPTR stringname) 
+pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR stringname) { code=code;stringname=stringname; return PCRE2_ERROR_NOSUBSTRING; } + +/************************************************* +* Find (multiple) entries for named string * +*************************************************/ + +/* This is used by the local get_first_set() function, as well as being +generally available. It is used when duplicated names are permitted. + +Arguments: + code the compiled regex + stringname the name whose entries required + firstptr where to put the pointer to the first entry + lastptr where to put the pointer to the last entry + +Returns: the length of each entry, or a negative number + (PCRE2_ERROR_NOSUBSTRING) if not found +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname, + PCRE2_UCHAR **firstptr, PCRE2_UCHAR **lastptr) +{ +code=code;stringname=stringname;firstptr=firstptr;lastptr=lastptr; +return PCRE2_ERROR_NOSUBSTRING; +} + + /* End of pcre2_substring.c */ diff --git a/src/pcre2_tables.c b/src/pcre2_tables.c new file mode 100644 index 0000000..6c3def5 --- /dev/null +++ b/src/pcre2_tables.c @@ -0,0 +1,665 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2014 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef PCRE2_INCLUDED + +/* This module contains some fixed tables that are used by more than one of the +PCRE code modules. 
The tables are also #included by the pcre2test program, +which uses macros to change their names from _pcre2_xxx to xxxx, thereby +avoiding name clashes with the library. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" +#endif /* PCRE2_INCLUDED */ + + +#ifdef FIXME + +/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that +the definition is next to the definition of the opcodes in pcre2_internal.h. */ + +const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS }; + +/* Tables of horizontal and vertical whitespace characters, suitable for +adding to classes. */ + +const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST }; +const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST }; +#endif /* FIXME */ + + +/************************************************* +* Tables for UTF-8 support * +*************************************************/ + +/* These are the breakpoints for different numbers of bytes in a UTF-8 +character. */ + +#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \ + || (defined PCRE2_INCLUDED && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)) + +/* These tables are also required by pcretest in 16- or 32-bit mode. */ + +const int PRIV(utf8_table1)[] = + { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; + +const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int); + +/* These are the indicator bits and the mask for the data bits to set in the +first byte of a character, indexed by the number of additional bytes. */ + +const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; +const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; + +/* Table of the number of extra bytes, indexed by the first byte masked with +0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. 
*/ + +const uint8_t PRIV(utf8_table4)[] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; + +#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE2_INCLUDED && SUPPORT_PCRE[16|32])*/ + + +#ifdef FIXME + +#ifdef SUPPORT_UTF + +/* Table to translate from particular type value to the general value. */ + +const uint32_t PRIV(ucp_gentype)[] = { + ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */ + ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */ + ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */ + ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */ + ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */ + ucp_P, ucp_P, /* Ps, Po */ + ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */ + ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */ +}; + +/* This table encodes the rules for finding the end of an extended grapheme +cluster. Every code point has a grapheme break property which is one of the +ucp_gbXX values defined in ucp.h. The 2-dimensional table is indexed by the +properties of two adjacent code points. The left property selects a word from +the table, and the right property selects a bit from that word like this: + + ucp_gbtable[left-property] & (1 << right-property) + +The value is non-zero if a grapheme break is NOT permitted between the relevant +two code points. The breaking rules are as follows: + +1. Break at the start and end of text (pretty obviously). + +2. Do not break between a CR and LF; otherwise, break before and after + controls. + +3. Do not break Hangul syllable sequences, the rules for which are: + + L may be followed by L, V, LV or LVT + LV or V may be followed by V or T + LVT or T may be followed by T + +4. Do not break before extending characters. + +The next two rules are only for extended grapheme clusters (but that's what we +are implementing). + +5. Do not break before SpacingMarks. + +6. Do not break after Prepend characters. + +7. Otherwise, break everywhere. 
+*/ + +const uint32_t PRIV(ucp_gbtable[]) = { + (1< #include +#include +#include +#include +#include +#include -int main(void) +/* Both libreadline and libedit are optionally supported. The user-supplied +original patch uses readline/readline.h for libedit, but in at least one system +it is installed as editline/readline.h, so the configuration code now looks for +that first, falling back to readline/readline.h. */ + +#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) +#ifdef HAVE_UNISTD_H +#include +#endif +#if defined(SUPPORT_LIBREADLINE) +#include +#include +#else +#if defined(HAVE_EDITLINE_READLINE_H) +#include +#else +#include +#endif +#endif +#endif + + +/* ---------------------- System-specific definitions ---------------------- */ + +/* A number of things vary for Windows builds. Originally, pcretest opened its +input and output without "b"; then I was told that "b" was needed in some +environments, so it was added for release 5.0 to both the input and output. (It +makes no difference on Unix-like systems.) Later I was told that it is wrong +for the input on Windows. I've now abstracted the modes into two macros that +are set here, to make it easier to fiddle with them, and removed "b" from the +input mode under Windows. */ + +#if defined(_WIN32) || defined(WIN32) +#include /* For _setmode() */ +#include /* For _O_BINARY */ +#define INPUT_MODE "r" +#define OUTPUT_MODE "wb" + +#ifndef isatty +#define isatty _isatty /* This is what Windows calls them, I'm told, */ +#endif /* though in some environments they seem to */ + /* be already defined, hence the #ifndefs. */ +#ifndef fileno +#define fileno _fileno +#endif + +/* A user sent this fix for Borland Builder 5 under Windows. */ + +#ifdef __BORLANDC__ +#define _setmode(handle, mode) setmode(handle, mode) +#endif + +/* Not Windows */ + +#else +#include /* These two includes are needed */ +#include /* for setrlimit(). 
*/ +#if defined NATIVE_ZOS /* z/OS uses non-binary I/O */ +#define INPUT_MODE "r" +#define OUTPUT_MODE "w" +#else +#define INPUT_MODE "rb" +#define OUTPUT_MODE "wb" +#endif +#endif + +#ifdef __VMS +#include +void vms_setsymbol( char *, char *, int ); +#endif + +/* ------------------End of system-specific definitions -------------------- */ + +/* Glueing macros that are used in several places below. */ + +#define glue(a,b) a##b +#define G(a,b) glue(a,b) + +/* Other parameters */ + +#ifndef CLOCKS_PER_SEC +#ifdef CLK_TCK +#define CLOCKS_PER_SEC CLK_TCK +#else +#define CLOCKS_PER_SEC 100 +#endif +#endif + +/* Size of buffer for the version string. */ + +#define VERSION_SIZE 64 + +/* This is the default loop count for timing. */ + +#define LOOPREPEAT 500000 + +/* Execution modes */ + +enum { PCRE8_MODE, PCRE16_MODE, PCRE32_MODE }; + +/* Processing returns */ + +enum { PR_OK, PR_SKIP, PR_ABEND }; + +/* This is defined before including pcre2_internal.h so that it does not get +defined therein. */ + +#define PRIV(name) name + +/* We have to include pcre2_internal.h and pcre2_intstructs.h because we need +to know about the internal macros, structures, and other internal data values; +pcre2test has "inside information" compared to a program that strictly follows +the PCRE2 API. We need the structures at all supported code widths; +pcre2_internal.h does not include the structures if PCRE2_CODE_UNIT_WIDTH is +unset. This enables us to #include pcre2_intstructs.h as many times as +necessary. + +Although pcre2_internal.h does itself include pcre2.h, we explicitly include it +before pcre2_internal.h so that the PCRE2_EXP_xxx macros get set +appropriately for an application, not for building PCRE2. */ + +#include "pcre2.h" +#include "pcre2posix.h" +#include "pcre2_internal.h" + +/* Now include the internal structures at all supported widths. Because +PCRE2_CODE_UNIT_WIDTH was not defined before including pcre2.h, it will have +left PCRE2_SUFFIX defined as a no-op.
We must re-define it appropriately while +including the internal structures, and then restore it to a no-op. */ + +#ifdef SUPPORT_PCRE8 +#undef PCRE2_SUFFIX +#define PCRE2_SUFFIX(a) G(a,8) +#include "pcre2_intstructs.h" +#endif + +#ifdef SUPPORT_PCRE16 +#undef PCRE2_SUFFIX +#define PCRE2_SUFFIX(a) G(a,16) +#include "pcre2_intstructs.h" +#endif + +#ifdef SUPPORT_PCRE32 +#undef PCRE2_SUFFIX +#define PCRE2_SUFFIX(a) G(a,32) +#include "pcre2_intstructs.h" +#endif + +#undef PCRE2_SUFFIX +#define PCRE2_SUFFIX(a) a + +/* We need access to some of the data tables that PCRE uses. So as not to have +to keep two copies, we include the source files here, having previously defined +PRIV to change the names of the external symbols to prevent clashes. Defining +PCRE2_INCLUDED makes some minor changes in the files. */ + +#define PCRE2_INCLUDED +#include "pcre2_tables.c" + +#ifdef FIXME +#include "pcre2_ucd.c" +#endif + +/* If we have 8-bit support, default to it; if there is also 16- or 32-bit +support, it can be changed by an option. If there is no 8-bit support, there +must be 16- or 32-bit support, so default to one of them. The config function, +JIT stack, contexts, and version string are the same in all modes, so use the +form of the first that is available.
*/ + +#if defined SUPPORT_PCRE8 +#define DEFAULT_TEST_MODE PCRE8_MODE +#define PCRE2_CONFIG pcre2_config_8 +#define PCRE2_JIT_STACK pcre2_jit_stack_8 +#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8 +#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8 +#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8 +#define PCRE2_VERSION pcre2_version_8 +#define VERSION_TYPE PCRE2_UCHAR8 + +#elif defined SUPPORT_PCRE16 +#define DEFAULT_TEST_MODE PCRE16_MODE +#define PCRE2_CONFIG pcre2_config_16 +#define PCRE2_JIT_STACK pcre2_jit_stack_16 +#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16 +#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16 +#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16 +#define PCRE2_VERSION pcre2_version_16 +#define VERSION_TYPE PCRE2_UCHAR16 + +#elif defined SUPPORT_PCRE32 +#define DEFAULT_TEST_MODE PCRE32_MODE +#define PCRE2_CONFIG pcre2_config_32 +#define PCRE2_JIT_STACK pcre2_jit_stack_32 +#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32 +#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32 +#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32 +#define PCRE2_VERSION pcre2_version_32 +#define VERSION_TYPE PCRE2_UCHAR32 +#endif + + +/* Table of names for newline types. Must be kept in step with the definitions +of PCRE2_NEWLINE_xx in pcre2.h. 
*/ + +static const char *newlines[] = { "CR", "LF", "CRLF", "ANY", "ANYCRLF" }; + + +/* Modifier types and applicability */ + +enum { MOD_CTC, /* Applies to a compile context */ + MOD_CTM, /* Applies to a match context */ + MOD_PAT, /* Applies to a pattern */ + MOD_DAT, /* Applies to a data line */ + MOD_PD, /* Applies to a pattern or a data line */ + MOD_CTL, /* Is a control bit */ + MOD_BSR, /* Is a BSR value */ + MOD_IN2, /* Is one or two integer values */ + MOD_INT, /* Is an integer value */ + MOD_NL, /* Is a newline value */ + MOD_NN, /* Is a number or a name; more than one may occur */ + MOD_OPT, /* Is an option bit */ + MOD_STR }; /* Is a string */ + +/* Control bits */ + +#define CTL_AFTERTEXT 0x00000001 +#define CTL_ALLAFTERTEXT 0x00000002 +#define CTL_ALLCAPTURES 0x00000004 +#define CTL_ALTGLOBMATCH 0x00000008 +#define CTL_BYTECODE 0x00000010 +#define CTL_CALLOUT_CAPTURE 0x00000020 +#define CTL_CALLOUT_NONE 0x00000040 +#define CTL_DFA 0x00000080 +#define CTL_DFA_RESTART 0x00000100 +#define CTL_DFA_SHORTEST 0x00000200 +#define CTL_FLIPBYTES 0x00000400 +#define CTL_FULLBYTECODE 0x00000800 +#define CTL_GETLIST 0x00001000 +#define CTL_GLOBMATCH 0x00002000 +#define CTL_INFO 0x00004000 +#define CTL_JITVERIFY 0x00008000 +#define CTL_LIMITS 0x00010000 +#define CTL_MARKS 0x00020000 +#define CTL_MEMORY 0x00040000 +#define CTL_PERLCOMPAT 0x00080000 +#define CTL_POSIX 0x00100000 + +#define CTL_DEBUG (CTL_FULLBYTECODE|CTL_INFO) /* For setting */ +#define CTL_ANYINFO (CTL_DEBUG|CTL_BYTECODE) /* For testing */ + +/* Structures and tables for handling modifiers. */ + +typedef struct patctl { /* Structure for pattern control settings. */ + uint32_t options; + uint32_t control; + uint32_t jit; + uint32_t stackguard_test; + uint32_t tables_id; + char locale[32]; + char save[64]; +} patctl; + +#define MAXCPYGET 10 +#define LENCPYGET 64 + +typedef struct datctl { /* Structure for data line control settings. 
*/ + uint32_t options; + uint32_t control; + uint32_t cfail[2]; + uint32_t copy_numbers[MAXCPYGET]; + uint32_t get_numbers[MAXCPYGET]; + uint32_t jitstack; + uint32_t ovecsize; + uint32_t offset; + char copy_names[LENCPYGET]; + char get_names[LENCPYGET]; +} datctl; + +/* Ids for which context to modify. */ + +enum { CTX_PAT, CTX_DEFPAT, CTX_DAT, CTX_DEFDAT, CTX_DEFANY }; + +/* Macros to simplify the big table below. */ + +#define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name) +#define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name) +#define PO(name) offsetof(patctl, name) +#define PD(name) PO(name) +#define DO(name) offsetof(datctl, name) + +/* The offsets to the options and control bits fields of the pattern and data +control blocks must be the same so that common options and controls such as +"anchored" or "memory" can work for either of them from a single table entry. */ + +#ifdef FIXME +#if PO(options) != DO(options) +#error Options offsets for pattern and data must be the same. +#endif + +#if PO(control) != DO(control) +#error Control bits offsets for pattern and data must be the same. +#endif + +#endif + +/* Table of all long-form modifiers. 
*/ + +typedef struct modstruct { + const char *name; + uint16_t which; + uint16_t type; + uint32_t value; + size_t offset; +} modstruct; + +static modstruct modlist[] = { + { "aftertext", MOD_PAT, MOD_CTL, CTL_AFTERTEXT, PO(control) }, + { "allaftertext", MOD_PAT, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) }, + { "allcaptures", MOD_PAT, MOD_CTL, CTL_ALLCAPTURES, PO(control) }, + { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) }, + { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) }, + { "altglobmatch", MOD_PAT, MOD_CTL, CTL_ALTGLOBMATCH, PO(control) }, + { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) }, + { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, + { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) }, + { "bytecode", MOD_PAT, MOD_CTL, CTL_BYTECODE, PO(control) }, + { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) }, + { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) }, + { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, + { "caseless", MOD_PAT, MOD_OPT, PCRE2_CASELESS, PO(options) }, + { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) }, + { "debug", MOD_PAT, MOD_OPT, CTL_DEBUG, PO(control) }, + { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) }, + { "dfa_restart", MOD_DAT, MOD_OPT, CTL_DFA_RESTART, DO(options) }, + { "dfa_shortest", MOD_DAT, MOD_OPT, CTL_DFA_SHORTEST, DO(options) }, + { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) }, + { "dotall", MOD_PAT, MOD_OPT, PCRE2_DOTALL, PO(options) }, + { "dupnames", MOD_PAT, MOD_OPT, PCRE2_DUPNAMES, PO(options) }, + { "extended", MOD_PAT, MOD_OPT, PCRE2_EXTENDED, PO(options) }, + { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) }, + { "flipbytes", MOD_PAT, MOD_CTL, CTL_FLIPBYTES, PO(control) }, + { "fullbytecode", MOD_PAT, MOD_CTL, CTL_FULLBYTECODE, PO(control) }, + { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) }, + { "getlist", MOD_DAT, MOD_OPT, 
CTL_GETLIST, DO(control) }, + { "globmatch", MOD_PAT, MOD_CTL, CTL_GLOBMATCH, PO(control) }, + { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) }, + { "jit", MOD_PAT, MOD_INT, 1, PO(jit) }, + { "jitstack", MOD_DAT, MOD_INT, 0, DO(jitstack) }, + { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) }, + { "limits", MOD_DAT, MOD_CTL, CTL_LIMITS, DO(control) }, + { "locale", MOD_PAT, MOD_STR, 0, PO(locale) }, + { "marks", MOD_PAT, MOD_CTL, CTL_MARKS, PO(control) }, + { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) }, + { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) }, + { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) }, + { "multiline", MOD_PAT, MOD_OPT, PCRE2_MULTILINE, PO(options) }, + { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) }, + { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) }, + { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) }, + { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, + { "no_auto_possess", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, + { "no_start_optimize", MOD_PD, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PD(options) }, + { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) }, + { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) }, + { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) }, + { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) }, + { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) }, + { "offset", MOD_DAT, MOD_INT, 0, DO(offset) }, + { "ovector", MOD_DAT, MOD_INT, 0, DO(ovecsize) }, + { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) }, + { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, + { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, + { "perlcompat", MOD_PAT, MOD_CTL, CTL_PERLCOMPAT, PO(control) }, + { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) }, + { "recursion_limit", MOD_CTM, 
MOD_INT, 0, MO(recursion_limit) }, + { "save", MOD_PAT, MOD_STR, 0, PO(save) }, + { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) }, + { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, + { "ucp", MOD_PAT, MOD_OPT, PCRE2_UCP, PO(options) }, + { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) }, + { "utf", MOD_PAT, MOD_OPT, PCRE2_UTF, PO(options) } +}; + +#define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct) + +/* Table of single-character and doubled-character abbreviated modifiers. The +index field is initialized to -1, but the first time the modifier is +encountered, it is filled in with the index of the full entry in modlist, to +save repeated searching when processing multiple test items. */ + +typedef struct c1modstruct { + const char *fullname; + uint32_t onechar; + int index; +} c1modstruct; + +static c1modstruct c1modlist[] = { + { "bytecode", 'B', -1 }, + { "fullbytecode", ('B'<<8)|'B', -1 }, + { "debug", 'D', -1 }, + { "info", 'I', -1 }, + { "partial_soft", 'P', -1 }, + { "partial_hard", ('P'<<8)|'P', -1 }, + { "globmatch", 'g', -1 }, + { "altglobmatch", ('g'<<8)|'g', -1 }, + { "caseless", 'i', -1 }, + { "multiline", 'm', -1 }, + { "dotall", 's', -1 }, + { "extended", 'x', -1 } +}; + +#define C1MODLISTCOUNT sizeof(c1modlist)/sizeof(c1modstruct) + +/* Table of arguments for the -C command line option. Use macros to make the +table easier to read. 
*/ + +#if defined SUPPORT_PCRE8 +#define SUPPORT_8 1 +#endif +#if defined SUPPORT_PCRE16 +#define SUPPORT_16 1 +#endif +#if defined SUPPORT_PCRE32 +#define SUPPORT_32 1 +#endif + +#ifndef SUPPORT_8 +#define SUPPORT_8 0 +#endif +#ifndef SUPPORT_16 +#define SUPPORT_16 0 +#endif +#ifndef SUPPORT_32 +#define SUPPORT_32 0 +#endif + +#ifdef EBCDIC +#define SUPPORT_EBCDIC 1 +#define EBCDIC_NL CHAR_LF +#else +#define SUPPORT_EBCDIC 0 +#define EBCDIC_NL 0 +#endif + +typedef struct coptstruct { + const char *name; + uint32_t type; + uint32_t value; +} coptstruct; + +enum { CONF_BSR, + CONF_FIX, + CONF_FIZ, + CONF_INT, + CONF_NL +}; + +static coptstruct coptlist[] = { + { "bsr", CONF_BSR, PCRE2_CONFIG_BSR }, + { "ebcdic", CONF_FIX, SUPPORT_EBCDIC }, + { "ebcdic-nl", CONF_FIZ, EBCDIC_NL }, + { "jit", CONF_INT, PCRE2_CONFIG_JIT }, + { "linksize", CONF_INT, PCRE2_CONFIG_LINK_SIZE }, + { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE }, + { "pcre16", CONF_FIX, SUPPORT_16 }, + { "pcre32", CONF_FIX, SUPPORT_32 }, + { "pcre8", CONF_FIX, SUPPORT_8 }, + { "utf", CONF_INT, PCRE2_CONFIG_UTF } +}; + +#define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct) + +#undef SUPPORT_8 +#undef SUPPORT_16 +#undef SUPPORT_32 +#undef SUPPORT_EBCDIC + + +/* Static variables */ + +static FILE *infile; +static FILE *outfile; + +static int buffer_size = 50000; +static size_t dbuffer_size = 1u << 14; +static int test_mode = DEFAULT_TEST_MODE; +static int timeit = 0; +static int timeitm = 0; + +clock_t total_compile_time = 0; +clock_t total_match_time = 0; + +static uint8_t *buffer = NULL; +static uint8_t *pbuffer = NULL; +static uint8_t *dbuffer = NULL; + +static VERSION_TYPE version[64]; + +static patctl def_patctl; +static patctl pat_patctl; +static datctl def_datctl; +static datctl dat_datctl; + +static regex_t preg = { NULL, 0, 0 }; + + +/* We need buffers for building 16/32-bit strings; 8-bit strings don't need +rebuilding, but set up an appropriate name (pbuffer8) for use in macros. 
It +will be pointed to the same memory as pbuffer. We also need the tables of +operator lengths that are used for 16/32-bit compiling, in order to swap bytes +in a pattern for saving/reloading testing. Luckily, the data for these tables +is defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE +(which are used in the tables) are adjusted appropriately for the 16/32-bit +world. LINK_SIZE is also used later in this program. */ + +static uint8_t *pbuffer8 = NULL; + +#ifdef SUPPORT_PCRE16 +#undef IMM2_SIZE +#define IMM2_SIZE 1 + +#if LINK_SIZE == 2 +#undef LINK_SIZE +#define LINK_SIZE 1 +#elif LINK_SIZE == 3 || LINK_SIZE == 4 +#undef LINK_SIZE +#define LINK_SIZE 2 +#else +#error LINK_SIZE must be either 2, 3, or 4 +#endif /* LINK_SIZE */ + +static int pbuffer16_size = 0; +static uint16_t *pbuffer16 = NULL; +#ifdef FIXME +static const uint16_t OP_lengths16[] = { OP_LENGTHS }; +#endif /* FIXME */ +#endif /* SUPPORT_PCRE16 */ + + +#ifdef SUPPORT_PCRE32 +#undef IMM2_SIZE +#define IMM2_SIZE 1 +#undef LINK_SIZE +#define LINK_SIZE 1 + +static int pbuffer32_size = 0; +static uint32_t *pbuffer32 = NULL; +#ifdef FIXME +static const uint32_t OP_lengths32[] = { OP_LENGTHS }; +#endif /* FIXME */ +#endif /* SUPPORT_PCRE32 */ + + +/* ---------------- Mode-dependent variables -------------------*/ + +#ifdef SUPPORT_PCRE8 +pcre2_code_8 *compiled_code8; +pcre2_compile_context_8 *pat_context8, *default_pat_context8; +pcre2_match_context_8 *dat_context8, *default_dat_context8; +#endif + +#ifdef SUPPORT_PCRE16 +pcre2_code_16 *compiled_code16; +pcre2_compile_context_16 *pat_context16, *default_pat_context16; +pcre2_match_context_16 *dat_context16, *default_dat_context16; +#endif + +#ifdef SUPPORT_PCRE32 +pcre2_code_32 *compiled_code32; +pcre2_compile_context_32 *pat_context32, *default_pat_context32; +pcre2_match_context_32 *dat_context32, *default_dat_context32; +#endif + + + + +/* ---------------- Mode-dependent, runtime-testing macros ------------------*/ + +/* Define 
macros for variables and functions that must be selected dynamically +depending on the mode setting (8, 16, 32). These are dependent on which modes +are supported. First handle cases when more than one mode is supported. */ + +#if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \ + defined (SUPPORT_PCRE32)) >= 2 + +#define CHAR_SIZE (1 << test_mode) + +/* ----- All three modes supported ----- */ + +#if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32) + +#define DATCTXCPY(a,b) \ + if (test_mode == PCRE8_MODE) \ + memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \ + else if (test_mode == PCRE16_MODE) \ + memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \ + else memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32)) + +#define PATCTXCPY(a,b) \ + if (test_mode == PCRE8_MODE) \ + memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \ + else if (test_mode == PCRE16_MODE) \ + memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \ + else memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)) + +#define FLD(a,b) ((test_mode == PCRE8_MODE)? G(a,8)->b : \ + (test_mode == PCRE16_MODE)? 
G(a,16)->b : G(a,32)->b) + +#define PCRE2_COMPILE(a,b,c,d,e,f,g) \ + if (test_mode == PCRE8_MODE) \ + G(a,8) = G(pcre2_compile,8)(G(b,8),c,d,e,f,G(g,8)); \ + else if (test_mode == PCRE16_MODE) \ + G(a,16) = G(pcre2_compile,16)(G(b,16),c,d,e,f,G(g,16)); \ + else \ + G(a,32) = G(pcre2_compile,32)(G(b,32),c,d,e,f,G(g,32)) + +#define PCRE2_JIT_COMPILE(a,b) \ + if (test_mode == PCRE8_MODE) \ + G(pcre2_jit_compile,8)(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + G(pcre2_jit_compile,16)(G(a,16),b); \ + else \ + G(pcre2_jit_compile,32)(G(a,32),b) + +#define PCRE2_PATTERN_INFO(a,b,c,d) \ + if (test_mode == PCRE8_MODE) \ + a = G(pcre2_pattern_info,8)(G(b,8),c,d); \ + else if (test_mode == PCRE16_MODE) \ + a = G(pcre2_pattern_info,16)(G(b,16),c,d); \ + else \ + a = G(pcre2_pattern_info,32)(G(b,32),c,d) + +#define PTR(x) ( \ + (test_mode == PCRE8_MODE)? (void *)G(x,8) : \ + (test_mode == PCRE16_MODE)? (void *)G(x,16) : \ + (void *)G(x,32)) + +#define SET(x,y) \ + if (test_mode == PCRE8_MODE) G(x,8) = y; \ + else if (test_mode == PCRE16_MODE) G(x,16) = y; \ + else G(x,32) = y + +#define SUB1(a,b) \ + if (test_mode == PCRE8_MODE) G(a,8)(G(b,8)); \ + else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16)); \ + else G(a,32)(G(b,32)) + +#define SUB2(a,b,c) \ + if (test_mode == PCRE8_MODE) G(a,8)(G(b,8),G(c,8)); \ + else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16),G(c,16)); \ + else G(a,32)(G(b,32),G(c,32)) + +#define TEST(x,r,y) ( \ + (test_mode == PCRE8_MODE && G(x,8) r (y)) || \ + (test_mode == PCRE16_MODE && G(x,16) r (y)) || \ + (test_mode == PCRE32_MODE && G(x,32) r (y))) + + + + +/* ----- Two out of three modes are supported ----- */ + +#else + +/* We can use some macro trickery to make a single set of definitions work in +the three different cases. 
*/ + +/* ----- 32-bit and 16-bit but not 8-bit supported ----- */ + +#if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16) +#define BITONE 32 +#define BITTWO 16 + +/* ----- 32-bit and 8-bit but not 16-bit supported ----- */ + +#elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8) +#define BITONE 32 +#define BITTWO 8 + +/* ----- 16-bit and 8-bit but not 32-bit supported ----- */ + +#else +#define BITONE 16 +#define BITTWO 8 +#endif + + +/* ----- Common macros for two-mode cases ----- */ + +#define DATCTXCPY(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \ + else \ + memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_match_context_,BITTWO))) + +#define PATCTXCPY(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_compile_context_,BITONE))); \ + else \ + memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO))) + +#define FLD(a,b) \ + ((test_mode == G(G(PCRE,BITONE),_MODE))? G(a,BITONE)->b : G(a,BITTWO)->b) + +#define PCRE2_COMPILE(a,b,c,d,e,f,g) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE) = G(pcre2_compile,BITONE)(G(b,BITONE),c,d,e,f,G(g,BITONE)); \ + else \ + G(a,BITTWO) = G(pcre2_compile,BITTWO)(G(b,BITTWO),c,d,e,f,G(g,BITTWO)) + +#define PCRE2_JIT_COMPILE(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_jit_compile,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_jit_compile,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_PATTERN_INFO(a,b,c,d) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_pattern_info,BITONE)(G(b,BITONE),c,d); \ + else \ + a = G(pcre2_pattern_info,BITTWO)(G(b,BITTWO),c,d) + +#define PTR(x) ( \ + (test_mode == G(G(PCRE,BITONE),_MODE))? 
(void *)G(x,BITONE) : \ + (void *)G(x,BITTWO)) + +#define SET(x,y) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE) = y; \ + else G(x,BITTWO) = y + +#define SUB1(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE))(G(b,BITONE)); \ + else \ + G(a,BITTWO))(G(b,BITTWO)) + +#define SUB2(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE))(G(b,BITONE),G(c,BITONE)); \ + else \ + G(a,BITTWO))(G(b,BITTWO),G(c,BITTWO)) + +#define TEST(x,r,y) ( \ + (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE) r (y)) || \ + (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO) r (y))) + +#endif /* Two out of three modes */ + +/* ----- End of cases where more than one mode is supported ----- */ + + +/* ----- Only 8-bit mode is supported ----- */ + +#elif defined SUPPORT_PCRE8 +#define CHAR_SIZE 1 +#define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)) +#define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)) +#define FLD(a,b) G(a,8)->b +#define PCRE2_COMPILE(a,b,c,d,e,f,g) \ + G(a,8) = G(pcre2_compile,8)(G(b,8),c,d,e,f,G(g,8)) +#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_8(G(a,8),b) +#define PCRE2_PATTERN_INFO(a,b,c,d) \ + a = pcre2_pattern_info_8(G(b,8),c,d) +#define PTR(x) (void *)G(x,8) +#define SET(x,y) G(x,8) = y +#define SUB1(a,b) G(a,8)(G(b,8)) +#define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8)) +#define TEST(x,r,y) (G(a,8) r (y)) + + +/* ----- Only 16-bit mode is supported ----- */ + +#elif defined SUPPORT_PCRE16 +#define CHAR_SIZE 2 +#define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)) +#define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)) +#define FLD(a,b) G(a,16)->b +#define PCRE2_COMPILE(a,b,c,d,e,f,g) \ + G(a,16) = G(pcre2_compile,16)(G(b,16),c,d,e,f,G(g,16)) +#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_16(G(a,16),b) +#define PCRE2_PATTERN_INFO(a,b,c,d) \ + a = pcre2_pattern_info_16(G(b,16),c,d) +#define PTR(x) (void *)G(x,16) +#define 
SET(x,y) G(x,16) = y +#define SUB1(a,b) G(a,16)(G(b,16)) +#define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16)) +#define TEST(x,r,y) (G(a,16) r (y)) + + +/* ----- Only 32-bit mode is supported ----- */ + +#elif defined SUPPORT_PCRE32 +#define CHAR_SIZE 4 +#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32)) +#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)) +#define FLD(a,b) G(a,32)->b +#define PCRE2_COMPILE(a,b,c,d,e,f,g) \ + G(a,32) = G(pcre2_compile,32)(G(b,32),c,d,e,f,G(g,32)) +#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_32(G(a,32),b) +#define PCRE2_PATTERN_INFO(a,b,c,d) \ + a = pcre2_pattern_info_32(G(b,32),c,d) +#define PTR(x) (void *)G(x,32) +#define SET(x,y) G(x,32) = y +#define SUB1(a,b) G(a,32)(G(b,32)) +#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32)) +#define TEST(x,r,y) (G(a,32) r (y)) + +#endif + +/* ----- End of mode-specific function call macros ----- */ + + + + +/************************************************* +* Convert character value to UTF-8 * +*************************************************/ + +/* This function takes an integer value in the range 0 - 0x7fffffff +and encodes it as a UTF-8 character in 0 to 6 bytes. 
+ +Arguments: + cvalue the character value + utf8bytes pointer to buffer for result - at least 6 bytes long + +Returns: number of characters placed in the buffer +*/ + +static int +ord2utf8(uint32_t cvalue, uint8_t *utf8bytes) { -printf("This is a dummy placeholder file for pcre2test.\n"); +register int i, j; +if (cvalue > 0x7fffffffu) + return -1; +for (i = 0; i < utf8_table1_size; i++) + if (cvalue <= (uint32_t)utf8_table1[i]) break; +utf8bytes += i; +for (j = i; j > 0; j--) + { + *utf8bytes-- = 0x80 | (cvalue & 0x3f); + cvalue >>= 6; + } +*utf8bytes = utf8_table2[i] | cvalue; +return i + 1; +} + + + +/************************************************* +* Convert UTF-8 string to value * +*************************************************/ + +/* This function reads one or more bytes that represent a UTF-8 character, +and returns the codepoint of that character. Note that the function supports +the original UTF-8 definition of RFC 2279, allowing for values in the range 0 +to 0x7fffffff, up to 6 bytes long. This makes it possible to generate +codepoints greater than 0x10ffff which are useful for testing PCRE's error +checking, and also for generating 32-bit non-UTF data values above the UTF +limit. 
+ +Argument: + utf8bytes a pointer to the byte vector + vptr a pointer to an int to receive the value + +Returns: > 0 => the number of bytes consumed + -6 to 0 => malformed UTF-8 character at offset = (-return) +*/ + +static int +utf82ord(uint8_t *utf8bytes, uint32_t *vptr) +{ +uint32_t c = *utf8bytes++; +uint32_t d = c; +int i, j, s; + +for (i = -1; i < 6; i++) /* i is number of additional bytes */ + { + if ((d & 0x80) == 0) break; + d <<= 1; + } + +if (i == -1) { *vptr = c; return 1; } /* ascii character */ +if (i == 0 || i == 6) return 0; /* invalid UTF-8 */ + +/* i now has a value in the range 1-5 */ + +s = 6*i; +d = (c & utf8_table3[i]) << s; + +for (j = 0; j < i; j++) + { + c = *utf8bytes++; + if ((c & 0xc0) != 0x80) return -(j+1); + s -= 6; + d |= (c & 0x3f) << s; + } + +/* Check that encoding was the correct unique one */ + +for (j = 0; j < utf8_table1_size; j++) + if (d <= (uint32_t)utf8_table1[j]) break; +if (j != i) return -(i+1); + +/* Valid value */ + +*vptr = d; +return i+1; +} + + + +#ifdef SUPPORT_PCRE16 +/************************************************* +* Convert a string to 16-bit * +*************************************************/ + +/* The input is always interpreted as a string of UTF-8 bytes. If all the input +bytes are ASCII, the space needed for a 16-bit string is exactly double the +8-bit size. Otherwise, the size needed for a 16-bit string is no more than +double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4 +in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The +result is always left in pbuffer16. + +Note that this function does not object to surrogate values. This is +deliberate; it makes it possible to construct UTF-16 strings that are invalid, +for the purpose of testing that they are correctly faulted. 
+ +Arguments: + p points to a byte string + utf non-zero if converting to UTF-16 + len number of bytes in the string (excluding trailing zero) + +Returns: number of 16-bit data items used (excluding trailing zero) + OR -1 if a UTF-8 string is malformed + OR -2 if a value > 0x10ffff is encountered in UTF mode + OR -3 if a value > 0xffff is encountered when not in UTF mode +*/ + +static int +to16(uint8_t *p, int utf, int len) +{ +uint16_t *pp; + +if (pbuffer16_size < 2*len + 2) + { + if (pbuffer16 != NULL) free(pbuffer16); + pbuffer16_size = 2*len + 2; + pbuffer16 = (uint16_t *)malloc(pbuffer16_size); + if (pbuffer16 == NULL) + { + fprintf(stderr, "pcretest: malloc(%d) failed for pbuffer16\n", pbuffer16_size); + exit(1); + } + } +pp = pbuffer16; + +while (len > 0) + { + uint32_t c; + int chlen = utf82ord(p, &c); + if (chlen <= 0) return -1; + if (c > 0x10ffff) return -2; + p += chlen; + len -= chlen; + if (c < 0x10000) *pp++ = c; else + { + if (!utf) return -3; + c -= 0x10000; + *pp++ = 0xD800 | (c >> 10); + *pp++ = 0xDC00 | (c & 0x3ff); + } + } + +*pp = 0; +return pp - pbuffer16; +} +#endif + + + +#ifdef SUPPORT_PCRE32 +/************************************************* +* Convert a string to 32-bit * +*************************************************/ + +/* The input is always interpreted as a string of UTF-8 bytes. If all the input +bytes are ASCII, the space needed for a 32-bit string is exactly four times the +8-bit size. Otherwise, the size needed for a 32-bit string is no more than four +times, because the number of characters must be less than the number of bytes. +The result is always left in pbuffer32. + +Note that this function does not object to surrogate values. This is +deliberate; it makes it possible to construct UTF-32 strings that are invalid, +for the purpose of testing that they are correctly faulted. 
+ +Arguments: + p points to a byte string + utf true if UTF-8 (to be converted to UTF-32) + len number of bytes in the string (excluding trailing zero) + +Returns: number of 32-bit data items used (excluding trailing zero) + OR -1 if a UTF-8 string is malformed + OR -2 if a value > 0x10ffff is encountered in UTF mode +*/ + +static int +to32(uint8_t *p, int utf, int len) +{ +uint32_t *pp; + +if (pbuffer32_size < 4*len + 4) + { + if (pbuffer32 != NULL) free(pbuffer32); + pbuffer32_size = 4*len + 4; + pbuffer32 = (uint32_t *)malloc(pbuffer32_size); + if (pbuffer32 == NULL) + { + fprintf(stderr, "pcretest: malloc(%d) failed for pbuffer32\n", pbuffer32_size); + exit(1); + } + } +pp = pbuffer32; + +while (len > 0) + { + uint32_t c; + int chlen = utf82ord(p, &c); + if (chlen <= 0) return -1; + if (utf && c > 0x10ffff) return -2; + p += chlen; + len -= chlen; + *pp++ = c; + } + +*pp = 0; +return pp - pbuffer32; +} +#endif /* SUPPORT_PCRE32 */ + + + +/************************************************* +* Read or extend an input line * +*************************************************/ + +/* Input lines are read into buffer, but both patterns and data lines can be +continued over multiple input lines. In addition, if the buffer fills up, we +want to automatically expand it so as to be able to handle extremely large +lines that are needed for certain stress tests. When the input buffer is +expanded, the other two buffers must also be expanded likewise, and the +contents of pbuffer, which are a copy of the input for callouts, must be +preserved (for when expansion happens for a data line). This is not the most +optimal way of handling this, but hey, this is just a test program! 
+ +Arguments: + f the file to read + start where in buffer to start (this *must* be within buffer) + prompt for stdin or readline() + +Returns: pointer to the start of new data + could be a copy of start, or could be moved + NULL if no data read and EOF reached +*/ + +static uint8_t * +extend_inputline(FILE *f, uint8_t *start, const char *prompt) +{ +uint8_t *here = start; + +for (;;) + { + size_t rlen = (size_t)(buffer_size - (here - buffer)); + + if (rlen > 1000) + { + int dlen; + + /* If libreadline or libedit support is required, use readline() to read a + line if the input is a terminal. Note that readline() removes the trailing + newline, so we must put it back again, to be compatible with fgets(). */ + +#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) + if (isatty(fileno(f))) + { + size_t len; + char *s = readline(prompt); + if (s == NULL) return (here == start)? NULL : start; + len = strlen(s); + if (len > 0) add_history(s); + if (len > rlen - 1) len = rlen - 1; + memcpy(here, s, len); + here[len] = '\n'; + here[len+1] = 0; + free(s); + } + else +#endif + + /* Read the next line by normal means, prompting if the file is stdin. */ + + { + if (f == stdin) printf("%s", prompt); + if (fgets((char *)here, rlen, f) == NULL) + return (here == start)? 
NULL : start; + } + + dlen = (int)strlen((char *)here); + if (dlen > 0 && here[dlen - 1] == '\n') return start; + here += dlen; + } + + else + { + int new_buffer_size = 2*buffer_size; + uint8_t *new_buffer = (uint8_t *)malloc(new_buffer_size); + uint8_t *new_pbuffer = (uint8_t *)malloc(new_buffer_size); + + if (new_buffer == NULL || new_pbuffer == NULL) + { + fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_buffer_size); + exit(1); + } + + memcpy(new_buffer, buffer, buffer_size); + memcpy(new_pbuffer, pbuffer, buffer_size); + + buffer_size = new_buffer_size; + + start = new_buffer + (start - buffer); + here = new_buffer + (here - buffer); + + free(buffer); + free(pbuffer); + + buffer = new_buffer; + pbuffer = pbuffer8 = new_pbuffer; + } + } + +/* Control never gets here */ +} + + + +/************************************************* +* Case-independent strncmp() function * +*************************************************/ + +/* +Arguments: + s first string + t second string + n number of characters to compare + +Returns: < 0, = 0, or > 0, according to the comparison +*/ + +static int +strncmpic(const uint8_t *s, const uint8_t *t, int n) +{ +while (n--) + { + int c = tolower(*s++) - tolower(*t++); + if (c) return c; + } return 0; } + + + +/************************************************* +* Read number from string * +*************************************************/ + +/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess +around with conditional compilation, just do the job by hand. It is only used +for unpicking arguments, so just keep it simple. 
+ +Arguments: + str string to be converted + endptr where to put the end pointer + +Returns: the unsigned long +*/ + +static int +get_value(const char *str, const char **endptr) +{ +int result = 0; +while(*str != 0 && isspace(*str)) str++; +while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0'); +*endptr = str; +return(result); +} + + + +/************************************************* +* Scan the main modifier list * +*************************************************/ + +/* This function searches the modifier list for a modifier whose name matches +the initial characters of the given string. + +Argument: + p the string + lenptr where to return the length matched + +Returns: an index in the modifier list, or -1 on failure +*/ + +static int +scan_modifiers(const uint8_t *p, unsigned int *lenptr) +{ +int bot = 0; +int top = MODLISTCOUNT; + +while (top > bot) + { + int mid = (bot + top)/2; + unsigned int len = strlen(modlist[mid].name); + int c = strncmp((char *)p, modlist[mid].name, len); + if (c == 0) + { + *lenptr = len; + return mid; + } + if (c > 0) bot = mid + 1; else top = mid; + } + +return -1; + +} + + + +/************************************************* +* Check a modifer and find its field * +*************************************************/ + +/* This function is called when a modifier has been identified. We check that +it is allowed here and find the field that is to be changed. 
+ +Arguments: + m the modifier list entry + ctx CTX_PAT => pattern context + CTX_DEFPAT => default pattern context + CTX_DAT => data context + CTX_DEFDAT => default data context + CTX_DEFANY => any default context (depends on the modifier) + pctl point to pattern control block + dctl point to data control block + c a single character or 0 + +Returns: a field pointer or NULL +*/ + +static void * +check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c) +{ +void *field = NULL; +switch (m->which) + { + case MOD_CTC: + if (ctx == CTX_DEFPAT || ctx == CTX_DEFANY) field = PTR(default_pat_context); + else if (ctx == CTX_PAT) field = PTR(pat_context); + break; + + case MOD_CTM: + if (ctx == CTX_DEFDAT || ctx == CTX_DEFANY) field = PTR(default_dat_context); + else if (ctx == CTX_DAT) field = PTR(dat_context); + break; + + case MOD_DAT: + if (dctl != NULL) field = dctl; + break; + + case MOD_PAT: + if (pctl != NULL) field = pctl; + break; + + case MOD_PD: + if (dctl != NULL) field = dctl; + else if (pctl != NULL) field = pctl; + break; + } + +if (field == NULL) + { + if (c == 0) + fprintf(outfile, "** '%s' is not valid here\n", m->name); + else + fprintf(outfile, "** /%c is not valid here\n", c); + return NULL; + } + +return (char *)field + m->offset; +} + + + +/************************************************* +* Decode a modifier list * +*************************************************/ + +/* A pointers to a context or control block is NULL when called in cases when +that block is not relevant. They are never all relevant in one call. In +particular, at least one of patctl and datctl is always NULL. 
+
+Arguments:
+  p           point to modifier string
+  ctx         CTX_PAT     => pattern context
+              CTX_DEFPAT  => default pattern context
+              CTX_DAT     => data context
+              CTX_DEFDAT  => default data context
+              CTX_DEFANY  => any default context (depends on the modifier)
+  pctl        point to pattern control block
+  dctl        point to data control block
+
+Returns:      TRUE if successful decode, FALSE otherwise
+
+Items are separated by commas. An item is either a full modifier name,
+optionally preceded by '-' for the on/off types and followed by "=value" for
+the types that take data, or (first item only) a run of single-character
+abbreviations. On failure a message has already been written to outfile.
+*/
+
+static BOOL
+decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
+{
+uint8_t *ep, *pp;
+BOOL first = TRUE;
+
+for (;;)
+  {
+  void *field;
+  modstruct *m;
+  BOOL off = FALSE;
+  unsigned int i, len;
+  int index;
+  char *endptr;
+
+  /* Skip white space and commas; after a comma we have passed the first
+  item. */
+
+  while (isspace(*p)) p++;
+  if (*p == ',') first = FALSE;
+  while (isspace(*p) || *p == ',') p++;
+  if (*p == 0) break;
+
+  /* Find the end of the item. */
+
+  for (ep = p; *ep != 0 && *ep != ',' && !isspace(*ep); ep++);
+
+  /* Remember if the first character is '-'. */
+
+  if (*p == '-')
+    {
+    off = TRUE;
+    p++;
+    }
+
+  /* Scan for a full-length modifier name. */
+
+  index = scan_modifiers(p, &len);
+
+  /* If the first modifier is unrecognized, try to interpret it as a sequence
+  of single-character abbreviated modifiers. None of these modifiers have any
+  associated data. They just set options or control bits. */
+
+  if (index < 0)
+    {
+    uint32_t cc;
+
+    if (!first)
+      {
+      fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
+      if (ep - p == 1)
+        fprintf(outfile, "** Single-character modifiers must come first\n");
+      return FALSE;
+      }
+
+    for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
+      {
+      if (p[1] == cc)    /* Handle doubled characters */
+        {
+        cc = (cc << 8) | cc;
+        p++;
+        }
+
+      for (i = 0; i < C1MODLISTCOUNT; i++)
+        if (cc == c1modlist[i].onechar) break;
+
+      if (i >= C1MODLISTCOUNT)
+        {
+        fprintf(outfile, "** Unrecognized single-character modifier '%c'\n",
+          *p);
+        return FALSE;
+        }
+
+      if (c1modlist[i].index >= 0)
+        {
+        index = c1modlist[i].index;
+        }
+
+      else
+        {
+        index = scan_modifiers((uint8_t *)(c1modlist[i].fullname), &len);
+        if (index < 0)
+          {
+          fprintf(outfile, "** Internal error: single-character equivalent "
+            "modifier '%s' not found\n", c1modlist[i].fullname);
+          return FALSE;
+          }
+        c1modlist[i].index = index;     /* Cache for next time */
+        }
+
+      field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
+      if (field == NULL) return FALSE;
+      *((uint32_t *)field) |= modlist[index].value;
+      }
+
+    continue;    /* With the next (fullname) modifier */
+    }
+
+  /* We have a match on a full-name modifier. Check for the existence of data
+  when needed. */
+
+  m = modlist + index;      /* Save typing */
+  pp = p + len;             /* End of modifier name */
+
+  if (m->type != MOD_CTL && m->type != MOD_OPT)
+    {
+    if (*pp++ != '=')
+      {
+      fprintf(outfile, "** '=' expected after '%s'\n", m->name);
+      return FALSE;
+      }
+    if (off)
+      {
+      fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
+      return FALSE;
+      }
+    }
+
+  /* These on/off types have no data. */
+
+  else if (*pp != ',' && *pp != '\n' && *pp != 0)
+    {
+    fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
+    return FALSE;
+    }
+
+  /* Set the data length for those types that have data. Then find the field
+  that is to be set. If check_modifier() returns NULL, it has already output an
+  error message. */
+
+  len = ep - pp;
+  field = check_modifier(m, ctx, pctl, dctl, 0);
+  if (field == NULL) return FALSE;
+
+  /* Process according to data type. Each case leaves pp pointing at the
+  character that follows the data, which is checked after the switch. */
+
+  switch (m->type)
+    {
+    case MOD_CTL:
+    case MOD_OPT:
+    if (off) *((uint32_t *)field) &= ~m->value;
+      else *((uint32_t *)field) |= m->value;
+    break;
+
+    case MOD_BSR:
+    if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
+      *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
+    else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
+      *((uint16_t *)field) = PCRE2_BSR_UNICODE;
+    else goto INVALID_VALUE;
+    pp = ep;
+    break;
+
+    case MOD_IN2:           /* One or two integers, written as n or n/m */
+    if (!isdigit(*pp)) goto INVALID_VALUE;
+    ((uint32_t *)field)[0] = (uint32_t)strtoul((const char *)pp, &endptr, 10);
+    if (*endptr == '/')
+      ((uint32_t *)field)[1] = (uint32_t)strtoul((const char *)endptr+1, &endptr, 10);
+    else ((uint32_t *)field)[1] = 0;
+    pp = (uint8_t *)endptr;
+    break;
+
+    case MOD_INT:
+    if (!isdigit(*pp)) goto INVALID_VALUE;
+    *((uint32_t *)field) = (uint32_t)strtoul((const char *)pp, &endptr, 10);
+    pp = (uint8_t *)endptr;
+    break;
+
+    case MOD_NL:
+    for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
+      if (len == strlen(newlines[i]) &&
+        strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
+    if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
+    *((uint16_t *)field) = i;
+    pp = ep;
+    break;
+
+    case MOD_NN:              /* Name or number; may be several */
+    if (isdigit(*pp))
+      {
+      int ct = MAXCPYGET - 1;
+      field = (char *)field - m->offset + m->value;      /* Adjust field ptr */
+      while (*((uint32_t *)field) != 0 && ct-- > 0)      /* Skip previous */
+        field = (char *)field + sizeof(uint32_t);
+      if (ct <= 0)
+        {
+        fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
+        return FALSE;
+        }
+      *((uint32_t *)field) = (uint32_t)strtoul((const char *)pp, &endptr, 10);
+      pp = (uint8_t *)endptr;
+      }
+
+    /* Multiple strings are put end to end, each with its own terminating
+    zero, and the whole list is ended by a second zero. Both zeros have to fit,
+    so the space check is for len + 2 bytes. NOTE(review): this assumes the
+    name buffer is LENCPYGET bytes long - confirm against its declaration. */
+
+    else
+      {
+      char *nn = (char *)field;
+      while (*nn != 0) nn += strlen(nn) + 1;
+      if (nn + len + 2 - (char *)field > LENCPYGET)
+        {
+        fprintf(outfile, "** Too many named '%s' modifiers\n", m->name);
+        return FALSE;
+        }
+      memcpy(nn, pp, len);
+      nn[len] = 0 ;
+      nn[len+1] = 0;
+      pp = ep;
+      }
+    break;
+
+    /* The terminating zero must go into the destination field. Writing it
+    through pp would leave the copy unterminated and would also cut the
+    modifier string short at ep, so that any following modifiers were silently
+    ignored. NOTE(review): the length is not checked against the size of the
+    destination, which is not visible here. */
+
+    case MOD_STR:
+    memcpy(field, pp, len);
+    ((uint8_t *)field)[len] = 0;
+    pp = ep;
+    break;
+    }
+
+  if (*pp != ',' && *pp != '\n' && *pp != 0)
+    {
+    fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
+    return FALSE;
+    }
+
+  p = pp;
+  }
+
+return TRUE;
+
+INVALID_VALUE:
+fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
+return FALSE;
+}
+
+
+/*************************************************
+*             Get info from a pattern            *
+*************************************************/
+
+/* A wrapped call to pcre2_pattern_info(), applied to the current compiled
+pattern. Any negative return other than PCRE2_ERROR_UNSET is reported on
+outfile; for PCRE2_ERROR_BADMODE the running and compiled code unit widths are
+shown as well.
+
+Arguments:
+  what        code for the required information
+  where       where to put the answer
+
+Returns:      the return from pcre2_pattern_info()
+*/
+
+static int
+pattern_info(int what, void *where)
+{
+int rc;
+PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
+if (rc < 0 && rc != PCRE2_ERROR_UNSET)
+  {
+  fprintf(outfile, "Error %d from pcre2_pattern_info(%d)\n", rc, what);
+  if (rc == PCRE2_ERROR_BADMODE)
+    fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
+      "%d-bit mode\n", 8 * CHAR_SIZE,
+      8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
+  }
+return rc;
+}
+
+
+
+/*************************************************
+*        Show information about a pattern        *
+*************************************************/
+
+/* This function is called after a pattern has been compiled or loaded from a
+file, if any of the information-requesting controls have been set.
+
+Arguments:  none
+
+Returns:    PR_OK     continue processing next line
+            PR_SKIP   skip to a blank line
+            PR_ABEND  abort the pcre2test run
+
+NOTE: in this revision the whole body up to the final return is inside
+"#ifdef FIXME" ... "#endif (TOP FIXME)", so unless FIXME is defined the function
+just returns PR_OK. The conditional text is work in progress: it still uses
+names from the old pcretest (re, extra, get_options, new_info(), the PCRE_xxx
+option bits), and its braces do not balance.
+*/
+
+static int
+show_pattern_info(void)
+{
+#ifdef FIXME
+
+
+if ((pat_patctl.control & (CTL_BYTECODE|CTL_FULLBYTECODE)) != 0)
+  {
+  fprintf(outfile, "------------------------------------------------------------------\n");
+  PCRE2_PRINTINT(outfile, (pat_patctl.control & CTL_FULLBYTECODE) != 0);
+  }
+
+/* We already have the options in get_options (see above) */
+
+if ((pat_patctl.control & CTL_INFO) != 0)
+  {
+  unsigned long int all_options;
+  uint32_t first_cunit, last_cunit;
+  uint32_t match_limit, recursion_limit;
+  int count, backrefmax, first_ctype, last_ctype, jchanged,
+    hascrorlf, maxlookbehind, match_empty;
+  int nameentrysize, namecount;
+  const void *nametable;
+
+  /* These info requests should always succeed. The return codes are summed,
+  so any non-zero total is treated as a failure. */
+
+  if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax) +
+      pattern_info(PCRE2_INFO_CAPTURECOUNT, &count) +
+      pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit) +
+      pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype) +
+      pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf) +
+      pattern_info(PCRE2_INFO_JCHANGED, &jchanged) +
+      pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit) +
+      pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype) +
+      pattern_info(PCRE2_INFO_MATCH_EMPTY, &match_empty) +
+      pattern_info(PCRE2_INFO_MATCH_LIMIT, &match_limit) +
+      pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind) +
+      pattern_info(PCRE2_INFO_NAMECOUNT, &namecount) +
+      pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize) +
+      pattern_info(PCRE2_INFO_NAMETABLE, &nametable) +
+      pattern_info(PCRE2_INFO_RECURSION_LIMIT, &recursion_limit)
+      != 0)
+    return PR_ABEND;
+
+  fprintf(outfile, "Capturing subpattern count = %d\n", count);
+
+  if (backrefmax > 0)
+    fprintf(outfile, "Max back reference = %d\n", backrefmax);
+
+  if (maxlookbehind > 0)
+    fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
+
+  if (match_limit > 0)
+    fprintf(outfile, "Match limit = %u\n", match_limit);
+
+  if (recursion_limit > 0)
+    fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
+
+  /* Each name table entry is printed as the name, padded to the entry size,
+  followed by the group number that is held at the start of the entry. */
+
+  if (namecount > 0)
+    {
+    fprintf(outfile, "Named capturing subpatterns:\n");
+    while (namecount-- > 0)
+      {
+      int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
+      int length = (int)STRLEN(nametable + imm2_size);
+      fprintf(outfile, " ");
+      PCHARSV(nametable, imm2_size, length, outfile);
+      while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
+#ifdef SUPPORT_PCRE32
+      if (test_mode == PCRE32_MODE)
+        fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
+#endif
+#ifdef SUPPORT_PCRE16
+      if (test_mode == PCRE16_MODE)
+        fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
+#endif
+#ifdef SUPPORT_PCRE8
+      if (test_mode == PCRE8_MODE)
+        fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
+#endif
+      nametable += nameentrysize * CHAR_SIZE;
+      }
+    }
+
+  if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
+  if (match_empty) fprintf(outfile, "May match empty string\n");
+
+
+#ifdef FIXME
+
+  all_options = REAL_PCRE_OPTIONS(re);
+  if (do_flip) all_options = swap_uint32(all_options);
+
+  if (get_options == 0) fprintf(outfile, "No options\n");
+    else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+      ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
+      ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
+      ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
+      ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
+      ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
+      ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
+      ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
+      ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
+      ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
+      ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
+      ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
+      ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
+      ((get_options & PCRE_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
+      ((get_options & PCRE_UTF8) != 0)? " utf" : "",
+      ((get_options & PCRE_UCP) != 0)? " ucp" : "",
+      ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
+      ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
+      ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
+      ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
+
+#endif  /* FIXME */
+
+
+  if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
+
+
+#ifdef FIXME
+
+  switch (get_options & PCRE_NEWLINE_BITS)
+    {
+    case PCRE_NEWLINE_CR:
+    fprintf(outfile, "Forced newline sequence: CR\n");
+    break;
+
+    case PCRE_NEWLINE_LF:
+    fprintf(outfile, "Forced newline sequence: LF\n");
+    break;
+
+    case PCRE_NEWLINE_CRLF:
+    fprintf(outfile, "Forced newline sequence: CRLF\n");
+    break;
+
+    case PCRE_NEWLINE_ANYCRLF:
+    fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
+    break;
+
+    case PCRE_NEWLINE_ANY:
+    fprintf(outfile, "Forced newline sequence: ANY\n");
+    break;
+
+    default:
+    break;
+    }
+#endif  /* FIXME */
+
+
+  /* A first code type of 2 and of 1 are reported differently; any other value
+  is reported as there being no first code unit. */
+
+  if (first_ctype == 2)
+    {
+    fprintf(outfile, "First char at start or follows newline\n");
+    }
+  else if (first_ctype == 1)
+    {
+    const char *caseless =
+      ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
+      "" : " (caseless)";
+
+    if (PRINTOK(first_cunit))
+      fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
+    else
+      {
+      fprintf(outfile, "First code unit = ");
+      pchar(first_cunit, outfile);
+      fprintf(outfile, "%s\n", caseless);
+      }
+    }
+  else
+    {
+    fprintf(outfile, "No first code unit\n");
+    }
+
+  if (last_ctype == 0)
+    {
+    fprintf(outfile, "No last code unit\n");
+    }
+  else
+    {
+    const char *caseless =
+      ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
+      "" : " (caseless)";
+
+    if (PRINTOK(last_cunit))
+      fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
+    else
+      {
+      fprintf(outfile, "Last code unit = ");
+      pchar(last_cunit, outfile);
+      fprintf(outfile, "%s\n", caseless);
+      }
+    }
+
+#ifdef FIXME
+
+  if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
+    fprintf(outfile, "Subject length lower bound = %d\n", minlength);
+
+  /* The starting code unit set is a 256-bit map; output lines are wrapped
+  once the column count c passes 75. */
+
+  if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
+    {
+    if (start_bits == NULL)
+      fprintf(outfile, "No starting char list\n");
+    else
+      {
+      int i;
+      int c = 24;
+      fprintf(outfile, "Starting chars: ");
+      for (i = 0; i < 256; i++)
+        {
+        if ((start_bits[i/8] & (1<<(i&7))) != 0)
+          {
+          if (c > 75)
+            {
+            fprintf(outfile, "\n ");
+            c = 2;
+            }
+          if (PRINTOK(i) && i != ' ')
+            {
+            fprintf(outfile, "%c ", i);
+            c += 2;
+            }
+          else
+            {
+            fprintf(outfile, "\\x%02x ", i);
+            c += 5;
+            }
+          }
+        }
+      fprintf(outfile, "\n");
+      }
+    }
+
+  /* Show this only if the JIT was set by /S, not by -s. */
+
+  if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
+      (force_study_options & PCRE_STUDY_ALLJIT) == 0)
+    {
+    int jit;
+    if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
+      {
+      if (jit)
+        fprintf(outfile, "JIT study was successful\n");
+      else
+#ifdef SUPPORT_JIT
+        fprintf(outfile, "JIT study was not successful\n");
+#else
+        fprintf(outfile, "JIT support is not available in this version of PCRE\n");
+#endif
+
+
+#endif  /* FIXME */
+
+
+  }
+
+#endif  /* TOP FIXME */
+
+return PR_OK;
+}
+
+
+
+/*************************************************
+*              Process command line              *
+*************************************************/
+
+/* This function is called for lines beginning with # and a character that is
+not ! or whitespace, when encountered between tests. The line is in buffer.
+
+Arguments:  none
+
+Returns:    PR_OK     continue processing next line
+            PR_SKIP   skip to a blank line
+            PR_ABEND  abort the pcre2test run
+
+Three commands are looked for: #pattern and #data, which set default
+modifiers, and #load, which is not yet implemented and aborts the run. The
+result of decoding the modifiers is deliberately ignored, so a bad modifier
+list still gives PR_OK. A line that matches none of the three commands is
+accepted without any message.
+*/
+
+static int
+process_command(void)
+{
+if (strncmp((char *)buffer, "#pattern", 8) == 0 && isspace(buffer[8]))
+  {
+  (void)decode_modifiers(buffer + 8, CTX_DEFPAT, &def_patctl, NULL);
+  }
+else if (strncmp((char *)buffer, "#data", 5) == 0 && isspace(buffer[5]))
+  {
+  (void)decode_modifiers(buffer + 5, CTX_DEFDAT, NULL, &def_datctl);
+  }
+else if (strncmp((char *)buffer, "#load", 5) == 0 && isspace(buffer[5]))
+  {
+/* FIXME */
+fprintf(outfile, "** #load not yet implemented\n");
+return PR_ABEND;
+
+/* Everything from here to the matching #endif is compiled only if FIXME is
+defined. It is code from the old pcretest for reloading a compiled pattern,
+and it still uses that program's variables, labels and loop "continue"s. */
+
+#ifdef FIXME
+
+
+/* See if the pattern is to be loaded pre-compiled from a file. */
+
+if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
+  {
+  uint32_t magic;
+  uint8_t sbuf[8];
+  FILE *f;
+
+  p++;
+  if (*p == '!')
+    {
+    do_debug = TRUE;
+    do_showinfo = TRUE;
+    p++;
+    }
+
+  pp = p + (int)strlen((char *)p);
+  while (isspace(pp[-1])) pp--;
+  *pp = 0;
+
+  f = fopen((char *)p, "rb");
+  if (f == NULL)
+    {
+    fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
+    continue;
+    }
+  if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
+
+  /* The 8-byte header holds two big-endian 32-bit sizes. */
+
+  true_size =
+    (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
+  true_study_size =
+    (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
+
+  re = (pcre *)new_malloc(true_size);
+  if (re == NULL)
+    {
+    printf("** Failed to get %d bytes of memory for pcre object\n",
+      (int)true_size);
+    yield = 1;
+    goto EXIT;
+    }
+  if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
+
+  magic = REAL_PCRE_MAGIC(re);
+  if (magic != MAGIC_NUMBER)
+    {
+    if (swap_uint32(magic) == MAGIC_NUMBER)
+      {
+      do_flip = 1;
+      }
+    else
+      {
+      fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
+      new_free(re);
+      fclose(f);
+      continue;
+      }
+    }
+
+  /* We hide the byte-invert info for little and big endian tests. */
+  fprintf(outfile, "Compiled pattern%s loaded from %s\n",
+    do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
+
+  /* Now see if there is any following study data. */
+
+  if (true_study_size != 0)
+    {
+    pcre_study_data *psd;
+
+    extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
+    extra->flags = PCRE_EXTRA_STUDY_DATA;
+
+    psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
+    extra->study_data = psd;
+
+    if (fread(psd, 1, true_study_size, f) != true_study_size)
+      {
+      FAIL_READ:
+      fprintf(outfile, "Failed to read data from %s\n", p);
+      if (extra != NULL)
+        {
+        PCRE_FREE_STUDY(extra);
+        }
+      new_free(re);
+      fclose(f);
+      continue;
+      }
+    fprintf(outfile, "Study data loaded from %s\n", p);
+    do_study = 1;     /* To get the data output if requested */
+    }
+  else fprintf(outfile, "No study data\n");
+
+  /* Flip the necessary bytes. */
+  if (do_flip)
+    {
+    int rc;
+    PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
+    if (rc == PCRE_ERROR_BADMODE)
+      {
+      uint32_t flags_in_host_byte_order;
+      if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
+        flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
+      else
+        flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
+      /* Simulate the result of the function call below. */
+      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
+        test_mode == PCRE32_MODE ? "32" : test_mode == PCRE16_MODE ? "16" : "",
+        PCRE_INFO_OPTIONS);
+      fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
+        "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & test_mode_MASK));
+      new_free(re);
+      fclose(f);
+      continue;
+      }
+    }
+
+  /* Need to know if UTF-8 for printing data strings. */
+
+  if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
+    {
+    new_free(re);
+    fclose(f);
+    continue;
+    }
+  use_utf = (get_options & PCRE_UTF8) != 0;
+
+  fclose(f);
+  goto SHOW_INFO;
+  }
+
+#endif  /* FIXME */
+
+
+  }
+return PR_OK;
+}
+
+
+
+/*************************************************
+*             Process pattern line               *
+*************************************************/
+
+/* This function is called when the input buffer contains the start of a
+pattern. The first character is known to be a valid delimiter. The pattern is
+read, modifiers are interpreted, and a suitable local context is set up for
+this test. The pattern is then compiled.
+
+Arguments:  none
+
+Returns:    PR_OK     continue processing next line
+            PR_SKIP   skip to a blank line
+            PR_ABEND  abort the pcre2test run
+*/
+
+static int
+process_pattern(void)
+{
+uint8_t *p = buffer;
+unsigned int delimiter = *p++;
+int patlen, errorcode;
+size_t erroroffset;
+
+/* Initialize the context and pattern/data controls for this test from the
+defaults. */
+
+PATCTXCPY(pat_context, default_pat_context);
+memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
+
+/* Find the end of the pattern, reading more lines if necessary. A backslash
+protects the character that follows it, so an escaped delimiter does not end
+the pattern. */
+
+for(;;)
+  {
+  while (*p != 0)
+    {
+    if (*p == '\\' && p[1] != 0) p++;
+      else if (*p == delimiter) break;
+    p++;
+    }
+  if (*p != 0) break;
+  if ((p = extend_inputline(infile, p, " > ")) == NULL)
+    {
+    fprintf(outfile, "** Unexpected EOF\n");
+    return PR_ABEND;
+    }
+  if (infile != stdin) fprintf(outfile, "%s", (char *)p);
+  }
+
+/* If the first character after the delimiter is backslash, make
+the pattern end with backslash. This is purely to provide a way
+of testing for the error message when a pattern ends with backslash. */
+
+if (p[1] == '\\') *p++ = '\\';
+
+/* Terminate the pattern at the delimiter, and save a copy of the pattern
+for callouts.
*/
+
+/* After the terminator has been written and p advanced past it, the pattern
+occupies buffer[1] up to, but not including, the zero at p[-1]. Its length is
+therefore p - buffer - 2. The copy into pbuffer includes the terminating zero,
+but the zero is not counted in patlen, which is the length that is passed to
+the compile function in 8-bit mode. */
+
+*p++ = 0;
+patlen = p - buffer - 2;
+memcpy(pbuffer, buffer + 1, patlen + 1);
+
+/* Look for modifiers and options after the final delimiter. If successful,
+compile the pattern. */
+
+if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
+
+/* Handle compiling via the POSIX interface, which doesn't support the
+timing, showing, or debugging options, nor the ability to pass over
+local character tables. Neither does it have 16-bit or 32-bit support. */
+
+if ((pat_patctl.control & CTL_POSIX) != 0)
+  {
+  int rc;
+  int cflags = 0;
+
+  if ((pat_patctl.options & PCRE2_UTF) != 0) cflags |= REG_UTF;
+  if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
+  if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
+  if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
+  if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
+  if ((pat_patctl.options & PCRE2_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
+  if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
+
+  rc = regcomp(&preg, (char *)pbuffer, cflags);
+
+  /* Compilation failed. The input buffer is re-used for the error text. */
+
+  if (rc != 0)
+    {
+    (void)regerror(rc, &preg, (char *)buffer, buffer_size);
+    fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
+    return PR_SKIP;
+    }
+
+  return PR_OK;
+  }
+
+/* Handle compiling via the native interface, converting the input in non-8-bit
+modes. The conversion functions return the converted length, or a negative
+number for an error. */
+
+#ifdef SUPPORT_PCRE16
+if (test_mode == PCRE16_MODE)
+  patlen = to16(pbuffer, pat_patctl.options & PCRE2_UTF,
+    (int)strlen((char *)pbuffer));
+#endif
+
+#ifdef SUPPORT_PCRE32
+if (test_mode == PCRE32_MODE)
+  patlen = to32(pbuffer, pat_patctl.options & PCRE2_UTF,
+    (int)strlen((char *)pbuffer));
+#endif
+
+switch(patlen)
+  {
+  case -1:
+  fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
+    "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
+  return PR_SKIP;
+
+  case -2:
+  fprintf(outfile, "** Failed: character value greater than 0x10ffff "
+    "cannot be converted to UTF\n");
+  return PR_SKIP;
+
+  case -3:
+  fprintf(outfile, "** Failed: character value greater than 0xffff "
+    "cannot be converted to 16-bit in non-UTF mode\n");
+  return PR_SKIP;
+
+  default:
+  break;
+  }
+
+/* Compile many times when timing */
+
+if (timeit > 0)
+  {
+  register int i;
+  clock_t time_taken;
+  clock_t start_time = clock();
+  for (i = 0; i < timeit; i++)
+    {
+    PCRE2_COMPILE(compiled_code, pbuffer, patlen,
+      pat_patctl.options, &errorcode, &erroroffset, pat_context);
+    if (TEST(compiled_code, !=, NULL))
+      { SUB1(pcre2_code_free, compiled_code); }
+    }
+  total_compile_time += (time_taken = clock() - start_time);
+  fprintf(outfile, "Compile time %.4f milliseconds\n",
+    (((double)time_taken * 1000.0) / (double)timeit) /
+      (double)CLOCKS_PER_SEC);
+  }
+
+/* FIXME: implement timing for JIT compile. */
+
+/* A final compile that is used "for real". */
+
+PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options, &errorcode,
+  &erroroffset, pat_context);
+
+/* Compilation failed; go back for another re, skipping to blank line
+if non-interactive. */
+
+if (TEST(compiled_code, ==, NULL))
+  {
+  fprintf(outfile, "Failed: %d at offset %d\n", errorcode, (int)erroroffset);
+
+/* FIXME get error message */
+
+  return PR_SKIP;
+  }
+
+/* Call the JIT compiler if requested. */
+
+if (pat_patctl.jit != 0)
+  { PCRE2_JIT_COMPILE(compiled_code, pat_patctl.jit); }
+
+/* Output code size and other information if requested. NOTE(review): the
+name count and name entry size are fetched into an int and a size_t here, but
+into two ints in show_pattern_info(); check which type pcre2_pattern_info()
+actually writes for these items. */
+
+if ((pat_patctl.control & CTL_MEMORY) != 0)
+  {
+  size_t size;
+  size_t name_entry_size;
+  int name_count;
+  (void)pattern_info(PCRE2_INFO_SIZE, &size);
+  (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count);
+  (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
+  fprintf(outfile, "Memory allocation (code space): %d\n",
+    (int)(size - name_count * name_entry_size -
+      FLD(compiled_code, name_table_offset)));
+  if (pat_patctl.jit != 0)
+    {
+    (void)pattern_info(PCRE2_INFO_JITSIZE, &size);
+    fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
+    }
+  }
+
+if ((pat_patctl.control & CTL_ANYINFO) != 0)
+  {
+  int rc = show_pattern_info();
+  if (rc != PR_OK) return rc;
+  }
+
+
+/* The rest of the function, up to the final return, is compiled only if FIXME
+is defined; it is code from the old pcretest that has not yet been converted. */
+
+#ifdef FIXME
+
+/* If the '>' option was present, we write out the regex to a file, and
+that is all. The first 8 bytes of the file are the regex length and then
+the study length, in big-endian order. */
+
+if (to_file != NULL)
+  {
+  FILE *f = fopen((char *)to_file, "wb");
+  if (f == NULL)
+    {
+    fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
+    }
+  else
+    {
+    uint8_t sbuf[8];
+
+/* Extract the size for possible writing before possibly flipping it,
+and remember the store that was got. */
+
+true_size = REAL_PCRE_SIZE(re);
+
+
+
+
+    if (do_flip) regexflip(re, extra);
+    sbuf[0] = (uint8_t)((true_size >> 24) & 255);
+    sbuf[1] = (uint8_t)((true_size >> 16) & 255);
+    sbuf[2] = (uint8_t)((true_size >> 8) & 255);
+    sbuf[3] = (uint8_t)((true_size) & 255);
+    sbuf[4] = (uint8_t)((true_study_size >> 24) & 255);
+    sbuf[5] = (uint8_t)((true_study_size >> 16) & 255);
+    sbuf[6] = (uint8_t)((true_study_size >> 8) & 255);
+    sbuf[7] = (uint8_t)((true_study_size) & 255);
+
+    if (fwrite(sbuf, 1, 8, f) < 8 ||
+        fwrite(re, 1, true_size, f) < true_size)
+      {
+      fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
+      }
+    else
+      {
+      fprintf(outfile, "Compiled pattern written to %s\n", to_file);
+
+      /* If there is study data, write it. */
+
+      if (extra != NULL)
+        {
+        if (fwrite(extra->study_data, 1, true_study_size, f) <
+            true_study_size)
+          {
+          fprintf(outfile, "Write error on %s: %s\n", to_file,
+            strerror(errno));
+          }
+        else fprintf(outfile, "Study data written to %s\n", to_file);
+        }
+      }
+    fclose(f);
+    }
+
+  new_free(re);
+  if (extra != NULL)
+    {
+    PCRE_FREE_STUDY(extra);
+    }
+  if (locale_set)
+    {
+    new_free((void *)tables);
+    setlocale(LC_CTYPE, "C");
+    locale_set = 0;
+    }
+  continue;  /* With next regex */
+  }
+
+#endif  /* FIXME */
+
+
+
+
+
+return PR_OK;
+}
+
+
+
+
+/*************************************************
+*              Process data line                 *
+*************************************************/
+
+/* The line is in buffer; it will not be empty.
+
+Arguments:  none
+
+Returns:    PR_OK     continue processing next line
+            PR_SKIP   skip to a blank line
+            PR_ABEND  abort the pcre2test run
+*/
+
+static int
+process_data(void)
+{
+int len;
+uint32_t c;
+uint8_t *p;
+uint8_t *bptr;
+BOOL use_utf;
+
+#ifdef SUPPORT_PCRE8
+uint8_t *q8;
+#endif
+#ifdef SUPPORT_PCRE16
+uint16_t *q16;
+#endif
+#ifdef SUPPORT_PCRE32
+uint32_t *q32;
+#endif
+
+DATCTXCPY(dat_context, default_dat_context);
+memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
+
+use_utf = (FLD(compiled_code, compile_options) & PCRE2_UTF) != 0;
+
+len = strlen((const char *)buffer);
+while (len > 0 && isspace(buffer[len-1])) len--;
+buffer[len] = 0;
+
+p = buffer;
+while (isspace(*p)) p++;
+
+/* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
+invalid input to pcre2_exec, you must use \x?? or \x{} sequences. */
+
+if (use_utf)
+  {
+  uint8_t *q;
+  uint32_t cc;
+  int n = 1;
+  for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
+  if (n <= 0)
+    {
+    fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
+      "in UTF mode\n");
+    return PR_OK;
+    }
+  }
+
+#ifdef SUPPORT_VALGRIND
+/* Mark the dbuffer as addressable but undefined again.
*/
+if (dbuffer != NULL)
+  {
+  VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
+  }
+#endif
+
+/* Allocate a buffer to hold the data line; len+1 is an upper bound on
+the number of pcre_uchar units that will be needed. The size is doubled until
+it is big enough; failure to get memory is fatal. */
+
+while (dbuffer == NULL || (size_t)len >= dbuffer_size)
+  {
+  dbuffer_size *= 2;
+  dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
+  if (dbuffer == NULL)
+    {
+    fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
+    exit(1);
+    }
+  }
+
+/* One output pointer for each supported code unit width, all starting at the
+beginning of the same buffer. Only the one for the current mode is advanced. */
+
+#ifdef SUPPORT_PCRE8
+q8 = (uint8_t *) dbuffer;
+#endif
+#ifdef SUPPORT_PCRE16
+q16 = (uint16_t *) dbuffer;
+#endif
+#ifdef SUPPORT_PCRE32
+q32 = (uint32_t *) dbuffer;
+#endif
+
+/* Scan the data line, interpreting data escapes, and put the result into a
+buffer the appropriate width buffer. */
+
+while ((c = *p++) != 0)
+  {
+  int i = 0;     /* Counts digits in the numeric escapes */
+
+  /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
+  In non-UTF mode, allow the value of the byte to fall through to later,
+  where values greater than 127 are turned into UTF-8 when running in
+  16-bit or 32-bit mode. */
+
+  if (c != '\\')
+    {
+    if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
+    }
+
+  /* Handle backslash escapes */
+
+  else switch ((c = *p++))
+    {
+    case '\\': break;
+    case 'a': c =    7; break;
+    case 'b': c = '\b'; break;
+    case 'e': c =   27; break;
+    case 'f': c = '\f'; break;
+    case 'n': c = '\n'; break;
+    case 'r': c = '\r'; break;
+    case 't': c = '\t'; break;
+    case 'v': c = '\v'; break;
+
+    /* Up to three octal digits, including the one already read. */
+
+    case '0': case '1': case '2': case '3':
+    case '4': case '5': case '6': case '7':
+    c -= '0';
+    while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
+      c = c * 8 + *p++ - '0';
+    break;
+
+    /* If \o is not followed by '{', c is left as the letter 'o'. Within the
+    braces, the twelfth digit triggers the warning and is not used, but any
+    digits after it are still added in. */
+
+    case 'o':
+    if (*p == '{')
+      {
+      uint8_t *pt = p;
+      c = 0;
+      for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
+        {
+        if (++i == 12)
+          fprintf(outfile, "** Too many octal digits in \\o{...} item; "
+                           "using only the first twelve.\n");
+        else c = c * 8 + *pt - '0';
+        }
+      if (*pt == '}') p = pt + 1;
+        else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
+      }
+    break;
+
+    case 'x':
+    if (*p == '{')
+      {
+      uint8_t *pt = p;
+      c = 0;
+
+      /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
+      when isxdigit() is a macro that refers to its argument more than
+      once. This is banned by the C Standard, but apparently happens in at
+      least one MacOS environment. */
+
+      /* The ninth digit triggers the warning and is not used, but any digits
+      after it are still added in. */
+
+      for (pt++; isxdigit(*pt); pt++)
+        {
+        if (++i == 9)
+          fprintf(outfile, "** Too many hex digits in \\x{...} item; "
+                           "using only the first eight.\n");
+        else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
+        }
+      if (*pt == '}')
+        {
+        p = pt + 1;
+        break;
+        }
+      /* Not correct form for \x{...}; fall through */
+      }
+
+    /* \x without {} always defines just one byte in 8-bit mode. This
+    allows UTF-8 characters to be constructed byte by byte, and also allows
+    invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
+    Otherwise, pass it down to later code so that it can be turned into
+    UTF-8 when running in 16/32-bit mode. */
+
+    c = 0;
+    while (i++ < 2 && isxdigit(*p))
+      {
+      c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
+      p++;
+      }
+#if defined SUPPORT_PCRE8
+    if (use_utf && (test_mode == PCRE8_MODE))
+      {
+      *q8++ = c;
+      continue;
+      }
+#endif
+    break;
+
+    /* p is moved back onto the terminating zero so that the enclosing loop
+    sees it and stops. */
+
+    case 0:     /* \ followed by EOF allows for an empty line */
+    p--;
+    continue;
+
+    case '=':   /* \= terminates the data, starts modifiers */
+    goto ENDSTRING;
+
+    default:
+    fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
+    break;
+    }
+
+  /* We now have a character value in c that may be greater than 255.
+  In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
+  than 127 in UTF mode must have come from \x{...} or octal constructs
+  because values from \x.. get this far only in non-UTF mode. */
+
+#ifdef SUPPORT_PCRE8
+  if (test_mode == PCRE8_MODE)
+    {
+#ifndef NOUTF
+    if (use_utf)
+      {
+      if (c > 0x7fffffff)
+        {
+        fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
+          "and so cannot be converted to UTF-8\n", c);
+        return PR_OK;
+        }
+      q8 += ord2utf8(c, q8);
+      }
+    else
+#endif
+      {
+      if (c > 0xffu)
+        {
+        fprintf(outfile, "** Character \\x{%x} is greater than 255 "
+          "and UTF-8 mode is not enabled.\n", c);
+        fprintf(outfile, "** Truncation will probably give the wrong "
+          "result.\n");
+        }
+      *q8++ = c;
+      }
+    }
+#endif
+#ifdef SUPPORT_PCRE16
+  if (test_mode == PCRE16_MODE)
+    {
+#ifndef NOUTF
+    if (use_utf)
+      {
+      if (c > 0x10ffffu)
+        {
+        fprintf(outfile, "** Failed: character \\x{%x} is greater than "
+          "0x10ffff and so cannot be converted to UTF-16\n", c);
+        return PR_OK;
+        }
+      else if (c >= 0x10000u)     /* Needs a surrogate pair */
+        {
+        c-= 0x10000u;
+        *q16++ = 0xD800 | (c >> 10);
+        *q16++ = 0xDC00 | (c & 0x3ff);
+        }
+      else
+        *q16++ = c;
+      }
+    else
+#endif
+      {
+      if (c > 0xffffu)
+        {
+        fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
+          "and UTF-16 mode is not enabled.\n", c);
+        fprintf(outfile, "** Truncation will probably give the wrong "
+          "result.\n");
+        }
+
+      *q16++ = c;
+      }
+    }
+#endif
+#ifdef SUPPORT_PCRE32
+  if (test_mode == PCRE32_MODE)
+    {
+    *q32++ = c;
+    }
+#endif
+  }
+
+ENDSTRING:
+
+/* Reached end of subject string. Add a terminating zero and set len to the
+length of the subject in code units. */
+
+#ifdef SUPPORT_PCRE8
+if (test_mode == PCRE8_MODE)
+  {
+  *q8 = 0;
+  len = (int)(q8 - (uint8_t *)dbuffer);
+  }
+#endif
+#ifdef SUPPORT_PCRE16
+if (test_mode == PCRE16_MODE)
+  {
+  *q16 = 0;
+  len = (int)(q16 - (uint16_t *)dbuffer);
+  }
+#endif
+#ifdef SUPPORT_PCRE32
+if (test_mode == PCRE32_MODE)
+  {
+  *q32 = 0;
+  len = (int)(q32 - (uint32_t *)dbuffer);
+  }
+#endif
+
+/* If we're compiling with explicit valgrind support, Mark the data from after
+its end to the end of the buffer as unaddressable, so that a read over the end
+of the buffer will be seen by valgrind, even if it doesn't cause a crash. If
+we're not building with valgrind support, at least move the data to the end of
+the buffer so that it might at least cause a crash. If we are using the POSIX
+interface, we must include the terminating zero. */
+
+bptr = dbuffer;
+
+if ((dat_datctl.control & CTL_POSIX) != 0)
+  {
+#ifdef SUPPORT_VALGRIND
+  VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
+#else
+  memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
+  bptr += dbuffer_size - len - 1;
+#endif
+  }
+else
+  {
+#ifdef SUPPORT_VALGRIND
+  VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
+#else
+  bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
+#endif
+  }
+
+
+/* FIXME */
+(void)bptr;
+
+#ifdef FIXME
+if ((all_use_dfa || use_dfa) && find_match_limit)
+  {
+  printf("**Match limit not relevant for DFA matching: ignored\n");
+  find_match_limit = 0;
+  }
+#endif
+
+/* If the string was terminated by \= we must now interpret modifiers. In that
+case p[-1] is the '='; if the scan ran to the end of the line, p[-1] is the
+terminating zero. */
+
+if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
+  return PR_OK;
+
+/* Now run the pattern match.
*/ + +/* FIXME */ + + + + +return PR_OK; +} + + + + +/************************************************* +* Print PCRE version * +*************************************************/ + +static void +print_version(FILE *f) +{ +VERSION_TYPE *vp; +fprintf(f, "PCRE version "); +for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp); +fprintf(outfile, "\n"); +} + + + +/************************************************* +* Print newline configuration * +*************************************************/ + +/* Output is always to stdout. + +Arguments: + rc the return code from PCRE_CONFIG_NEWLINE + isc TRUE if called from "-C newline" +Returns: nothing +*/ + +static void +print_newline_config(unsigned int rc, BOOL isc) +{ +if (!isc) printf(" Newline sequence is "); +if (rc < sizeof(newlines)/sizeof(char *)) + printf("%s\n", newlines[rc]); +else + printf("a non-standard value: %d\n", rc); +} + + + +/************************************************* +* Usage function * +*************************************************/ + +static void +usage(void) +{ +printf("Usage: pcre2test [options] [ []]\n\n"); +printf("Input and output default to stdin and stdout.\n"); +#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) +printf("If input is a terminal, readline() is used to read from it.\n"); +#else +printf("This version of pcre2test is not linked with readline().\n"); +#endif +printf("\nOptions:\n"); +#ifdef SUPPORT_PCRE8 +printf(" -8 use the 8-bit library\n"); +#endif +#ifdef SUPPORT_PCRE16 +printf(" -16 use the 16-bit library\n"); +#endif +#ifdef SUPPORT_PCRE32 +printf(" -32 use the 32-bit library\n"); +#endif +printf(" -C show PCRE2 compile-time options and exit\n"); +printf(" -C arg show a specific compile-time option and exit\n"); +printf(" with its value if numeric (else 0). 
The arg can be:\n"); +printf(" linksize internal link size [2, 3, 4]\n"); +printf(" pcre8 8 bit library support enabled [0, 1]\n"); +printf(" pcre16 16 bit library support enabled [0, 1]\n"); +printf(" pcre32 32 bit library support enabled [0, 1]\n"); +printf(" utf Unicode Transformation Format supported [0, 1]\n"); +printf(" jit Just-in-time compiler supported [0, 1]\n"); +printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n"); +printf(" bsr \\R type [ANYCRLF, ANY]\n"); +printf(" -data set default data control fields\n"); +printf(" -help show usage information\n"); +printf(" -q quiet: do not output PCRE version number at start\n"); +printf(" -pattern set default pattern control fields\n"); +printf(" -S set stack size to megabytes\n"); +printf(" -t [] time compilation and execution, repeating times\n"); +printf(" -tm [] time execution (matching) only, repeating times\n"); +printf(" -T same as -t, but show total times at the end\n"); +printf(" -TM same as -tm, but show total time at the end\n"); +} + + + +/************************************************* +* Handle -C option * +*************************************************/ + +/* This option outputs configuration options and sets an appropriate return +code when asked for a single option. The code is abstracted into a separate +function because of its size. Use whichever pcre2_config() function is +available. + +Argument: an option name or NULL +Returns: the return code +*/ + +static int +c_option(const char *arg) +{ +unsigned long int lrc; +int rc; +int yield = 0; + +if (arg != NULL) + { + unsigned int i; + + for (i = 0; i < COPTLISTCOUNT; i++) + if (strcmp(arg, coptlist[i].name) == 0) break; + + if (i >= COPTLISTCOUNT) + { + fprintf(stderr, "** Unknown -C option '%s'\n", arg); + return -1; + } + + switch (coptlist[i].type) + { + case CONF_BSR: + (void)PCRE2_CONFIG(coptlist[i].value, &rc); + printf("%s\n", rc? 
"ANYCRLF" : "ANY"); + break; + + case CONF_FIX: + yield = coptlist[i].value; + printf("%d\n", yield); + break; + + case CONF_FIZ: + rc = coptlist[i].value; + printf("%d\n", rc); + break; + + case CONF_INT: + (void)PCRE2_CONFIG(coptlist[i].value, &yield); + printf("%d\n", yield); + break; + + case CONF_NL: + (void)PCRE2_CONFIG(coptlist[i].value, &rc); + print_newline_config(rc, TRUE); + break; + } + +/* For VMS, return the value by setting a symbol, for certain values only. */ + +#ifdef __VMS + if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT) + { + char ucname[16]; + strcpy(ucname, coptlist[i].name); + for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]; + vms_setsymbol(ucname, 0, rc); + } +#endif + + return yield; + } + +/* No argument for -C: output all configuration information. */ + +print_version(stdout); +printf("\nCompiled with\n"); + +#ifdef EBCDIC +printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF); +#endif + +#ifdef SUPPORT_PCRE8 +printf(" 8-bit support\n"); +#endif +#ifdef SUPPORT_PCRE16 +printf(" 16-bit support\n"); +#endif +#ifdef SUPPORT_PCRE32 +printf(" 32-bit support\n"); +#endif + +(void)PCRE2_CONFIG(PCRE2_CONFIG_UTF, &rc); +printf (" %sUTF support\n", rc ? "" : "No "); +(void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &rc); +if (rc != 0) + { + const char *arch; + (void)PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, (void *)(&arch)); + printf(" Just-in-time compiler support: %s\n", arch); + } +else + { + printf(" No just-in-time compiler support\n"); + } +(void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &rc); +print_newline_config(rc, FALSE); +(void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &rc); +printf(" \\R matches %s\n", rc? 
"CR, LF, or CRLF only" : + "all Unicode newlines"); +(void)PCRE2_CONFIG(PCRE2_CONFIG_LINK_SIZE, &rc); +printf(" Internal link size = %d\n", rc); +(void)PCRE2_CONFIG(PCRE2_CONFIG_POSIX_MALLOC_THRESHOLD, &rc); +printf(" POSIX malloc threshold = %d\n", rc); +(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENS_LIMIT, &lrc); +printf(" Parentheses nest limit = %ld\n", lrc); +(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCH_LIMIT, &lrc); +printf(" Default match limit = %ld\n", lrc); +(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCH_LIMIT_RECURSION, &lrc); +printf(" Default recursion depth limit = %ld\n", lrc); +(void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &rc); +printf(" Match recursion uses %s", rc? "stack" : "heap"); + +#ifdef FIXME +if (showstore) + { + PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0); + printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size); + } +#endif + +printf("\n"); +return 0; +} + + + +/************************************************* +* Main Program * +*************************************************/ + +int +main(int argc, char **argv) +{ +uint32_t yield = 0; +uint32_t op = 1; +uint32_t stack_size; +BOOL notdone = TRUE; +BOOL quiet = FALSE; +BOOL showtotaltimes = FALSE; +BOOL skipping = FALSE; +char *arg_data = NULL; +char *arg_pattern = NULL; + +PCRE2_JIT_STACK *jit_stack = NULL; + +/* Get the PCRE version number. */ + +PCRE2_VERSION(version, VERSION_SIZE); + +/* Get buffers from malloc() so that valgrind will check their misuse when +debugging. They grow automatically when very long lines are read. The 16- +and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */ + +buffer = (uint8_t *)malloc(buffer_size); +pbuffer = pbuffer8 = (uint8_t *)malloc(buffer_size); + +/* The following _setmode() stuff is some Windows magic that tells its runtime +library to translate CRLF into a single LF character. At least, that's what +I've been told: never having used Windows I take this all on trust. 
Originally +it set 0x8000, but then I was advised that _O_BINARY was better. */ + +#if defined(_WIN32) || defined(WIN32) +_setmode( _fileno( stdout ), _O_BINARY ); +#endif + +/* Initialization that does not depend on the running mode. */ + +memset(&def_patctl, sizeof(patctl), 0); +memset(&def_datctl, sizeof(datctl), 0); + +/* Scan command line options. */ + +while (argc > 1 && argv[op][0] == '-') + { + const char *endptr; + char *arg = argv[op]; + + /* Display and/or set return code for configuration options. */ + + if (strcmp(arg, "-C") == 0) + { + yield = c_option(argv[op + 1]); + goto EXIT; + } + + /* Select operating mode */ + + if (strcmp(arg, "-8") == 0) + { +#ifdef SUPPORT_PCRE8 + test_mode = PCRE8_MODE; +#else + fprintf(stderr, + "** This version of PCRE was built without 8-bit support\n"); + exit(1); +#endif + } + else if (strcmp(arg, "-16") == 0) + { +#ifdef SUPPORT_PCRE16 + test_mode = PCRE16_MODE; +#else + fprintf(stderr, + "** This version of PCRE was built without 16-bit support\n"); + exit(1); +#endif + } + else if (strcmp(arg, "-32") == 0) + { +#ifdef SUPPORT_PCRE32 + test_mode = PCRE32_MODE; +#else + fprintf(stderr, + "** This version of PCRE was built without 32-bit support\n"); + exit(1); +#endif + } + + /* Set quiet (no version verification) */ + + else if (strcmp(arg, "-q") == 0) quiet = TRUE; + + /* Set system stack size */ + + else if (strcmp(arg, "-S") == 0 && argc > 2 && + ((stack_size = get_value(argv[op+1], &endptr)), *endptr == 0)) + { +#if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS) + fprintf(stderr, "PCRE: -S is not supported on this OS\n"); + exit(1); +#else + int rc; + struct rlimit rlim; + getrlimit(RLIMIT_STACK, &rlim); + rlim.rlim_cur = stack_size * 1024 * 1024; + rc = setrlimit(RLIMIT_STACK, &rlim); + if (rc != 0) + { + fprintf(stderr, "PCRE: setrlimit() failed with error %d\n", rc); + exit(1); + } + op++; + argc--; +#endif + } + + /* Set timing parameters */ + + else if 
(strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 || + strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0) + { + int temp; + int both = arg[2] == 0; + showtotaltimes = arg[1] == 'T'; + if (argc > 2 && (temp = get_value(argv[op+1], &endptr), *endptr == 0)) + { + timeitm = temp; + op++; + argc--; + } + else timeitm = LOOPREPEAT; + if (both) timeit = timeitm; + } + + /* Give help */ + + else if (strcmp(arg, "-help") == 0 || + strcmp(arg, "--help") == 0) + { + usage(); + goto EXIT; + } + + /* The following options save their data for processing once we know what + the running mode is. */ + + else if (strcmp(arg, "-data") == 0) + { + arg_data = argv[op+1]; + goto CHECK_VALUE_EXISTS; + } + + else if (strcmp(arg, "-pattern") == 0) + { + arg_pattern = argv[op+1]; + CHECK_VALUE_EXISTS: + if (argc <= 2) + { + fprintf(stderr, "** Missing value for %s\n", arg); + yield = 1; + goto EXIT; + } + op++; + argc--; + } + + /* Unrecognized option */ + + else + { + fprintf(stderr, "** Unknown or malformed option '%s'\n", arg); + usage(); + yield = 1; + goto EXIT; + } + op++; + argc--; + } + +/* Initialize things that cannot be done until we know which test mode we are +running in. 
*/ + +#ifdef SUPPORT_PCRE8 +if (test_mode == PCRE8_MODE) + { + default_pat_context8 = pcre2_compile_context_create_8(NULL); + pat_context8 = pcre2_compile_context_create_8(NULL); + default_dat_context8 = pcre2_match_context_create_8(NULL); + dat_context8 = pcre2_match_context_create_8(NULL); + } +#endif + +#ifdef SUPPORT_PCRE16 +if (test_mode == PCRE16_MODE) + { + default_pat_context16 = pcre2_compile_context_create_16(NULL); + pat_context16 = pcre2_compile_context_create_16(NULL); + default_dat_context16 = pcre2_match_context_create_16(NULL); + dat_context16 = pcre2_match_context_create_16(NULL); + } +#endif + +#ifdef SUPPORT_PCRE32 +if (test_mode == PCRE32_MODE) + { + default_pat_context32 = pcre2_compile_context_create_32(NULL); + pat_context32 = pcre2_compile_context_create_32(NULL); + default_dat_context32 = pcre2_match_context_create_32(NULL); + dat_context32 = pcre2_match_context_create_32(NULL); + } +#endif + +/* Handle command line modifier settings, sending any error messages to +stderr. We need to know the mode before modifying the context, and it is tidier +to do them all in the same way. */ + +outfile = stderr; +if ((arg_pattern != NULL && + !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) || + (arg_data != NULL && + !decode_modifiers((uint8_t *)arg_data, CTX_DEFDAT, NULL, &def_datctl))) + { + yield = 1; + goto EXIT; + } + +/* Sort out the input and output files, defaulting to stdin/stdout. */ + +infile = stdin; +outfile = stdout; + +if (argc > 1) + { + infile = fopen(argv[op], INPUT_MODE); + if (infile == NULL) + { + printf("** Failed to open %s\n", argv[op]); + yield = 1; + goto EXIT; + } + } + +if (argc > 2) + { + outfile = fopen(argv[op+1], OUTPUT_MODE); + if (outfile == NULL) + { + printf("** Failed to open %s\n", argv[op+1]); + yield = 1; + goto EXIT; + } + } + +/* Output a heading line unless quiet, then process input lines. 
*/ + +if (!quiet) print_version(outfile); + +while (notdone) + { + uint8_t *p; + int rc = PR_OK; + BOOL expectdata = TEST(compiled_code, !=, NULL) || preg.re_pcre2_code != NULL; + + if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL) + break; + if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); + fflush(outfile); + p = buffer; + + /* If we have a pattern set up for testing, or we are skipping after a + compile failure, a blank line terminates this test; otherwise process the + line as a data line. */ + + if (expectdata || skipping) + { + while (isspace(*p)) p++; + if (*p == 0) + { + if (preg.re_pcre2_code != NULL) + { + regfree(&preg); + preg.re_pcre2_code = NULL; + } + else + { + SUB1(pcre2_code_free, compiled_code); + SET(compiled_code, NULL); + } + skipping = FALSE; + } + else if (!skipping) rc = process_data(); + } + + /* We do not have a pattern set up for testing. Lines starting with # are + either comments or special commands. Blank lines are ignored. Otherwise, the + line must start with a valid delimiter. It is then processed as a pattern + line. */ + + else if (*p == '#') + { + if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue; + rc = process_command(); + } + + else if (strchr("\"/!'`-+=:;.,", *p) != NULL) + { + rc = process_pattern(); + } + + else + { + while (isspace(*p)) p++; + if (*p != 0) + { + fprintf(stderr, "** Invalid pattern delimiter '%c'.\n", *buffer); + rc = PR_SKIP; + } + } + + if (rc == PR_SKIP && infile != stdin) skipping = TRUE; + else if (rc == PR_ABEND) goto EXIT; + } + +/* Finish off a normal run. 
*/ + +if (infile == stdin) fprintf(outfile, "\n"); + +if (showtotaltimes) + { + fprintf(outfile, "--------------------------------------\n"); + if (timeit > 0) + { + fprintf(outfile, "Total compile time %.4f milliseconds\n", + (((double)total_compile_time * 1000.0) / (double)timeit) / + (double)CLOCKS_PER_SEC); + } + fprintf(outfile, "Total execute time %.4f milliseconds\n", + (((double)total_match_time * 1000.0) / (double)timeitm) / + (double)CLOCKS_PER_SEC); + } + + +EXIT: + +if (infile != NULL && infile != stdin) fclose(infile); +if (outfile != NULL && outfile != stdout) fclose(outfile); + +free(buffer); +free(dbuffer); +free(pbuffer); + +#ifdef SUPPORT_PCRE8 +if (pat_context8 != NULL) pcre2_compile_context_free_8(pat_context8); +if (default_pat_context8 != NULL) + pcre2_compile_context_free_8(default_pat_context8); +if (dat_context8 != NULL) pcre2_match_context_free_8(dat_context8); +if (default_dat_context8 != NULL) + pcre2_match_context_free_8(default_dat_context8); +#endif + +#ifdef SUPPORT_PCRE16 +if (pbuffer16 != NULL) free(pbuffer16); +if (pat_context16 != NULL) pcre2_compile_context_free_16(pat_context16); +if (default_pat_context16 != NULL) + pcre2_compile_context_free_16(default_pat_context16); +if (dat_context16 != NULL) pcre2_match_context_free_16(dat_context16); +if (default_dat_context16 != NULL) + pcre2_match_context_free_16(default_dat_context16); +#endif + +#ifdef SUPPORT_PCRE32 +if (pbuffer32 != NULL) free(pbuffer32); +if (pat_context32 != NULL) pcre2_compile_context_free_32(pat_context32); +if (default_pat_context32 != NULL) + pcre2_compile_context_free_32(default_pat_context32); +if (dat_context32 != NULL) pcre2_match_context_free_32(dat_context32); +if (default_dat_context32 != NULL) + pcre2_match_context_free_32(default_dat_context32); +#endif + +#if defined(__VMS) + yield = SS$_NORMAL; /* Return values via DCL symbols */ +#endif + +/* FIXME: temp avoid compiler warnings. */ + +(void)jit_stack; + +return yield; +} + +/* End of pcre2test.c */