Experimental pattern conversion code (no public documentation yet).

This commit is contained in:
Philip.Hazel 2017-05-06 16:19:39 +00:00
parent aa224e4ac9
commit 6e943e5f47
15 changed files with 1871 additions and 28 deletions

View File

@ -330,6 +330,7 @@ COMMON_SOURCES = \
src/pcre2_compile.c \
src/pcre2_config.c \
src/pcre2_context.c \
src/pcre2_convert.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
src/pcre2_find_bracket.c \
@ -621,6 +622,8 @@ EXTRA_DIST += \
testdata/testinput21 \
testdata/testinput22 \
testdata/testinput23 \
testdata/testinput24 \
testdata/testinput25 \
testdata/testinputEBC \
testdata/testoutput1 \
testdata/testoutput2 \
@ -661,6 +664,8 @@ EXTRA_DIST += \
testdata/testoutput22-32 \
testdata/testoutput22-8 \
testdata/testoutput23 \
testdata/testoutput24 \
testdata/testoutput25 \
testdata/testoutputEBC \
testdata/valgrind-jit.supp \
testdata/wintestinput3 \

37
RunTest
View File

@ -78,7 +78,9 @@ title20="Test 20: Serialization and code copy tests"
title21="Test 21: \C tests without UTF (supported for DFA matching)"
title22="Test 22: \C tests with UTF (not supported for DFA matching)"
title23="Test 23: \C disabled test"
maxtest=23
title24="Test 24: Non-UTF pattern conversion tests"
title25="Test 25: UTF pattern conversion tests"
maxtest=25
if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title0
@ -105,6 +107,8 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title21
echo $title22
echo $title23
echo $title24
echo $title25
exit 0
fi
@ -232,6 +236,8 @@ do20=no
do21=no
do22=no
do23=no
do24=no
do25=no
while [ $# -gt 0 ] ; do
case $1 in
@ -259,6 +265,8 @@ while [ $# -gt 0 ] ; do
21) do21=yes;;
22) do22=yes;;
23) do23=yes;;
24) do24=yes;;
25) do25=yes;;
-8) arg8=yes;;
-16) arg16=yes;;
-32) arg32=yes;;
@ -407,7 +415,8 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
$do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \
$do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
$do20 = no -a $do21 = no -a $do22 = no -a $do23 = no \
$do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \
$do24 = no -a $do25 = no \
]; then
do0=yes
do1=yes
@ -433,6 +442,8 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
do21=yes
do22=yes
do23=yes
do24=yes
do25=yes
fi
# Handle any explicit skips at this stage, so that an argument list may consist
@ -489,7 +500,7 @@ for bmode in "$test8" "$test16" "$test32"; do
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
if [ $? = 0 ] ; then
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -64,-62,-2,-1,0,100,188,189,190,191 >>testtry
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -65,-62,-2,-1,0,100,188,189,190,191 >>testtry
checkresult $? 2 "$opt"
fi
done
@ -824,6 +835,26 @@ for bmode in "$test8" "$test16" "$test32"; do
fi
fi
# Non-UTF pattern conversion tests
if [ "$do24" = yes ] ; then
echo $title24
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput24 testtry
checkresult $? 24 ""
fi
# UTF pattern conversion tests
if [ "$do25" = yes ] ; then
echo $title25
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput25 testtry
checkresult $? 25 ""
fi
fi
# End of loop for 8/16/32-bit tests
done

View File

@ -176,6 +176,15 @@ ignored for pcre2_jit_match(). */
#define PCRE2_NO_JIT 0x00002000u
/* Options for pcre2_pattern_convert(). */
#define PCRE2_CONVERT_UTF 0x00000001u
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
#define PCRE2_CONVERT_GLOB_BASIC 0x00000004u
#define PCRE2_CONVERT_GLOB_BASH 0x00000008u
#define PCRE2_CONVERT_POSIX_BASIC 0x00000010u
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000020u
/* Newline and \R settings, for use in compile contexts. The newline values
must be kept in step with values set in config.h and both sets must all be
greater than zero. */
@ -270,6 +279,8 @@ numbers must not be changed. */
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
/* Request types for pcre2_pattern_info() */
@ -351,6 +362,9 @@ typedef struct pcre2_real_compile_context pcre2_compile_context; \
struct pcre2_real_match_context; \
typedef struct pcre2_real_match_context pcre2_match_context; \
\
struct pcre2_real_convert_context; \
typedef struct pcre2_real_convert_context pcre2_convert_context; \
\
struct pcre2_real_code; \
typedef struct pcre2_real_code pcre2_code; \
\
@ -466,6 +480,16 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_recursion_memory_management(pcre2_match_context *, \
void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
/* Functions that manage convert contexts: copying, creating and freeing a
pcre2_convert_context, and setting the separator character that is used when
converting glob patterns. */
#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_copy(pcre2_convert_context *); \
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_convert_context_free(pcre2_convert_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
/* Functions concerned with compiling a pattern to PCRE internal code. */
@ -572,6 +596,16 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
/* Functions for converting pattern source strings. pcre2_pattern_convert()
takes the input pattern and its length, the PCRE2_CONVERT_xxx option bits, a
pointer to the output buffer pointer, a pointer to the output length, and an
optional convert context. pcre2_converted_pattern_free() takes a converted
pattern buffer. */
#define PCRE2_CONVERT_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
PCRE2_SIZE *, pcre2_convert_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_converted_pattern_free(PCRE2_UCHAR *);
/* Functions for JIT processing */
#define PCRE2_JIT_FUNCTIONS \
@ -623,6 +657,7 @@ pcre2_compile are called by application code. */
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
@ -634,6 +669,7 @@ pcre2_compile are called by application code. */
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
@ -649,6 +685,10 @@ pcre2_compile are called by application code. */
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
@ -672,6 +712,7 @@ pcre2_compile are called by application code. */
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
@ -682,6 +723,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
@ -716,6 +758,8 @@ PCRE2_STRUCTURE_LIST \
PCRE2_GENERAL_INFO_FUNCTIONS \
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
PCRE2_CONVERT_FUNCTIONS \
PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_FUNCTIONS \
PCRE2_PATTERN_INFO_FUNCTIONS \
@ -745,6 +789,7 @@ PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_GENERAL_INFO_FUNCTIONS
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
/* PCRE2_CONVERT_FUNCTIONS is a helper macro just like its neighbours, so it
must be discarded here too rather than left visible to the including code. */
#undef PCRE2_CONVERT_FUNCTIONS
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_FUNCTIONS
#undef PCRE2_PATTERN_INFO_FUNCTIONS

View File

@ -176,6 +176,15 @@ ignored for pcre2_jit_match(). */
#define PCRE2_NO_JIT 0x00002000u
/* Options for pcre2_pattern_convert(). */
#define PCRE2_CONVERT_UTF 0x00000001u
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
#define PCRE2_CONVERT_GLOB_BASIC 0x00000004u
#define PCRE2_CONVERT_GLOB_BASH 0x00000008u
#define PCRE2_CONVERT_POSIX_BASIC 0x00000010u
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000020u
/* Newline and \R settings, for use in compile contexts. The newline values
must be kept in step with values set in config.h and both sets must all be
greater than zero. */
@ -270,6 +279,8 @@ numbers must not be changed. */
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
/* Request types for pcre2_pattern_info() */
@ -351,6 +362,9 @@ typedef struct pcre2_real_compile_context pcre2_compile_context; \
struct pcre2_real_match_context; \
typedef struct pcre2_real_match_context pcre2_match_context; \
\
struct pcre2_real_convert_context; \
typedef struct pcre2_real_convert_context pcre2_convert_context; \
\
struct pcre2_real_code; \
typedef struct pcre2_real_code pcre2_code; \
\
@ -466,6 +480,16 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_recursion_memory_management(pcre2_match_context *, \
void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
/* Functions that manage convert contexts: copying, creating and freeing a
pcre2_convert_context, and setting the separator character that is used when
converting glob patterns. */
#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_copy(pcre2_convert_context *); \
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_convert_context_free(pcre2_convert_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
/* Functions concerned with compiling a pattern to PCRE internal code. */
@ -572,6 +596,16 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
/* Functions for converting pattern source strings. pcre2_pattern_convert()
takes the input pattern and its length, the PCRE2_CONVERT_xxx option bits, a
pointer to the output buffer pointer, a pointer to the output length, and an
optional convert context. pcre2_converted_pattern_free() takes a converted
pattern buffer. */
#define PCRE2_CONVERT_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
PCRE2_SIZE *, pcre2_convert_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_converted_pattern_free(PCRE2_UCHAR *);
/* Functions for JIT processing */
#define PCRE2_JIT_FUNCTIONS \
@ -623,6 +657,7 @@ pcre2_compile are called by application code. */
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
@ -634,6 +669,7 @@ pcre2_compile are called by application code. */
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
@ -649,6 +685,10 @@ pcre2_compile are called by application code. */
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
@ -672,6 +712,7 @@ pcre2_compile are called by application code. */
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
@ -682,6 +723,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
@ -716,6 +758,8 @@ PCRE2_STRUCTURE_LIST \
PCRE2_GENERAL_INFO_FUNCTIONS \
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
PCRE2_CONVERT_FUNCTIONS \
PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_FUNCTIONS \
PCRE2_PATTERN_INFO_FUNCTIONS \
@ -745,6 +789,7 @@ PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_GENERAL_INFO_FUNCTIONS
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
/* PCRE2_CONVERT_FUNCTIONS is a helper macro just like its neighbours, so it
must be discarded here too rather than left visible to the including code. */
#undef PCRE2_CONVERT_FUNCTIONS
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_FUNCTIONS
#undef PCRE2_PATTERN_INFO_FUNCTIONS

View File

@ -188,6 +188,34 @@ return mcontext;
}
/* A default convert context is set up to save having to initialize at run time
when no context is supplied to the convert function. It holds the default
memory management functions followed by the default glob path separator, which
is chosen at compile time: backslash when _WIN32 is defined, slash otherwise. */
const pcre2_convert_context PRIV(default_convert_context) = {
{ default_malloc, default_free, NULL }, /* Default memory handling */
#ifdef _WIN32
CHAR_BACKSLASH /* Default path separator on Windows */
#else /* The default separator is OS dependent */
CHAR_SLASH /* Default path separator when not Windows */
#endif
};
/* Create a convert context. The new block is obtained through the general
context's memory functions (or the defaults when gcontext is NULL), filled from
the built-in default convert context, and then, if a general context was given,
its memory control data replaces the default memory handling in the new block.

Argument:   gcontext   a general context, or NULL
Returns:    the new convert context, or NULL if memory could not be obtained
*/

PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_create(pcre2_general_context *gcontext)
{
pcre2_convert_context *newctx;

newctx = PRIV(memctl_malloc)(sizeof(pcre2_real_convert_context),
  (pcre2_memctl *)gcontext);

if (newctx != NULL)
  {
  *newctx = PRIV(default_convert_context);
  if (gcontext != NULL)
    {
    *((pcre2_memctl *)newctx) = *((pcre2_memctl *)gcontext);
    }
  }

return newctx;
}
/*************************************************
* Context copy functions *
*************************************************/
@ -229,11 +257,22 @@ return new;
/* Duplicate a convert context. The memory for the duplicate is obtained with
the allocator stored in the context being copied, and the whole block is then
copied into it.

Argument:   ccontext   the convert context to copy (must not be NULL)
Returns:    the duplicate, or NULL if memory could not be obtained
*/

PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_copy(pcre2_convert_context *ccontext)
{
pcre2_convert_context *duplicate = ccontext->memctl.malloc(
  sizeof(pcre2_real_convert_context), ccontext->memctl.memory_data);

if (duplicate != NULL)
  {
  memcpy(duplicate, ccontext, sizeof(pcre2_real_convert_context));
  }

return duplicate;
}
/*************************************************
* Context free functions *
*************************************************/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_general_context_free(pcre2_general_context *gcontext)
{
@ -258,6 +297,12 @@ if (mcontext != NULL)
}
/* Release a convert context using the free function stored inside it. A NULL
argument is accepted and ignored. */

PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_convert_context_free(pcre2_convert_context *ccontext)
{
if (ccontext == NULL) return;
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
}
/*************************************************
@ -269,7 +314,7 @@ data is given. Only some of the functions are able to test the validity of the
data. */
/* ------------ Compile contexts ------------ */
/* ------------ Compile context ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_character_tables(pcre2_compile_context *ccontext,
@ -336,7 +381,7 @@ return 0;
}
/* ------------ Match contexts ------------ */
/* ------------ Match context ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_callout(pcre2_match_context *mcontext,
@ -390,4 +435,16 @@ pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
return 0;
}
/* ------------ Convert context ------------ */
/* Set the separator character used when converting glob patterns. Only slash,
backslash and dot are accepted.

Arguments:
  ccontext    the convert context to update
  separator   the new separator character

Returns:      0 on success, PCRE2_ERROR_BADDATA for any other character
*/

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_glob_separator(pcre2_convert_context *ccontext, uint32_t separator)
{
switch (separator)
  {
  case CHAR_SLASH:
  case CHAR_BACKSLASH:
  case CHAR_DOT:
  ccontext->glob_separator = separator;
  return 0;

  default:
  return PCRE2_ERROR_BADDATA;
  }
}
/* End of pcre2_context.c */

721
src/pcre2_convert.c Normal file
View File

@ -0,0 +1,721 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/* TYPE_OPTIONS collects the option bits that select the kind of input pattern;
ALL_OPTIONS is every option bit that pcre2_pattern_convert() accepts. */
#define TYPE_OPTIONS (PCRE2_CONVERT_GLOB_BASIC|PCRE2_CONVERT_GLOB_BASH| \
PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)
#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
TYPE_OPTIONS)
/* Size, in code units, of the on-stack buffer that pcre2_pattern_convert()
uses when the caller has not supplied an output buffer. */
#define DUMMY_BUFFER_SIZE 100
/* Some pcre2_compile() error numbers are used herein. */
#define ERROR_END_BACKSLASH 101
#define ERROR_MISSING_SQUARE_BRACKET 106
#define ERROR_NO_UNICODE 132
/* Generated pattern fragments, built by concatenating the single-character
STR_xxx string macros. */
#define STR_BACKSLASH_A STR_BACKSLASH STR_A
#define STR_BACKSLASH_z STR_BACKSLASH STR_z
#define STR_COLON_RIGHT_SQUARE_BRACKET STR_COLON STR_RIGHT_SQUARE_BRACKET
#define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN
#define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
/* States for range and POSIX class processing. The range states track the
parts of a possible x-y range inside a glob class; the POSIX class states track
the parts of a possible [:name:] item inside a class. */
enum { RANGE_NOT_STARTED, RANGE_STARTING, RANGE_STARTED };
enum { POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };
/* Macro to add a character string to the output buffer, checking for overflow.
It relies on three variables of the enclosing function: "s" (a char * scratch
pointer), "p" (the current output pointer) and "endp" (the limit for p). The
zero-terminated string is copied one character at a time; if p has reached endp
the enclosing function returns PCRE2_ERROR_NOMEMORY. The expansion is a plain
brace block, so a use followed by a semicolon cannot directly precede "else". */
#define PUTCHARS(string) \
{ \
for (s = (char *)(string); *s != 0; s++) \
{ \
if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
*p++ = *s; \
} \
}
/*************************************************
*           Convert a POSIX pattern              *
*************************************************/

/* This function handles both basic and extended POSIX patterns. The input is
scanned one character at a time and the equivalent PCRE2 syntax is written to
the output buffer, followed by a terminating zero that is not included in the
returned length. In a dummy run the output pointer is pulled back to the start
of the buffer before each item, so that only the total length is computed.

In a basic pattern the characters ? + { } | ( ) are literal unless escaped with
a backslash; in an extended pattern it is the other way round. A backslash
followed by a digit is passed through as a backslash and the digit.

Arguments:
  pattype        the pattern type
  pattern        the pattern
  plength        length in code units
  utf            TRUE if UTF
  use_buffer     where to put the output
  use_length     length of use_buffer
  bufflenptr     where to put the used length
  dummyrun       TRUE if a dummy run
  ccontext       the convert context

Returns:         0 => success
                !0 => error code; *bufflenptr then holds an offset into the
                      input pattern (the end of the input by default)
*/

static int
convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
  BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
  PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
{
char *s;
PCRE2_SPTR posix = pattern;
PCRE2_UCHAR *p = use_buffer;
PCRE2_UCHAR *pp = p;
PCRE2_UCHAR *endp;
PCRE2_SIZE convlength = 0;

uint32_t posix_class_state = POSIX_CLASS_NOT_STARTED;
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
BOOL inclass = FALSE;
BOOL nextisliteral = FALSE;

(void)utf;       /* Not used when Unicode not supported */
(void)ccontext;  /* Not currently used */

/* Initialize default for error offset as end of input. */

*bufflenptr = plength;

/* A buffer with no room at all cannot even hold the terminating zero. This
must be checked explicitly: for an empty pattern nothing else would notice,
and the terminator would be written outside the buffer. */

if (use_length == 0) return PCRE2_ERROR_NOMEMORY;
endp = p + use_length - 1;  /* Allow for trailing zero */

/* Now scan the input */

while (plength > 0)
  {
  uint32_t c, sc;
  int clength = 1;

  /* Add in the length of the last item, then, if in the dummy run, pull the
  pointer back to the start of the (temporary) buffer and then remember the
  start of the next item. */

  convlength += p - pp;
  if (dummyrun) p = use_buffer;
  pp = p;

  /* Pick up the next character */

#ifndef SUPPORT_UNICODE
  c = *posix;
#else
  GETCHARLENTEST(c, posix, clength);
#endif
  posix += clength;
  plength -= clength;

  /* sc is the "special" character: zero when the previous backslash has made
  this character literal. */

  sc = nextisliteral? 0 : c;
  nextisliteral = FALSE;

  /* Handle a character within a class. */

  if (inclass)
    {
    if (c == CHAR_RIGHT_SQUARE_BRACKET)
      {
      PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
      inclass = FALSE;
      }

    /* Not the end of the class */

    else
      {
      switch (posix_class_state)
        {
        case POSIX_CLASS_STARTED:
        if (c <= 127 && islower(c)) break;  /* Remain in started state */
        posix_class_state = POSIX_CLASS_NOT_STARTED;
        if (c == CHAR_COLON && plength > 0 &&
            *posix == CHAR_RIGHT_SQUARE_BRACKET)
          {
          PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
          plength--;
          posix++;
          continue;    /* With next character after :] */
          }
        /* Fall through */

        case POSIX_CLASS_NOT_STARTED:
        if (c == CHAR_LEFT_SQUARE_BRACKET)
          posix_class_state = POSIX_CLASS_STARTING;
        break;

        /* Only a colon immediately after [ starts a POSIX class name. Any
        other character cancels the attempt (another [ begins a new one);
        otherwise a later colon would be mistaken for the start of a name and
        a following ":]" would swallow the real end of the class. */

        case POSIX_CLASS_STARTING:
        if (c == CHAR_COLON)
          posix_class_state = POSIX_CLASS_STARTED;
        else if (c != CHAR_LEFT_SQUARE_BRACKET)
          posix_class_state = POSIX_CLASS_NOT_STARTED;
        break;
        }

      /* A backslash is literal in a POSIX class, so it must be doubled. */

      if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
      if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
      memcpy(p, posix - clength, CU2BYTES(clength));
      p += clength;
      }
    }

  /* Handle a character not within a class. */

  else switch(sc)
    {
    case CHAR_LEFT_SQUARE_BRACKET:
    PUTCHARS(STR_LEFT_SQUARE_BRACKET);

    /* Handle special cases [[:<:]] and [[:>:]] (which PCRE does support) */

    if (plength >= 6)
      {
      if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
          posix[1] == CHAR_COLON &&
          (posix[2] == CHAR_LESS_THAN_SIGN ||
           posix[2] == CHAR_GREATER_THAN_SIGN) &&
          posix[3] == CHAR_COLON &&
          posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
          posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
        {
        if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
        memcpy(p, posix, CU2BYTES(6));
        p += 6;
        posix += 6;
        plength -= 6;
        continue;  /* With next character */
        }
      }

    /* Handle "normal" character classes */

    posix_class_state = POSIX_CLASS_NOT_STARTED;
    inclass = TRUE;

    /* Handle ^ and ] as first characters */

    if (plength > 0)
      {
      if (*posix == CHAR_CIRCUMFLEX_ACCENT)
        {
        posix++;
        plength--;
        PUTCHARS(STR_CIRCUMFLEX_ACCENT);
        }
      if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
        {
        posix++;
        plength--;
        PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
        }
      }
    break;

    /* In a basic pattern, a backslash turns certain characters into
    metacharacters, which are output unescaped; this includes { and } so that
    an interval such as \{2,3\} becomes the quantifier {2,3}. Digits keep their
    backslash. In all other cases the next character is taken literally. */

    case CHAR_BACKSLASH:
    if (plength <= 0) return ERROR_END_BACKSLASH;
    if (!extended && *posix < 127 &&
        strchr("?+|(){}0123456789", *posix) != NULL)
      {
      if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
      if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
      *p++ = *posix++;
      plength--;
      }
    else nextisliteral = TRUE;
    break;

    /* These are metacharacters in an extended pattern, but literals in a basic
    pattern, where they have to be escaped for PCRE2. */

    case CHAR_QUESTION_MARK:
    case CHAR_PLUS:
    case CHAR_LEFT_CURLY_BRACKET:
    case CHAR_RIGHT_CURLY_BRACKET:
    case CHAR_VERTICAL_LINE:
    case CHAR_LEFT_PARENTHESIS:
    case CHAR_RIGHT_PARENTHESIS:
    if (!extended) PUTCHARS(STR_BACKSLASH);
    /* Fall through */

    case CHAR_ASTERISK:
    if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
    *p++ = sc;
    break;

    /* Anything else, including a character made literal by a preceding
    backslash, is copied, with an escape if PCRE2 would treat it specially. */

    default:
    if (c < 256 && strchr("\\{}?*+[]()|.^$", c) != NULL)
      {
      PUTCHARS(STR_BACKSLASH);
      }
    if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
    memcpy(p, posix - clength, CU2BYTES(clength));
    p += clength;
    break;
    }
  }

if (inclass) return ERROR_MISSING_SQUARE_BRACKET;

convlength += p - pp;        /* Final segment */
*bufflenptr = convlength;
*p++ = 0;
return 0;
}
/*************************************************
*           Convert a glob pattern               *
*************************************************/

/* For a basic glob, only * ? and [...] are recognized. The output is anchored
with \A at the start and \z at the end, and is followed by a terminating zero
that is not included in the returned length. The wildcards * and ? never match
the part separator taken from the convert context, and a literal separator
inside a class is a syntax error. In a dummy run the output pointer is pulled
back to the start of the buffer before each item, so that only the total length
is computed.

Arguments:
  pattype        the pattern type
  pattern        the pattern
  plength        length in code units
  utf            TRUE if UTF
  use_buffer     where to put the output
  use_length     length of use_buffer
  bufflenptr     where to put the used length
  dummyrun       TRUE if a dummy run
  ccontext       the convert context

Returns:         0 => success
                !0 => error code; *bufflenptr then holds an offset into the
                      input pattern (the end of the input by default)
*/

static int
convert_glob(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
  BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
  PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
{
char *s;
char not_sep_class[8];
char lookbehind_not_sep[12];
PCRE2_SPTR glob = pattern;
PCRE2_UCHAR *p = use_buffer;
PCRE2_UCHAR *pp = p;
PCRE2_UCHAR *endp;
PCRE2_SIZE convlength = 0;

uint32_t range_start = 0;
uint32_t range_state = RANGE_NOT_STARTED;
uint32_t posix_class_state = POSIX_CLASS_NOT_STARTED;

BOOL inclass = FALSE;
BOOL nextisliteral = FALSE;
BOOL endswith = FALSE;
BOOL sep_in_range = FALSE;

(void)utf;      /* Not used when Unicode not supported */
(void)pattype;  /* Pro tem */

/* Set up a string containing [^<sep>] where <sep> is the possibly escaped part
separator. Inside a class only a backslash needs escaping. */

s = not_sep_class;
*s++ = CHAR_LEFT_SQUARE_BRACKET;
*s++ = CHAR_CIRCUMFLEX_ACCENT;
if (ccontext->glob_separator == CHAR_BACKSLASH) *s++ = CHAR_BACKSLASH;
*s++ = ccontext->glob_separator;
*s++ = CHAR_RIGHT_SQUARE_BRACKET;
*s++ = 0;

/* Set up a string containing (?<!<sep>) where <sep> is the possibly escaped
part separator. Here the separator is outside a class, so a dot has to be
escaped as well as a backslash; an unescaped dot would match any character and
the assertion could then never succeed after a class has matched. */

s = lookbehind_not_sep;
*s++ = CHAR_LEFT_PARENTHESIS;
*s++ = CHAR_QUESTION_MARK;
*s++ = CHAR_LESS_THAN_SIGN;
*s++ = CHAR_EXCLAMATION_MARK;
if (ccontext->glob_separator == CHAR_BACKSLASH ||
    ccontext->glob_separator == CHAR_DOT)
  *s++ = CHAR_BACKSLASH;
*s++ = ccontext->glob_separator;
*s++ = CHAR_RIGHT_PARENTHESIS;
*s++ = 0;

/* Initialize default for error offset as end of input. */

*bufflenptr = plength;

/* A buffer with no room at all cannot hold any output. Checking this first
also avoids computing an end pointer that lies before the buffer. */

if (use_length == 0) return PCRE2_ERROR_NOMEMORY;
endp = p + use_length - 1;  /* Allow for trailing zero */

/* If the pattern starts with * and contains at least one more character but no
other asterisks or part separators, it means "ends with what follows". This can
be optimized. */

if (plength > 1 && *glob == CHAR_ASTERISK)
  {
  PCRE2_SPTR pt;
  for (pt = glob + plength - 1; pt > glob; pt--)
    if (*pt == ccontext->glob_separator || *pt == CHAR_ASTERISK) break;
  endswith = pt == glob;
  if (endswith) PUTCHARS(STR_QUERY_s);
  }

/* Output starts with \A and ends with \z and a binary zero. */

PUTCHARS(STR_BACKSLASH_A);

/* If the pattern starts with a wildcard, it must not match a subject that
starts with a dot. */

if (plength > 1 &&
    (*glob == CHAR_ASTERISK || *glob == CHAR_QUESTION_MARK ||
     *glob == CHAR_LEFT_SQUARE_BRACKET))
  PUTCHARS(STR_LOOKAHEAD_NOT_DOT);

/* Now scan the input */

while (plength > 0)
  {
  uint32_t c, sc;
  int clength = 1;

  /* Add in the length of the last item, then, if in the dummy run, pull the
  pointer back to the start of the (temporary) buffer and then remember the
  start of the next item. */

  convlength += p - pp;
  if (dummyrun) p = use_buffer;
  pp = p;

  /* Pick up the next character */

#ifndef SUPPORT_UNICODE
  c = *glob;
#else
  GETCHARLENTEST(c, glob, clength);
#endif
  glob += clength;
  plength -= clength;

  /* sc is the "special" character: zero when the previous backslash has made
  this character literal. */

  sc = nextisliteral? 0 : c;
  nextisliteral = FALSE;

  /* Handle a character within a class. */

  if (inclass)
    {
    /* A literal part separator is a syntax error */

    if (c == ccontext->glob_separator)
      {
      *bufflenptr = glob - pattern - 1;
      return PCRE2_ERROR_CONVERT_SYNTAX;
      }

    /* At the end of the class, add a lookbehind for not the separator if any
    range in the class includes the separator. */

    if (c == CHAR_RIGHT_SQUARE_BRACKET)
      {
      PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
      if (sep_in_range) PUTCHARS(lookbehind_not_sep);
      inclass = FALSE;
      }

    /* Not the end of the class */

    else
      {
      switch (posix_class_state)
        {
        case POSIX_CLASS_STARTED:
        if (c <= 127 && islower(c)) break;  /* Remain in started state */
        posix_class_state = POSIX_CLASS_NOT_STARTED;
        if (c == CHAR_COLON && plength > 0 &&
            *glob == CHAR_RIGHT_SQUARE_BRACKET)
          {
          PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
          plength--;
          glob++;
          continue;    /* With next character after :] */
          }
        /* Fall through */

        case POSIX_CLASS_NOT_STARTED:
        if (c == CHAR_LEFT_SQUARE_BRACKET)
          posix_class_state = POSIX_CLASS_STARTING;
        break;

        /* Only a colon immediately after [ starts a POSIX class name. Any
        other character cancels the attempt (another [ begins a new one);
        otherwise a later colon would be mistaken for the start of a name and
        a following ":]" would swallow the real end of the class. */

        case POSIX_CLASS_STARTING:
        if (c == CHAR_COLON)
          posix_class_state = POSIX_CLASS_STARTED;
        else if (c != CHAR_LEFT_SQUARE_BRACKET)
          posix_class_state = POSIX_CLASS_NOT_STARTED;
        break;
        }

      /* Track x-y ranges so that one which spans the separator is noticed. */

      if (range_state == RANGE_STARTING && c == CHAR_MINUS)
        range_state = RANGE_STARTED;
      else if (range_state == RANGE_STARTED)
        {
        if (range_start <= ccontext->glob_separator &&
            c >= ccontext->glob_separator)
          sep_in_range = TRUE;
        range_state = RANGE_NOT_STARTED;
        }
      else
        {
        range_state = RANGE_STARTING;
        range_start = c;
        }

      /* A backslash is doubled so that PCRE2 treats it as a literal. */

      if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
      if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
      memcpy(p, glob - clength, CU2BYTES(clength));
      p += clength;
      }
    }

  /* Handle a character not within a class. */

  else switch(sc)
    {
    case CHAR_ASTERISK:
    if (endswith)
      {
      PUTCHARS(STR_DOT_STAR_LOOKBEHIND);
      }
    else
      {
      PUTCHARS(not_sep_class);
      PUTCHARS(STR_ASTERISK);
      }
    break;

    case CHAR_QUESTION_MARK:
    PUTCHARS(not_sep_class);
    break;

    case CHAR_LEFT_SQUARE_BRACKET:
    posix_class_state = POSIX_CLASS_NOT_STARTED;
    range_state = RANGE_NOT_STARTED;
    sep_in_range = FALSE;
    inclass = TRUE;
    PUTCHARS(STR_LEFT_SQUARE_BRACKET);

    /* Handle ! and ] as first characters */

    if (plength > 0)
      {
      if (*glob == CHAR_EXCLAMATION_MARK)
        {
        glob++;
        plength--;
        PUTCHARS(STR_CIRCUMFLEX_ACCENT);
        }
      if (plength > 0 && *glob == CHAR_RIGHT_SQUARE_BRACKET)
        {
        glob++;
        plength--;
        PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
        range_start = CHAR_RIGHT_SQUARE_BRACKET;
        range_state = RANGE_STARTING;
        }
      }
    break;

    case CHAR_BACKSLASH:
    if (plength <= 0) return ERROR_END_BACKSLASH;
    nextisliteral = TRUE;
    break;

    /* Anything else, including a character made literal by a preceding
    backslash, is copied, with an escape if PCRE2 would treat it specially. */

    default:
    if (c < 256 && strchr("\\{}?*+[]()|.^$", c) != NULL)
      {
      PUTCHARS(STR_BACKSLASH);
      }
    if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
    memcpy(p, glob - clength, CU2BYTES(clength));
    p += clength;
    break;
    }
  }

if (inclass) return ERROR_MISSING_SQUARE_BRACKET;

/* Close the lookbehind that was opened for an "ends with" pattern. */

if (endswith) PUTCHARS(STR_RIGHT_PARENTHESIS);
PUTCHARS(STR_BACKSLASH_z);

convlength += p - pp;        /* Final segment */
*bufflenptr = convlength;
*p++ = 0;
return 0;
}
/*************************************************
*                Convert pattern                 *
*************************************************/

/* This is the external-facing function for converting other forms of pattern
into PCRE2 regular expression patterns. The buffptr argument selects how the
output is delivered:

  buffptr == NULL    only the length is wanted; a local scratch buffer is
                     used and the length is returned via bufflenptr
  *buffptr == NULL   a dummy run finds the length, a buffer of that length
                     plus one code unit is obtained through the convert
                     context's memory functions, and the conversion is then
                     run for real; *buffptr is set to the new buffer
  *buffptr != NULL   the caller's buffer of *bufflenptr code units is used

On error, the bufflenptr argument is used to return an offset in the original
pattern. Errors that are detected before a converter has looked at the pattern
(bad options, no Unicode support, internal errors) give an offset of zero.
PCRE2_ERROR_NULL and a failure to allocate the output buffer leave *bufflenptr
as it was at that point.

Arguments:
  pattern     the input pattern
  plength     length of input, or PCRE2_ZERO_TERMINATED
  options     options bits
  buffptr     pointer to pointer to output buffer
  bufflenptr  pointer to length of output buffer
  ccontext    convert context or NULL

Returns:      0 for success, else an error code (+ve or -ve)
*/

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
  PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
  pcre2_convert_context *ccontext)
{
int i, rc;
PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
PCRE2_UCHAR *use_buffer = dummy_buffer;
PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
uint32_t pattype = options & TYPE_OPTIONS;

if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;

if ((options & ~ALL_OPTIONS) != 0 ||        /* Undefined bit set */
    (pattype & (~pattype+1)) != pattype ||  /* More than one type set */
    pattype == 0)                           /* No type set */
  {
  *bufflenptr = 0;                          /* Error offset */
  return PCRE2_ERROR_BADOPTION;
  }

if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
if (ccontext == NULL) ccontext =
  (pcre2_convert_context *)(&PRIV(default_convert_context));

/* Check UTF if required. */

#ifndef SUPPORT_UNICODE
if (utf)
  {
  *bufflenptr = 0;  /* Error offset; callers report it with the error */
  return ERROR_NO_UNICODE;
  }
#else
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
  {
  PCRE2_SIZE erroroffset;
  rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
  if (rc != 0)
    {
    *bufflenptr = erroroffset;
    return rc;
    }
  }
#endif

/* If buffptr is not NULL, and what it points to is not NULL, we are being
provided with a buffer and a length, so set them as the buffer to use. */

if (buffptr != NULL && *buffptr != NULL)
  {
  use_buffer = *buffptr;
  use_length = *bufflenptr;
  }

/* Call an individual converter, either just once (if a buffer was provided or
just the length is needed), or twice (if a memory allocation is required). */

for (i = 0; i < 2; i++)
  {
  PCRE2_UCHAR *allocated;

  /* This is re-evaluated on each pass: once a buffer has been allocated
  below, the second pass is no longer a dummy run. */

  BOOL dummyrun = buffptr == NULL || *buffptr == NULL;

  switch(pattype)
    {
    case PCRE2_CONVERT_GLOB_BASIC:
    rc = convert_glob(pattype, pattern, plength, utf, use_buffer, use_length,
      bufflenptr, dummyrun, ccontext);
    break;

    case PCRE2_CONVERT_POSIX_BASIC:
    case PCRE2_CONVERT_POSIX_EXTENDED:
    rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
      bufflenptr, dummyrun, ccontext);
    break;

    default:
    *bufflenptr = 0;  /* Error offset */
    return PCRE2_ERROR_INTERNAL;
    }

  if (rc != 0 ||           /* Error */
      buffptr == NULL ||   /* Just the length is required */
      *buffptr != NULL)    /* Buffer was provided or allocated */
    return rc;

  /* Allocate memory for the buffer, with hidden space for an allocator at
  the start. The next time round the loop runs the conversion for real.

  NOTE(review): PCRE2_CODE_UNIT_WIDTH looks like the width in bits rather than
  bytes, so this request appears larger than needed; CU2BYTES() may be what
  was intended. The size is deliberately left unchanged here - confirm before
  shrinking it. */

  allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
    (*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
  if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;
  *buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));

  use_buffer = *buffptr;
  use_length = *bufflenptr + 1;
  }

/* Control should never get here. */

*bufflenptr = 0;  /* Error offset */
return PCRE2_ERROR_INTERNAL;
}
/*************************************************
*            Free converted pattern              *
*************************************************/

/* Release a converted pattern that lives in memory obtained by the conversion
function. The memory control block is read from the sizeof(pcre2_memctl) bytes
that immediately precede the pattern, and its free() function is called on that
block. A NULL argument is ignored.

Argument:   the converted pattern, or NULL
Returns:    nothing
*/

PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_converted_pattern_free(PCRE2_UCHAR *converted)
{
pcre2_memctl *block;

if (converted == NULL) return;

/* Step back over the hidden header to reach the start of the allocation. */

block = (pcre2_memctl *)(((char *)converted) - sizeof(pcre2_memctl));
block->free(block, block->memory_data);
}
/* End of pcre2_convert.c */

View File

@ -256,7 +256,8 @@ static const unsigned char match_error_texts[] =
"match with end before start is not supported\0"
"too many replacements (more than INT_MAX)\0"
"bad serialized data\0"
"heap limit exceeded\0"
"heap limit exceeded\0"
"invalid syntax\0"
;

View File

@ -1851,6 +1851,7 @@ extern const uint8_t PRIV(utf8_table4)[];
#define _pcre2_callout_end_delims PCRE2_SUFFIX(_pcre2_callout_end_delims_)
#define _pcre2_callout_start_delims PCRE2_SUFFIX(_pcre2_callout_start_delims_)
#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
#define _pcre2_default_convert_context PCRE2_SUFFIX(_pcre2_default_convert_context_)
#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_)
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
#if PCRE2_CODE_UNIT_WIDTH == 32
@ -1874,6 +1875,7 @@ extern const uint8_t PRIV(OP_lengths)[];
extern const uint32_t PRIV(callout_end_delims)[];
extern const uint32_t PRIV(callout_start_delims)[];
extern const pcre2_compile_context PRIV(default_compile_context);
extern const pcre2_convert_context PRIV(default_convert_context);
extern const pcre2_match_context PRIV(default_match_context);
extern const uint8_t PRIV(default_tables)[];
extern const uint32_t PRIV(hspace_list)[];

View File

@ -590,6 +590,13 @@ typedef struct pcre2_real_match_context {
uint32_t depth_limit;
} pcre2_real_match_context;
/* The real convert context structure. pcre2_pattern_convert() hands a convert
context to PRIV(memctl_malloc)() cast to (pcre2_memctl *), which relies on
memctl being the first member. */
typedef struct pcre2_real_convert_context {
pcre2_memctl memctl;
/* NOTE(review): presumably the separator character used by glob conversion,
as set by pcre2_set_glob_separator() - confirm against pcre2_convert.c. */
uint32_t glob_separator;
} pcre2_real_convert_context;
/* The real compiled code structure. The type for the blocksize field is
defined specially because it is required in pcre2_serialize_decode() when
copying the size from possibly unaligned memory into a variable of the same

View File

@ -186,16 +186,17 @@ void vms_setsymbol( char *, char *, int );
#endif
#endif
#define CFORE_UNSET UINT32_MAX /* Unset value for cfail/cerror fields */
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
#define LOCALESIZE 32 /* Size of locale name */
#define LOOPREPEAT 500000 /* Default loop count for timing */
#define MALLOCLISTSIZE 20 /* For remembering mallocs */
#define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */
#define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
#define CFORE_UNSET UINT32_MAX /* Unset value for cfail/cerror fields */
#define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type field */
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
#define LOCALESIZE 32 /* Size of locale name */
#define LOOPREPEAT 500000 /* Default loop count for timing */
#define MALLOCLISTSIZE 20 /* For remembering mallocs */
#define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */
#define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
/* Make sure the buffer into which replacement strings are copied is big enough
to hold them as 32-bit code units. */
@ -335,6 +336,7 @@ modes, so use the form of the first that is available. */
#define PCRE2_JIT_STACK pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_8
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8
#elif defined SUPPORT_PCRE2_16
@ -344,6 +346,7 @@ modes, so use the form of the first that is available. */
#define PCRE2_JIT_STACK pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_16
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16
#elif defined SUPPORT_PCRE2_32
@ -353,6 +356,7 @@ modes, so use the form of the first that is available. */
#define PCRE2_JIT_STACK pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_32
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
#endif
@ -377,7 +381,7 @@ static cmdstruct cmdlist[] = {
{ "save", CMD_SAVE },
{ "subject", CMD_SUBJECT }};
#define cmdlistcount sizeof(cmdlist)/sizeof(cmdstruct)
#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdstruct))
/* ------------- Structures and tables for handling modifiers -------------- */
@ -387,6 +391,22 @@ of PCRE2_NEWLINE_xx in pcre2.h. */
static const char *newlines[] = {
"DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF" };
/* Structure and table for handling pattern conversion types. Each entry maps
a name that can be given as the value of the "convert" pattern modifier onto
the option value that is stored in the patctl convert_type field. The final
"unset" entry maps onto CONVERT_UNSET, which turns conversion off again. */
typedef struct convertstruct {
const char *name;
uint32_t option;
} convertstruct;
static convertstruct convertlist[] = {
{ "glob_basic", PCRE2_CONVERT_GLOB_BASIC },
{ "glob_bash", PCRE2_CONVERT_GLOB_BASH },
{ "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
{ "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
{ "unset", CONVERT_UNSET }};
/* Number of entries in convertlist; parenthesized so that it is safe inside
larger expressions. */
#define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
/* Modifier types and applicability */
enum { MOD_CTC, /* Applies to a compile context */
@ -398,6 +418,8 @@ enum { MOD_CTC, /* Applies to a compile context */
MOD_PDP, /* As MOD_PD, OK for Perl test */
MOD_PND, /* As MOD_PD, but not for a default pattern */
MOD_PNDP, /* As MOD_PND, OK for Perl test */
MOD_CHR, /* Is a single character */
MOD_CON, /* Is a "convert" type */
MOD_CTL, /* Is a control bit */
MOD_BSR, /* Is a BSR value */
MOD_IN2, /* Is one or two unsigned integers */
@ -496,6 +518,9 @@ typedef struct patctl { /* Structure for pattern modifiers. */
uint32_t jit;
uint32_t stackguard_test;
uint32_t tables_id;
uint32_t convert_type;
uint32_t convert_length;
uint32_t convert_glob_separator;
uint32_t regerror_buffsize;
uint8_t locale[LOCALESIZE];
} patctl;
@ -568,6 +593,9 @@ static modstruct modlist[] = {
{ "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
{ "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
{ "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
{ "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
{ "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
{ "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
{ "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
{ "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
@ -884,6 +912,7 @@ static uint8_t *dbuffer = NULL;
static pcre2_code_8 *compiled_code8;
static pcre2_general_context_8 *general_context8, *general_context_copy8;
static pcre2_compile_context_8 *pat_context8, *default_pat_context8;
static pcre2_convert_context_8 *con_context8, *default_con_context8;
static pcre2_match_context_8 *dat_context8, *default_dat_context8;
static pcre2_match_data_8 *match_data8;
#endif
@ -892,6 +921,7 @@ static pcre2_match_data_8 *match_data8;
static pcre2_code_16 *compiled_code16;
static pcre2_general_context_16 *general_context16, *general_context_copy16;
static pcre2_compile_context_16 *pat_context16, *default_pat_context16;
static pcre2_convert_context_16 *con_context16, *default_con_context16;
static pcre2_match_context_16 *dat_context16, *default_dat_context16;
static pcre2_match_data_16 *match_data16;
static PCRE2_SIZE pbuffer16_size = 0; /* Set only when needed */
@ -902,6 +932,7 @@ static uint16_t *pbuffer16 = NULL;
static pcre2_code_32 *compiled_code32;
static pcre2_general_context_32 *general_context32, *general_context_copy32;
static pcre2_compile_context_32 *pat_context32, *default_pat_context32;
static pcre2_convert_context_32 *con_context32, *default_con_context32;
static pcre2_match_context_32 *dat_context32, *default_dat_context32;
static pcre2_match_data_32 *match_data32;
static PCRE2_SIZE pbuffer32_size = 0; /* Set only when needed */
@ -942,6 +973,21 @@ are supported. */
(test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
(uint32_t)(((PCRE2_SPTR32)(a))[b]))
/* Copy one convert context over another, using the variables and the context
size that belong to the code unit width selected by test_mode. The expansion
is an unbraced if/else chain, so use it only as a complete statement. */
#define CONCTXCPY(a,b) \
if (test_mode == PCRE8_MODE) \
memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \
else if (test_mode == PCRE16_MODE) \
memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \
else memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
/* Copy c code units from b into the buffer for the current width, scaling c
to a byte count (x1, x2 or x4). NOTE(review): the chain ends in an "else if"
with no final else, so an "else" that directly follows a use of this macro
would attach to it. */
#define CONVERT_COPY(a,b,c) \
if (test_mode == PCRE8_MODE) \
memcpy(G(a,8),(char *)b,c); \
else if (test_mode == PCRE16_MODE) \
memcpy(G(a,16),(char *)b,(c)*2); \
else if (test_mode == PCRE32_MODE) \
memcpy(G(a,32),(char *)b,(c)*4)
#define DATCTXCPY(a,b) \
if (test_mode == PCRE8_MODE) \
memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
@ -1018,6 +1064,11 @@ are supported. */
else \
G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
/* Free a converted pattern by calling pcre2_converted_pattern_free_N() for the
width selected by test_mode, casting the pointer to that width's code unit
type. Expands to an unbraced if/else chain. */
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
if (test_mode == PCRE8_MODE) pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
else if (test_mode == PCRE16_MODE) pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
else pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
if (test_mode == PCRE8_MODE) \
a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
@ -1129,6 +1180,14 @@ are supported. */
else \
pcre2_match_data_free_32(G(a,32))
/* Call pcre2_pattern_convert_N() for the width selected by test_mode.
Arguments: a receives the return code, b is the base name of the pattern
buffer, c the pattern length, d the options, e the address of the output
buffer pointer (cast to the width's type), f the address of the output length,
and g the base name of the convert context. */
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
if (test_mode == PCRE8_MODE) \
a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
else if (test_mode == PCRE16_MODE) \
a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
else \
a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) \
if (test_mode == PCRE8_MODE) \
a = pcre2_pattern_info_8(G(b,8),c,d); \
@ -1209,6 +1268,14 @@ are supported. */
else \
pcre2_set_depth_limit_32(G(a,32),b)
/* Call pcre2_set_glob_separator_N() for the width selected by test_mode on
the convert context with base name a, passing separator b; the result is
stored in r. */
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
if (test_mode == PCRE8_MODE) \
r = pcre2_set_glob_separator_8(G(a,8),b); \
else if (test_mode == PCRE16_MODE) \
r = pcre2_set_glob_separator_16(G(a,16),b); \
else \
r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_heap_limit_8(G(a,8),b); \
@ -1436,6 +1503,17 @@ the three different cases. */
(uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) : \
(uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b]))
/* Two-width version: copy one convert context over another, choosing between
the BITONE and BITTWO variables and context sizes according to test_mode. */
#define CONCTXCPY(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_convert_context_,BITONE))); \
else \
memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_convert_context_,BITTWO)))
/* Two-width version: copy c code units from b into the buffer for the current
width, scaling c by BYTEONE or BYTETWO. Unlike the three-width form, this one
expands to a conditional expression rather than an if statement. */
#define CONVERT_COPY(a,b,c) \
(test_mode == G(G(PCRE,BITONE),_MODE))? \
memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \
memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO)
#define DATCTXCPY(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \
@ -1495,6 +1573,12 @@ the three different cases. */
else \
G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,g)
/* Two-width version: free a converted pattern by calling the
pcre2_converted_pattern_free_N() function for whichever of the two supported
widths test_mode selects, casting the pointer to that width's code unit type.
Expands to an unbraced if/else, so use it only as a complete statement. */
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_converted_pattern_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
  else \
    G(pcre2_converted_pattern_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
@ -1591,6 +1675,12 @@ the three different cases. */
else \
G(pcre2_match_data_free_,BITTWO)(G(a,BITTWO))
/* Two-width version: call pcre2_pattern_convert_N() for whichever of the two
supported widths test_mode selects. a receives the return code; e is cast to a
pointer to pointer to that width's code unit type. */
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_pattern_convert_,BITONE)(G(b,BITONE),c,d,(G(PCRE2_UCHAR,BITONE) **)e,f,G(g,BITONE)); \
else \
a = G(pcre2_pattern_convert_,BITTWO)(G(b,BITTWO),c,d,(G(PCRE2_UCHAR,BITTWO) **)e,f,G(g,BITTWO))
#define PCRE2_PATTERN_INFO(a,b,c,d) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_pattern_info_,BITONE)(G(b,BITONE),c,d); \
@ -1653,6 +1743,12 @@ the three different cases. */
else \
G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
/* Two-width version: call pcre2_set_glob_separator_N() for whichever of the
two supported widths test_mode selects; the result is stored in r. */
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \
else \
r = G(pcre2_set_glob_separator_,BITTWO)(G(a,BITTWO),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
@ -1820,6 +1916,8 @@ the three different cases. */
#define CASTFLD(t,a,b) (t)(G(a,8)->b)
#define CASTVAR(t,x) (t)G(x,8)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b])
/* 8-bit only: copy a whole convert context, and copy c code units (here the
same as c bytes) of converted pattern. */
#define CONCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8))
#define CONVERT_COPY(a,b,c) memcpy(G(a,8),(char *)b, c)
#define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8))
#define FLD(a,b) G(a,8)->b
#define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8))
@ -1835,6 +1933,8 @@ the three different cases. */
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g)
/* 8-bit only: free a converted pattern. */
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
@ -1857,6 +1957,7 @@ the three different cases. */
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
/* 8-bit only: convert a pattern by calling pcre2_pattern_convert_8(), storing
the return code in a. */
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
@ -1872,6 +1973,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
/* 8-bit only: set the glob separator in a convert context; result goes in r. */
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
@ -1917,6 +2019,8 @@ the three different cases. */
#define CASTFLD(t,a,b) (t)(G(a,16)->b)
#define CASTVAR(t,x) (t)G(x,16)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
/* 16-bit only: copy a whole convert context, and copy c code units (2 bytes
each) of converted pattern. */
#define CONCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16))
#define CONVERT_COPY(a,b,c) memcpy(G(a,16),(char *)b, (c)*2)
#define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
#define FLD(a,b) G(a,16)->b
#define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))
@ -1932,6 +2036,8 @@ the three different cases. */
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
/* 16-bit only: free a converted pattern. */
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
@ -1954,6 +2060,7 @@ the three different cases. */
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
/* 16-bit only: convert a pattern by calling pcre2_pattern_convert_16(),
storing the return code in a. */
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
@ -1969,6 +2076,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
/* 16-bit only: set the glob separator in a convert context; result goes in r. */
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
@ -2014,6 +2122,8 @@ the three different cases. */
#define CASTFLD(t,a,b) (t)(G(a,32)->b)
#define CASTVAR(t,x) (t)G(x,32)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
/* 32-bit only: copy a whole convert context, and copy c code units (4 bytes
each) of converted pattern. */
#define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
#define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
@ -2029,6 +2139,8 @@ the three different cases. */
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
/* 32-bit only: free a converted pattern. */
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
@ -2051,6 +2163,7 @@ the three different cases. */
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
/* 32-bit only: convert a pattern by calling pcre2_pattern_convert_32(),
storing the return code in a. */
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
@ -2066,6 +2179,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
/* 32-bit only: set the glob separator in a convert context; result goes in r. */
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
@ -3220,7 +3334,7 @@ strncmpic(const uint8_t *s, const uint8_t *t, int n)
while (n--)
{
int c = tolower(*s++) - tolower(*t++);
if (c) return c;
if (c != 0) return c;
}
return 0;
}
@ -3466,15 +3580,15 @@ for (;;)
field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
if (field == NULL) return FALSE;
/* /x is a special case; a second appearance changes PCRE2_EXTENDED to
PCRE2_EXTENDED_MORE. */
PCRE2_EXTENDED_MORE. */
if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
{
{
*((uint32_t *)field) &= ~PCRE2_EXTENDED;
*((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
}
}
else
*((uint32_t *)field) |= modlist[index].value;
}
@ -3550,6 +3664,26 @@ for (;;)
}
pp = ep;
break;
case MOD_CHR: /* A single character */
/* Store the next character of the value and step past it. */
*((uint32_t *)field) = *pp++;
break;
case MOD_CON: /* A convert type */
/* Look the value up in convertlist; the first entry that matches is used.
NOTE(review): only the first len characters are compared; len is set outside
this fragment and is presumably the length of the given value, in which case
an abbreviation matches the first entry it is a prefix of - confirm. */
for (i = 0; i < convertlistcount; i++)
{
if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
{
/* The first type replaces the "unset" marker; any further type is ORed in.
ORing in the "unset" entry (all bits set) gives CONVERT_UNSET again. */
if (*((uint32_t *)field) == CONVERT_UNSET)
*((uint32_t *)field) = convertlist[i].option;
else
*((uint32_t *)field) |= convertlist[i].option;
break;
}
}
/* Falling off the end of the table means the name was not recognized. */
if (i >= convertlistcount) goto INVALID_VALUE;
pp = ep;
break;
case MOD_IN2: /* One or two unsigned integers */
if (!isdigit(*pp)) goto INVALID_VALUE;
@ -4759,9 +4893,18 @@ if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
return PR_SKIP;
}
}
/* The convert and posix modifiers are mutually exclusive. */
/* Check for mutually exclusive modifiers. At present, these are all in the
first control word. */
if (pat_patctl.convert_type != CONVERT_UNSET &&
(pat_patctl.control & CTL_POSIX) != 0)
{
fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
return PR_SKIP;
}
/* Check for mutually exclusive control modifiers. At present, these are all in
the first control word. */
for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
{
@ -5158,7 +5301,69 @@ switch(errorcode)
}
/* The pattern is now in pbuffer[8|16|32], with the length in code units in
patlen. By default we pass a zero-terminated pattern, but a length is passed if
patlen. If it is to be converted, copy the result back afterwards so that it
ends up back in the usual place. */
if (pat_patctl.convert_type != CONVERT_UNSET)
  {
  int rc;
  uint32_t convert_options = pat_patctl.convert_type;
  void *converted_pattern = NULL;     /* NULL lets the library allocate */
  PCRE2_SIZE converted_length = 0;    /* Never printed uninitialized */

  /* A non-zero convert_length asks for the conversion to be done into a
  buffer of exactly that many code units, obtained here with malloc(), so
  that the library's length checking can be tested. */

  if (pat_patctl.convert_length != 0)
    {
    converted_length = pat_patctl.convert_length;
    converted_pattern = malloc(converted_length * code_unit_size);
    if (converted_pattern == NULL)
      {
      fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
      return PR_SKIP;
      }
    }

  if (utf) convert_options |= PCRE2_CONVERT_UTF;
  if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
    convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;

  /* Start from a fresh copy of the default convert context each time. */

  CONCTXCPY(con_context, default_con_context);

  if (pat_patctl.convert_glob_separator != 0)
    {
    PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
    if (rc != 0)
      {
      fprintf(outfile, "** Invalid glob separator '%c'\n",
        pat_patctl.convert_glob_separator);
      free(converted_pattern);   /* Ours, or NULL; nothing else exists yet */
      return PR_SKIP;
      }
    }

  PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
    &converted_pattern, &converted_length, con_context);

  /* On failure, converted_length holds the error offset. Release whichever
  buffer exists before giving up on this pattern: our own malloc() block, or
  anything the library may have allocated (its free function ignores NULL). */

  if (rc != 0)
    {
    fprintf(outfile, "** Pattern conversion error at offset %lu: ",
      (unsigned long)converted_length);
    if (pat_patctl.convert_length != 0)
      free(converted_pattern);
    else
      {
      PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
      }
    if (!print_error_message(rc, "", "\n")) return PR_ABEND;
    return PR_SKIP;
    }

  /* Output the converted pattern, copy it, then free it. The copy includes
  the terminating zero. NOTE(review): this assumes the pattern buffer has room
  for converted_length + 1 code units - confirm against its allocation. */

  PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
  fprintf(outfile, "\n");
  patlen = converted_length;
  CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);

  if (pat_patctl.convert_length != 0)
    free(converted_pattern);
  else
    PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
  }
/* By default we pass a zero-terminated pattern, but a length is passed if
"use_length" was specified or this is a hex pattern (which might contain binary
zeros). When valgrind is supported, arrange for the unused part of the buffer
to be marked as no access. */
@ -7584,7 +7789,10 @@ _setmode( _fileno( stdout ), _O_BINARY );
/* Initialization that does not depend on the running mode. */
locale_name[0] = 0;
memset(&def_patctl, 0, sizeof(patctl));
def_patctl.convert_type = CONVERT_UNSET;
memset(&def_datctl, 0, sizeof(datctl));
def_datctl.oveccount = DEFAULT_OVECCOUNT;
def_datctl.copy_numbers[0] = -1;
@ -7896,6 +8104,8 @@ max_oveccount = DEFAULT_OVECCOUNT;
G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
#define CONTEXTTESTS \

264
testdata/testinput24 vendored Normal file
View File

@ -0,0 +1,264 @@
# This file tests the auxiliary pattern conversion features of the PCRE2
# library, in non-UTF mode.
#forbid_utf
#newline_default lf any anycrlf
# -------- Tests of glob conversion --------
# Set the glob separator explicitly so that different OS defaults are not a
# problem. Then test various errors.
#pattern convert=glob_basic,convert_glob_separator=/
/abc/posix
# More than one glob type is an error.
/abc/convert=glob_bash
# Separator must be / \ or .
/a*b/convert_glob_separator=%
# Can't have separator in a class
"[ab/cd]"
"[,-/]"
/[ab/
# Length check
/abc/convert_length=7
/abc/convert_length=8
# Now some actual tests
/a?b[]xy]*c/
azb]1234c
# Tests from the gitwildmatch list, with some additions
/foo/
foo
/= Expect no match
bar
//
\
/???/
foo
\= Expect no match
foobar
/*/
foo
\
/f*/
foo
f
/*f/
oof
\= Expect no match
foo
/*foo*/
foo
food
aprilfool
/*ob*a*r*/
foobar
/*ab/
aaaaaaabababab
/foo\*/
foo*
/foo\*bar/
\= Expect no match
foobar
/f\\oo/
f\\oo
/*[al]?/
ball
/[ten]/
\= Expect no match
ten
/t[a-g]n/
ten
/a[]]b/
a]b
/a[]-]b/
a-b
a]b
\= Expect no match
aab
/a[]a-]b/
aab
/]/
]
/t[!a-g]n/
ton
\= Expect no match
ten
'[[:alpha:]][[:digit:]][[:upper:]]'
a1B
'[[:digit:][:upper:][:space:]]'
A
1
\ \=
\= Expect no match
a
.
'[a-c[:digit:]x-z]'
5
b
y
\= Expect no match
q
# End of gitwildmatch tests
/*.j?g/
pic01.jpg
.jpg
pic02.jxg
\= Expect no match
pic03.j/g
/A[+-0]B/
A+B
A.B
A0B
\= Expect no match
A/B
/*x?z/
abc.xyz
\= Expect no match
.xyz
/?x?z/
axyz
\= Expect no match
.xyz
"[,-0]x?z"
,xyz
\= Expect no match
/xyz
.xyz
".x*"
.xabc
/a[--0]z/
a-z
a.z
a0z
\= Expect no match
a/z
a1z
/<[a-c-d]>/
<a>
<b>
<c>
<d>
<->
/a[[:digit:].]z/
a1z
a.z
\= Expect no match
a:z
/a[[:digit].]z/
a[.]z
a:.]z
ad.]z
/<[[:a[:digit:]b]>/
<[>
<:>
<a>
<9>
<b>
\= Expect no match
<d>
/a*b/convert_glob_separator=\
/a*b/convert_glob_separator=.
/a*b/convert_glob_separator=/
#pattern convert=unset
#pattern convert=posix_extended
/a[[:>:]z/
/<[[:a[:digit:]b]>/
<[>
<:>
<a>
<9>
<b>
\= Expect no match
<d>
/a+\1b\\c|d[ab\c]/
/a[[:<:]]b[[:>:]]/
/<[]bc]>/
<]>
<b>
<c>
/<[^]bc]>/
<.>
\= Expect no match
<]>
<b>
/(a)\1b/
a1b
\= Expect no match
aab
#pattern convert=unset
#pattern convert=posix_basic
/a*b+c\+[def](ab)\(cd\)/
/\(a\)\1b/
aab
\= Expect no match
a1b
#pattern convert=unset
/abc/
# End of testinput24

18
testdata/testinput25 vendored Normal file
View File

@ -0,0 +1,18 @@
# This file tests the auxiliary pattern conversion features of the PCRE2
# library, in UTF mode.
#newline_default lf any anycrlf
# -------- Tests of glob conversion --------
# Set the glob separator explicitly so that different OS defaults are not a
# problem. Then test various errors.
#pattern convert=glob_basic,convert_glob_separator=/
# The fact that this one works in 9 bytes in the 8-bit library shows that the
# output is in UTF-8, though pcre2test shows the character as an escape.
/'>' c4 a3 '<'/hex,utf,convert_length=9
# End of testinput25

View File

@ -15964,7 +15964,7 @@ Subject length lower bound = 1
------------------------------------------------------------------
# End of testinput2
Error -64: PCRE2_ERROR_BADDATA (unknown error number)
Error -65: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
Error -2: partial match
Error -1: no match

418
testdata/testoutput24 vendored Normal file
View File

@ -0,0 +1,418 @@
# This file tests the auxiliary pattern conversion features of the PCRE2
# library, in non-UTF mode.
#forbid_utf
#newline_default lf any anycrlf
# -------- Tests of glob conversion --------
# Set the glob separator explicitly so that different OS defaults are not a
# problem. Then test various errors.
#pattern convert=glob_basic,convert_glob_separator=/
/abc/posix
** The convert and posix modifiers are mutually exclusive
# More than one glob type is an error.
/abc/convert=glob_bash
** Pattern conversion error at offset 0: bad option value
# Separator must be / \ or .
/a*b/convert_glob_separator=%
** Invalid glob separator '%'
# Can't have separator in a class
"[ab/cd]"
** Pattern conversion error at offset 3: invalid syntax
"[,-/]"
** Pattern conversion error at offset 3: invalid syntax
/[ab/
** Pattern conversion error at offset 3: missing terminating ] for character class
# Length check
/abc/convert_length=7
** Pattern conversion error at offset 3: no more memory
/abc/convert_length=8
\Aabc\z
# Now some actual tests
/a?b[]xy]*c/
\Aa[^/]b[]xy][^/]*c\z
azb]1234c
0: azb]1234c
# Tests from the gitwildmatch list, with some additions
/foo/
\Afoo\z
foo
0: foo
/= Expect no match
No match
bar
No match
//
\A\z
\
0:
/???/
\A(?!\.)[^/][^/][^/]\z
foo
0: foo
\= Expect no match
foobar
No match
/*/
\A[^/]*\z
foo
0: foo
\
0:
/f*/
\Af[^/]*\z
foo
0: foo
f
0: f
/*f/
(?s)\A(?!\.).*(?<=f)\z
oof
0: oof
\= Expect no match
foo
No match
/*foo*/
\A(?!\.)[^/]*foo[^/]*\z
foo
0: foo
food
0: food
aprilfool
0: aprilfool
/*ob*a*r*/
\A(?!\.)[^/]*ob[^/]*a[^/]*r[^/]*\z
foobar
0: foobar
/*ab/
(?s)\A(?!\.).*(?<=ab)\z
aaaaaaabababab
0: aaaaaaabababab
/foo\*/
\Afoo\*\z
foo*
0: foo*
/foo\*bar/
\Afoo\*bar\z
\= Expect no match
foobar
No match
/f\\oo/
\Af\\oo\z
f\\oo
0: f\oo
/*[al]?/
(?s)\A(?!\.).*(?<=[al][^/])\z
ball
0: ball
/[ten]/
\A(?!\.)[ten]\z
\= Expect no match
ten
No match
/t[a-g]n/
\At[a-g]n\z
ten
0: ten
/a[]]b/
\Aa[]]b\z
a]b
0: a]b
/a[]-]b/
\Aa[]-]b\z
a-b
0: a-b
a]b
0: a]b
\= Expect no match
aab
No match
/a[]a-]b/
\Aa[]a-]b\z
aab
0: aab
/]/
\A\]\z
]
0: ]
/t[!a-g]n/
\At[^a-g]n\z
ton
0: ton
\= Expect no match
ten
No match
'[[:alpha:]][[:digit:]][[:upper:]]'
\A(?!\.)[[:alpha:]][[:digit:]][[:upper:]]\z
a1B
0: a1B
'[[:digit:][:upper:][:space:]]'
\A(?!\.)[[:digit:][:upper:][:space:]]\z
A
0: A
1
0: 1
\ \=
0:
\= Expect no match
a
No match
.
No match
'[a-c[:digit:]x-z]'
\A(?!\.)[a-c[:digit:]x-z]\z
5
0: 5
b
0: b
y
0: y
\= Expect no match
q
No match
# End of gitwildmatch tests
/*.j?g/
(?s)\A(?!\.).*(?<=\.j[^/]g)\z
pic01.jpg
0: pic01.jpg
.jpg
No match
pic02.jxg
0: pic02.jxg
\= Expect no match
pic03.j/g
No match
/A[+-0]B/
\AA[+-0](?<!/)B\z
A+B
0: A+B
A.B
0: A.B
A0B
0: A0B
\= Expect no match
A/B
No match
/*x?z/
(?s)\A(?!\.).*(?<=x[^/]z)\z
abc.xyz
0: abc.xyz
\= Expect no match
.xyz
No match
/?x?z/
\A(?!\.)[^/]x[^/]z\z
axyz
0: axyz
\= Expect no match
.xyz
No match
"[,-0]x?z"
\A(?!\.)[,-0](?<!/)x[^/]z\z
,xyz
0: ,xyz
\= Expect no match
/xyz
No match
.xyz
No match
".x*"
\A\.x[^/]*\z
.xabc
0: .xabc
/a[--0]z/
\Aa[--0](?<!/)z\z
a-z
0: a-z
a.z
0: a.z
a0z
0: a0z
\= Expect no match
a/z
No match
a1z
No match
/<[a-c-d]>/
\A<[a-c-d]>\z
<a>
0: <a>
<b>
0: <b>
<c>
0: <c>
<d>
0: <d>
<->
0: <->
/a[[:digit:].]z/
\Aa[[:digit:].]z\z
a1z
0: a1z
a.z
0: a.z
\= Expect no match
a:z
No match
/a[[:digit].]z/
\Aa[[:digit]\.\]z\z
a[.]z
0: a[.]z
a:.]z
0: a:.]z
ad.]z
0: ad.]z
/<[[:a[:digit:]b]>/
\A<[[:a[:digit:]b]>\z
<[>
0: <[>
<:>
0: <:>
<a>
0: <a>
<9>
0: <9>
<b>
0: <b>
\= Expect no match
<d>
No match
/a*b/convert_glob_separator=\
\Aa[^\\]*b\z
/a*b/convert_glob_separator=.
\Aa[^.]*b\z
/a*b/convert_glob_separator=/
\Aa[^/]*b\z
#pattern convert=unset
#pattern convert=posix_extended
/a[[:>:]z/
a[[:>:]z
Failed: error 130 at offset 4: unknown POSIX class name
/<[[:a[:digit:]b]>/
<[[:a[:digit:]b]>
<[>
0: <[>
<:>
0: <:>
<a>
0: <a>
<9>
0: <9>
<b>
0: <b>
\= Expect no match
<d>
No match
/a+\1b\\c|d[ab\c]/
a+1b\\c|d[ab\\c]
/a[[:<:]]b[[:>:]]/
a[[:<:]]b[[:>:]]
/<[]bc]>/
<[]bc]>
<]>
0: <]>
<b>
0: <b>
<c>
0: <c>
/<[^]bc]>/
<[^]bc]>
<.>
0: <.>
\= Expect no match
<]>
No match
<b>
No match
/(a)\1b/
(a)1b
a1b
0: a1b
1: a
\= Expect no match
aab
No match
#pattern convert=unset
#pattern convert=posix_basic
/a*b+c\+[def](ab)\(cd\)/
a*b\+c+[def]\(ab\)(cd)
/\(a\)\1b/
(a)\1b
aab
0: aab
1: a
\= Expect no match
a1b
No match
#pattern convert=unset
/abc/
# End of testinput24

19
testdata/testoutput25 vendored Normal file
View File

@ -0,0 +1,19 @@
# This file tests the auxiliary pattern conversion features of the PCRE2
# library, in UTF mode.
#newline_default lf any anycrlf
# -------- Tests of glob conversion --------
# Set the glob separator explicitly so that different OS defaults are not a
# problem. Then test various errors.
#pattern convert=glob_basic,convert_glob_separator=/
# The fact that this one works in 9 bytes in the 8-bit library shows that the
# output is in UTF-8, though pcre2test shows the character as an escape.
/'>' c4 a3 '<'/hex,utf,convert_length=9
\A>\x{123}<\z
# End of testinput25