Replace multiple copies of extended grapheme sequence code with a single
subroutine.
This commit is contained in:
parent
badfbcdd12
commit
42f547bf4d
|
@ -432,6 +432,7 @@ SET(PCRE2_SOURCES
|
||||||
src/pcre2_convert.c
|
src/pcre2_convert.c
|
||||||
src/pcre2_dfa_match.c
|
src/pcre2_dfa_match.c
|
||||||
src/pcre2_error.c
|
src/pcre2_error.c
|
||||||
|
src/pcre2_extuni.c
|
||||||
src/pcre2_find_bracket.c
|
src/pcre2_find_bracket.c
|
||||||
src/pcre2_jit_compile.c
|
src/pcre2_jit_compile.c
|
||||||
src/pcre2_maketables.c
|
src/pcre2_maketables.c
|
||||||
|
|
|
@ -7,6 +7,10 @@ Version 10.31 xx-xxx-201x
|
||||||
|
|
||||||
1. Fix typo (missing ]) in VMS code in pcre2test.c.
|
1. Fix typo (missing ]) in VMS code in pcre2test.c.
|
||||||
|
|
||||||
|
2. Replace the replicated code for matching extended Unicode grapheme sequences
|
||||||
|
(which got a lot more complicated by change 10.30/49) by a single subroutine
|
||||||
|
that is called by both pcre2_match() and pcre2_dfa_match().
|
||||||
|
|
||||||
|
|
||||||
Version 10.30 14-August-2017
|
Version 10.30 14-August-2017
|
||||||
----------------------------
|
----------------------------
|
||||||
|
|
|
@ -351,6 +351,7 @@ COMMON_SOURCES = \
|
||||||
src/pcre2_convert.c \
|
src/pcre2_convert.c \
|
||||||
src/pcre2_dfa_match.c \
|
src/pcre2_dfa_match.c \
|
||||||
src/pcre2_error.c \
|
src/pcre2_error.c \
|
||||||
|
src/pcre2_extuni.c \
|
||||||
src/pcre2_find_bracket.c \
|
src/pcre2_find_bracket.c \
|
||||||
src/pcre2_internal.h \
|
src/pcre2_internal.h \
|
||||||
src/pcre2_intmodedep.h \
|
src/pcre2_intmodedep.h \
|
||||||
|
|
|
@ -91,8 +91,10 @@ can skip ahead to the CMake section.
|
||||||
pcre2_compile.c
|
pcre2_compile.c
|
||||||
pcre2_config.c
|
pcre2_config.c
|
||||||
pcre2_context.c
|
pcre2_context.c
|
||||||
|
pcre2_convert.c
|
||||||
pcre2_dfa_match.c
|
pcre2_dfa_match.c
|
||||||
pcre2_error.c
|
pcre2_error.c
|
||||||
|
pcre2_extuni.c
|
||||||
pcre2_find_bracket.c
|
pcre2_find_bracket.c
|
||||||
pcre2_jit_compile.c
|
pcre2_jit_compile.c
|
||||||
pcre2_maketables.c
|
pcre2_maketables.c
|
||||||
|
@ -377,4 +379,4 @@ and executable, is in EBCDIC and native z/OS file formats and this is the
|
||||||
recommended download site.
|
recommended download site.
|
||||||
|
|
||||||
=============================
|
=============================
|
||||||
Last Updated: 17 March 2017
|
Last Updated: 12 September 2017
|
||||||
|
|
|
@ -196,8 +196,10 @@ files="\
|
||||||
src/pcre2_compile.c \
|
src/pcre2_compile.c \
|
||||||
src/pcre2_config.c \
|
src/pcre2_config.c \
|
||||||
src/pcre2_context.c \
|
src/pcre2_context.c \
|
||||||
|
src/pcre2_convert.c \
|
||||||
src/pcre2_dfa_match.c \
|
src/pcre2_dfa_match.c \
|
||||||
src/pcre2_error.c \
|
src/pcre2_error.c \
|
||||||
|
src/pcre2_extuni.c \
|
||||||
src/pcre2_find_bracket.c \
|
src/pcre2_find_bracket.c \
|
||||||
src/pcre2_internal.h \
|
src/pcre2_internal.h \
|
||||||
src/pcre2_intmodedep.h \
|
src/pcre2_intmodedep.h \
|
||||||
|
|
3
README
3
README
|
@ -773,6 +773,7 @@ The distribution should contain the files listed below.
|
||||||
src/pcre2_convert.c )
|
src/pcre2_convert.c )
|
||||||
src/pcre2_dfa_match.c )
|
src/pcre2_dfa_match.c )
|
||||||
src/pcre2_error.c )
|
src/pcre2_error.c )
|
||||||
|
src/pcre2_extuni.c )
|
||||||
src/pcre2_find_bracket.c )
|
src/pcre2_find_bracket.c )
|
||||||
src/pcre2_jit_compile.c )
|
src/pcre2_jit_compile.c )
|
||||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||||
|
@ -882,4 +883,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 18 July 2017
|
Last updated: 12 September 2017
|
||||||
|
|
|
@ -1364,63 +1364,14 @@ for (;;)
|
||||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||||
if (clen > 0)
|
if (clen > 0)
|
||||||
{
|
{
|
||||||
uint32_t lgb, rgb;
|
|
||||||
PCRE2_SPTR nptr = ptr + clen;
|
|
||||||
int ncount = 0;
|
int ncount = 0;
|
||||||
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
|
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
|
||||||
{
|
{
|
||||||
active_count--; /* Remove non-match possibility */
|
active_count--; /* Remove non-match possibility */
|
||||||
next_active_state--;
|
next_active_state--;
|
||||||
}
|
}
|
||||||
lgb = UCD_GRAPHBREAK(c);
|
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||||
while (nptr < end_subject)
|
&ncount);
|
||||||
{
|
|
||||||
dlen = 1;
|
|
||||||
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
|
||||||
rgb = UCD_GRAPHBREAK(d);
|
|
||||||
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
|
||||||
|
|
||||||
/* Not breaking between Regional Indicators is allowed only if
|
|
||||||
there are an even number of preceding RIs. */
|
|
||||||
|
|
||||||
if (lgb == ucp_gbRegionalIndicator &&
|
|
||||||
rgb == ucp_gbRegionalIndicator)
|
|
||||||
{
|
|
||||||
int ricount = 0;
|
|
||||||
PCRE2_SPTR bptr = nptr - 1;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf) BACKCHAR(bptr);
|
|
||||||
#endif
|
|
||||||
/* bptr is pointing to the left-hand character */
|
|
||||||
|
|
||||||
while (bptr > mb->start_subject)
|
|
||||||
{
|
|
||||||
bptr--;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf)
|
|
||||||
{
|
|
||||||
BACKCHAR(bptr);
|
|
||||||
GETCHAR(d, bptr);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
d = *bptr;
|
|
||||||
if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
|
|
||||||
ricount++;
|
|
||||||
}
|
|
||||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
|
||||||
any number of Extend before a following E_Modifier. */
|
|
||||||
|
|
||||||
if (rgb != ucp_gbExtend ||
|
|
||||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
|
||||||
lgb = rgb;
|
|
||||||
|
|
||||||
ncount++;
|
|
||||||
nptr += dlen;
|
|
||||||
}
|
|
||||||
count++;
|
count++;
|
||||||
ADD_NEW_DATA(-state_offset, count, ncount);
|
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||||
}
|
}
|
||||||
|
@ -1663,8 +1614,6 @@ for (;;)
|
||||||
ADD_ACTIVE(state_offset + 2, 0);
|
ADD_ACTIVE(state_offset + 2, 0);
|
||||||
if (clen > 0)
|
if (clen > 0)
|
||||||
{
|
{
|
||||||
uint32_t lgb, rgb;
|
|
||||||
PCRE2_SPTR nptr = ptr + clen;
|
|
||||||
int ncount = 0;
|
int ncount = 0;
|
||||||
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
|
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
|
||||||
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
|
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
|
||||||
|
@ -1672,55 +1621,8 @@ for (;;)
|
||||||
active_count--; /* Remove non-match possibility */
|
active_count--; /* Remove non-match possibility */
|
||||||
next_active_state--;
|
next_active_state--;
|
||||||
}
|
}
|
||||||
lgb = UCD_GRAPHBREAK(c);
|
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||||
while (nptr < end_subject)
|
&ncount);
|
||||||
{
|
|
||||||
dlen = 1;
|
|
||||||
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
|
||||||
rgb = UCD_GRAPHBREAK(d);
|
|
||||||
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
|
||||||
|
|
||||||
/* Not breaking between Regional Indicators is allowed only if
|
|
||||||
there are an even number of preceding RIs. */
|
|
||||||
|
|
||||||
if (lgb == ucp_gbRegionalIndicator &&
|
|
||||||
rgb == ucp_gbRegionalIndicator)
|
|
||||||
{
|
|
||||||
int ricount = 0;
|
|
||||||
PCRE2_SPTR bptr = nptr - 1;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf) BACKCHAR(bptr);
|
|
||||||
#endif
|
|
||||||
/* bptr is pointing to the left-hand character */
|
|
||||||
|
|
||||||
while (bptr > mb->start_subject)
|
|
||||||
{
|
|
||||||
bptr--;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf)
|
|
||||||
{
|
|
||||||
BACKCHAR(bptr);
|
|
||||||
GETCHAR(d, bptr);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
d = *bptr;
|
|
||||||
if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
|
|
||||||
ricount++;
|
|
||||||
}
|
|
||||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
|
||||||
any number of Extend before a following E_Modifier. */
|
|
||||||
|
|
||||||
if (rgb != ucp_gbExtend ||
|
|
||||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
|
||||||
lgb = rgb;
|
|
||||||
|
|
||||||
ncount++;
|
|
||||||
nptr += dlen;
|
|
||||||
}
|
|
||||||
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -1973,63 +1875,15 @@ for (;;)
|
||||||
count = current_state->count; /* Number already matched */
|
count = current_state->count; /* Number already matched */
|
||||||
if (clen > 0)
|
if (clen > 0)
|
||||||
{
|
{
|
||||||
uint32_t lgb, rgb;
|
PCRE2_SPTR nptr;
|
||||||
PCRE2_SPTR nptr = ptr + clen;
|
|
||||||
int ncount = 0;
|
int ncount = 0;
|
||||||
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
|
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
|
||||||
{
|
{
|
||||||
active_count--; /* Remove non-match possibility */
|
active_count--; /* Remove non-match possibility */
|
||||||
next_active_state--;
|
next_active_state--;
|
||||||
}
|
}
|
||||||
lgb = UCD_GRAPHBREAK(c);
|
nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||||
while (nptr < end_subject)
|
&ncount);
|
||||||
{
|
|
||||||
dlen = 1;
|
|
||||||
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
|
||||||
rgb = UCD_GRAPHBREAK(d);
|
|
||||||
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
|
||||||
|
|
||||||
/* Not breaking between Regional Indicators is allowed only if
|
|
||||||
there are an even number of preceding RIs. */
|
|
||||||
|
|
||||||
if (lgb == ucp_gbRegionalIndicator &&
|
|
||||||
rgb == ucp_gbRegionalIndicator)
|
|
||||||
{
|
|
||||||
int ricount = 0;
|
|
||||||
PCRE2_SPTR bptr = nptr - 1;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf) BACKCHAR(bptr);
|
|
||||||
#endif
|
|
||||||
/* bptr is pointing to the left-hand character */
|
|
||||||
|
|
||||||
while (bptr > mb->start_subject)
|
|
||||||
{
|
|
||||||
bptr--;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf)
|
|
||||||
{
|
|
||||||
BACKCHAR(bptr);
|
|
||||||
GETCHAR(d, bptr);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
d = *bptr;
|
|
||||||
if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
|
|
||||||
ricount++;
|
|
||||||
}
|
|
||||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
|
||||||
any number of Extend before a following E_Modifier. */
|
|
||||||
|
|
||||||
if (rgb != ucp_gbExtend ||
|
|
||||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
|
||||||
lgb = rgb;
|
|
||||||
|
|
||||||
ncount++;
|
|
||||||
nptr += dlen;
|
|
||||||
}
|
|
||||||
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
reset_could_continue = TRUE;
|
reset_could_continue = TRUE;
|
||||||
if (++count >= (int)GET2(code, 1))
|
if (++count >= (int)GET2(code, 1))
|
||||||
|
@ -2206,58 +2060,9 @@ for (;;)
|
||||||
case OP_EXTUNI:
|
case OP_EXTUNI:
|
||||||
if (clen > 0)
|
if (clen > 0)
|
||||||
{
|
{
|
||||||
uint32_t lgb, rgb;
|
|
||||||
PCRE2_SPTR nptr = ptr + clen;
|
|
||||||
int ncount = 0;
|
int ncount = 0;
|
||||||
lgb = UCD_GRAPHBREAK(c);
|
PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject,
|
||||||
while (nptr < end_subject)
|
end_subject, utf, &ncount);
|
||||||
{
|
|
||||||
dlen = 1;
|
|
||||||
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
|
||||||
rgb = UCD_GRAPHBREAK(d);
|
|
||||||
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
|
||||||
|
|
||||||
/* Not breaking between Regional Indicators is allowed only if
|
|
||||||
there are an even number of preceding RIs. */
|
|
||||||
|
|
||||||
if (lgb == ucp_gbRegionalIndicator &&
|
|
||||||
rgb == ucp_gbRegionalIndicator)
|
|
||||||
{
|
|
||||||
int ricount = 0;
|
|
||||||
PCRE2_SPTR bptr = nptr - 1;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf) BACKCHAR(bptr);
|
|
||||||
#endif
|
|
||||||
/* bptr is pointing to the left-hand character */
|
|
||||||
|
|
||||||
while (bptr > mb->start_subject)
|
|
||||||
{
|
|
||||||
bptr--;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf)
|
|
||||||
{
|
|
||||||
BACKCHAR(bptr);
|
|
||||||
GETCHAR(d, bptr);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
d = *bptr;
|
|
||||||
if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
|
|
||||||
ricount++;
|
|
||||||
}
|
|
||||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
|
||||||
any number of Extend before a following E_Modifier. */
|
|
||||||
|
|
||||||
if (rgb != ucp_gbExtend ||
|
|
||||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
|
||||||
lgb = rgb;
|
|
||||||
|
|
||||||
ncount++;
|
|
||||||
nptr += dlen;
|
|
||||||
}
|
|
||||||
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
reset_could_continue = TRUE;
|
reset_could_continue = TRUE;
|
||||||
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
|
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
|
||||||
|
|
|
@ -0,0 +1,129 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This module contains an internal function that is used to match a Unicode
|
||||||
|
extended grapheme sequence. It is used by both pcre2_match() and
|
||||||
|
pcre2_dfa_match(). */
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Match an extended grapheme sequence *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
Arguments:
|
||||||
|
c the first character
|
||||||
|
eptr pointer to next character
|
||||||
|
start_subject pointer to start of subject
|
||||||
|
end_subject pointer to end of subject
|
||||||
|
utf TRUE if in UTF mode
|
||||||
|
xcount pointer to count of additional characters,
|
||||||
|
or NULL if count not needed
|
||||||
|
|
||||||
|
Returns: pointer after the end of the sequence
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE2_SPTR
|
||||||
|
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
||||||
|
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
||||||
|
{
|
||||||
|
int lgb = UCD_GRAPHBREAK(c);
|
||||||
|
|
||||||
|
while (eptr < end_subject)
|
||||||
|
{
|
||||||
|
int rgb;
|
||||||
|
int len = 1;
|
||||||
|
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
|
||||||
|
rgb = UCD_GRAPHBREAK(c);
|
||||||
|
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
||||||
|
|
||||||
|
/* Not breaking between Regional Indicators is allowed only if there
|
||||||
|
are an even number of preceding RIs. */
|
||||||
|
|
||||||
|
if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
|
||||||
|
{
|
||||||
|
int ricount = 0;
|
||||||
|
PCRE2_SPTR bptr = eptr - 1;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf) BACKCHAR(bptr);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* bptr is pointing to the left-hand character */
|
||||||
|
|
||||||
|
while (bptr > start_subject)
|
||||||
|
{
|
||||||
|
bptr--;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
BACKCHAR(bptr);
|
||||||
|
GETCHAR(c, bptr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
c = *bptr;
|
||||||
|
if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
|
||||||
|
ricount++;
|
||||||
|
}
|
||||||
|
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||||
|
any number of Extend before a following E_Modifier. */
|
||||||
|
|
||||||
|
if (rgb != ucp_gbExtend ||
|
||||||
|
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||||
|
lgb = rgb;
|
||||||
|
|
||||||
|
eptr += len;
|
||||||
|
if (xcount != NULL) *xcount += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return eptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_extuni.c */
|
|
@ -1926,6 +1926,7 @@ is available. */
|
||||||
|
|
||||||
#define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_)
|
#define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_)
|
||||||
#define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_)
|
#define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_)
|
||||||
|
#define _pcre2_extuni PCRE2_SUFFIX(_pcre2_extuni_)
|
||||||
#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_)
|
#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_)
|
||||||
#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_)
|
#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_)
|
||||||
#define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
|
#define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
|
||||||
|
@ -1949,6 +1950,8 @@ extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
||||||
const compile_block *);
|
const compile_block *);
|
||||||
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
|
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
|
||||||
int *, uint32_t, BOOL, compile_block *);
|
int *, uint32_t, BOOL, compile_block *);
|
||||||
|
extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
|
||||||
|
BOOL, int *);
|
||||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||||
uint32_t *, BOOL);
|
uint32_t *, BOOL);
|
||||||
|
|
|
@ -2440,55 +2440,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int lgb, rgb;
|
|
||||||
GETCHARINCTEST(fc, Feptr);
|
GETCHARINCTEST(fc, Feptr);
|
||||||
lgb = UCD_GRAPHBREAK(fc);
|
Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
|
||||||
while (Feptr < mb->end_subject)
|
NULL);
|
||||||
{
|
|
||||||
int len = 1;
|
|
||||||
if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
|
|
||||||
rgb = UCD_GRAPHBREAK(fc);
|
|
||||||
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
|
||||||
|
|
||||||
/* Not breaking between Regional Indicators is allowed only if there
|
|
||||||
are an even number of preceding RIs. */
|
|
||||||
|
|
||||||
if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
|
|
||||||
{
|
|
||||||
int ricount = 0;
|
|
||||||
PCRE2_SPTR bptr = Feptr - 1;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf) BACKCHAR(bptr);
|
|
||||||
#endif
|
|
||||||
/* bptr is pointing to the left-hand character */
|
|
||||||
|
|
||||||
while (bptr > mb->start_subject)
|
|
||||||
{
|
|
||||||
bptr--;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf)
|
|
||||||
{
|
|
||||||
BACKCHAR(bptr);
|
|
||||||
GETCHAR(fc, bptr);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
fc = *bptr;
|
|
||||||
if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
|
|
||||||
ricount++;
|
|
||||||
}
|
|
||||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
|
||||||
any number of Extend before a following E_Modifier. */
|
|
||||||
|
|
||||||
if (rgb != ucp_gbExtend ||
|
|
||||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
|
||||||
lgb = rgb;
|
|
||||||
|
|
||||||
Feptr += len;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
CHECK_PARTIAL();
|
CHECK_PARTIAL();
|
||||||
Fecode++;
|
Fecode++;
|
||||||
|
@ -2785,61 +2739,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int lgb, rgb;
|
|
||||||
GETCHARINCTEST(fc, Feptr);
|
GETCHARINCTEST(fc, Feptr);
|
||||||
lgb = UCD_GRAPHBREAK(fc);
|
Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
|
||||||
while (Feptr < mb->end_subject)
|
mb->end_subject, utf, NULL);
|
||||||
{
|
|
||||||
int len = 1;
|
|
||||||
if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
|
|
||||||
rgb = UCD_GRAPHBREAK(fc);
|
|
||||||
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
|
||||||
|
|
||||||
/* Not breaking between Regional Indicators is allowed only if
|
|
||||||
there are an even number of preceding RIs. */
|
|
||||||
|
|
||||||
if (lgb == ucp_gbRegionalIndicator &&
|
|
||||||
rgb == ucp_gbRegionalIndicator)
|
|
||||||
{
|
|
||||||
int ricount = 0;
|
|
||||||
PCRE2_SPTR bptr = Feptr - 1;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf) BACKCHAR(bptr);
|
|
||||||
#endif
|
|
||||||
/* bptr is pointing to the left-hand character */
|
|
||||||
|
|
||||||
while (bptr > mb->start_subject)
|
|
||||||
{
|
|
||||||
bptr--;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf)
|
|
||||||
{
|
|
||||||
BACKCHAR(bptr);
|
|
||||||
GETCHAR(fc, bptr);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
fc = *bptr;
|
|
||||||
if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
|
|
||||||
ricount++;
|
|
||||||
}
|
|
||||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
|
||||||
any number of Extend before a following E_Modifier. */
|
|
||||||
|
|
||||||
if (rgb != ucp_gbExtend ||
|
|
||||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
|
||||||
lgb = rgb;
|
|
||||||
|
|
||||||
Feptr += len;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
CHECK_PARTIAL();
|
CHECK_PARTIAL();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
else
|
else
|
||||||
#endif /* SUPPORT_UNICODE */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
@ -3593,56 +3499,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int lgb, rgb;
|
|
||||||
GETCHARINCTEST(fc, Feptr);
|
GETCHARINCTEST(fc, Feptr);
|
||||||
lgb = UCD_GRAPHBREAK(fc);
|
Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
|
||||||
while (Feptr < mb->end_subject)
|
utf, NULL);
|
||||||
{
|
|
||||||
int len = 1;
|
|
||||||
if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
|
|
||||||
rgb = UCD_GRAPHBREAK(fc);
|
|
||||||
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
|
||||||
|
|
||||||
/* Not breaking between Regional Indicators is allowed only if
|
|
||||||
there are an even number of preceding RIs. */
|
|
||||||
|
|
||||||
if (lgb == ucp_gbRegionalIndicator &&
|
|
||||||
rgb == ucp_gbRegionalIndicator)
|
|
||||||
{
|
|
||||||
int ricount = 0;
|
|
||||||
PCRE2_SPTR bptr = Feptr - 1;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf) BACKCHAR(bptr);
|
|
||||||
#endif
|
|
||||||
/* bptr is pointing to the left-hand character */
|
|
||||||
|
|
||||||
while (bptr > mb->start_subject)
|
|
||||||
{
|
|
||||||
bptr--;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf)
|
|
||||||
{
|
|
||||||
BACKCHAR(bptr);
|
|
||||||
GETCHAR(fc, bptr);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
fc = *bptr;
|
|
||||||
if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
|
|
||||||
ricount++;
|
|
||||||
}
|
|
||||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
|
||||||
any number of Extend before a following E_Modifier. */
|
|
||||||
|
|
||||||
if (rgb != ucp_gbExtend ||
|
|
||||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
|
||||||
lgb = rgb;
|
|
||||||
|
|
||||||
Feptr += len;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
CHECK_PARTIAL();
|
CHECK_PARTIAL();
|
||||||
}
|
}
|
||||||
|
@ -4167,56 +4026,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int lgb, rgb;
|
|
||||||
GETCHARINCTEST(fc, Feptr);
|
GETCHARINCTEST(fc, Feptr);
|
||||||
lgb = UCD_GRAPHBREAK(fc);
|
Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
|
||||||
while (Feptr < mb->end_subject)
|
utf, NULL);
|
||||||
{
|
|
||||||
int len = 1;
|
|
||||||
if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
|
|
||||||
rgb = UCD_GRAPHBREAK(fc);
|
|
||||||
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
|
||||||
|
|
||||||
/* Not breaking between Regional Indicators is allowed only if
|
|
||||||
there are an even number of preceding RIs. */
|
|
||||||
|
|
||||||
if (lgb == ucp_gbRegionalIndicator &&
|
|
||||||
rgb == ucp_gbRegionalIndicator)
|
|
||||||
{
|
|
||||||
int ricount = 0;
|
|
||||||
PCRE2_SPTR bptr = Feptr - 1;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf) BACKCHAR(bptr);
|
|
||||||
#endif
|
|
||||||
/* bptr is pointing to the left-hand character */
|
|
||||||
|
|
||||||
while (bptr > mb->start_subject)
|
|
||||||
{
|
|
||||||
bptr--;
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf)
|
|
||||||
{
|
|
||||||
BACKCHAR(bptr);
|
|
||||||
GETCHAR(fc, bptr);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
fc = *bptr;
|
|
||||||
if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
|
|
||||||
ricount++;
|
|
||||||
}
|
|
||||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
|
||||||
any number of Extend before a following E_Modifier. */
|
|
||||||
|
|
||||||
if (rgb != ucp_gbExtend ||
|
|
||||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
|
||||||
lgb = rgb;
|
|
||||||
|
|
||||||
Feptr += len;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
CHECK_PARTIAL();
|
CHECK_PARTIAL();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue