Replace multiple copies of extended grapheme sequence code with a single
subroutine.
This commit is contained in:
parent
badfbcdd12
commit
42f547bf4d
|
@ -432,6 +432,7 @@ SET(PCRE2_SOURCES
|
|||
src/pcre2_convert.c
|
||||
src/pcre2_dfa_match.c
|
||||
src/pcre2_error.c
|
||||
src/pcre2_extuni.c
|
||||
src/pcre2_find_bracket.c
|
||||
src/pcre2_jit_compile.c
|
||||
src/pcre2_maketables.c
|
||||
|
|
|
@ -7,6 +7,10 @@ Version 10.31 xx-xxx-201x
|
|||
|
||||
1. Fix typo (missing ]) in VMS code in pcre2test.c.
|
||||
|
||||
2. Replace the replicated code for matching extended Unicode grapheme sequences
|
||||
(which got a lot more complicated by change 10.30/49) by a single subroutine
|
||||
that is called by both pcre2_match() and pcre2_dfa_match().
|
||||
|
||||
|
||||
Version 10.30 14-August-2017
|
||||
----------------------------
|
||||
|
|
|
@ -351,6 +351,7 @@ COMMON_SOURCES = \
|
|||
src/pcre2_convert.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_extuni.c \
|
||||
src/pcre2_find_bracket.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
|
|
|
@ -91,8 +91,10 @@ can skip ahead to the CMake section.
|
|||
pcre2_compile.c
|
||||
pcre2_config.c
|
||||
pcre2_context.c
|
||||
pcre2_convert.c
|
||||
pcre2_dfa_match.c
|
||||
pcre2_error.c
|
||||
pcre2_extuni.c
|
||||
pcre2_find_bracket.c
|
||||
pcre2_jit_compile.c
|
||||
pcre2_maketables.c
|
||||
|
@ -377,4 +379,4 @@ and executable, is in EBCDIC and native z/OS file formats and this is the
|
|||
recommended download site.
|
||||
|
||||
=============================
|
||||
Last Updated: 17 March 2017
|
||||
Last Updated: 12 September 2017
|
||||
|
|
|
@ -196,8 +196,10 @@ files="\
|
|||
src/pcre2_compile.c \
|
||||
src/pcre2_config.c \
|
||||
src/pcre2_context.c \
|
||||
src/pcre2_convert.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_extuni.c \
|
||||
src/pcre2_find_bracket.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
|
|
3
README
3
README
|
@ -773,6 +773,7 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_convert.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_extuni.c )
|
||||
src/pcre2_find_bracket.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
|
@ -882,4 +883,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 18 July 2017
|
||||
Last updated: 12 September 2017
|
||||
|
|
|
@ -1364,63 +1364,14 @@ for (;;)
|
|||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t lgb, rgb;
|
||||
PCRE2_SPTR nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
lgb = UCD_GRAPHBREAK(c);
|
||||
while (nptr < end_subject)
|
||||
{
|
||||
dlen = 1;
|
||||
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
||||
rgb = UCD_GRAPHBREAK(d);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if
|
||||
there are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegionalIndicator &&
|
||||
rgb == ucp_gbRegionalIndicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = nptr - 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(bptr);
|
||||
#endif
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > mb->start_subject)
|
||||
{
|
||||
bptr--;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(d, bptr);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
d = *bptr;
|
||||
if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
|
||||
if (rgb != ucp_gbExtend ||
|
||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
lgb = rgb;
|
||||
|
||||
ncount++;
|
||||
nptr += dlen;
|
||||
}
|
||||
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||
&ncount);
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||
}
|
||||
|
@ -1663,8 +1614,6 @@ for (;;)
|
|||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t lgb, rgb;
|
||||
PCRE2_SPTR nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
|
||||
|
@ -1672,55 +1621,8 @@ for (;;)
|
|||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
lgb = UCD_GRAPHBREAK(c);
|
||||
while (nptr < end_subject)
|
||||
{
|
||||
dlen = 1;
|
||||
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
||||
rgb = UCD_GRAPHBREAK(d);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if
|
||||
there are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegionalIndicator &&
|
||||
rgb == ucp_gbRegionalIndicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = nptr - 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(bptr);
|
||||
#endif
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > mb->start_subject)
|
||||
{
|
||||
bptr--;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(d, bptr);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
d = *bptr;
|
||||
if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
|
||||
if (rgb != ucp_gbExtend ||
|
||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
lgb = rgb;
|
||||
|
||||
ncount++;
|
||||
nptr += dlen;
|
||||
}
|
||||
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||
&ncount);
|
||||
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
||||
}
|
||||
break;
|
||||
|
@ -1973,63 +1875,15 @@ for (;;)
|
|||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t lgb, rgb;
|
||||
PCRE2_SPTR nptr = ptr + clen;
|
||||
PCRE2_SPTR nptr;
|
||||
int ncount = 0;
|
||||
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
lgb = UCD_GRAPHBREAK(c);
|
||||
while (nptr < end_subject)
|
||||
{
|
||||
dlen = 1;
|
||||
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
||||
rgb = UCD_GRAPHBREAK(d);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if
|
||||
there are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegionalIndicator &&
|
||||
rgb == ucp_gbRegionalIndicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = nptr - 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(bptr);
|
||||
#endif
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > mb->start_subject)
|
||||
{
|
||||
bptr--;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(d, bptr);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
d = *bptr;
|
||||
if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
|
||||
if (rgb != ucp_gbExtend ||
|
||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
lgb = rgb;
|
||||
|
||||
ncount++;
|
||||
nptr += dlen;
|
||||
}
|
||||
nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||
&ncount);
|
||||
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
reset_could_continue = TRUE;
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
|
@ -2206,58 +2060,9 @@ for (;;)
|
|||
case OP_EXTUNI:
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t lgb, rgb;
|
||||
PCRE2_SPTR nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
lgb = UCD_GRAPHBREAK(c);
|
||||
while (nptr < end_subject)
|
||||
{
|
||||
dlen = 1;
|
||||
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
||||
rgb = UCD_GRAPHBREAK(d);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if
|
||||
there are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegionalIndicator &&
|
||||
rgb == ucp_gbRegionalIndicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = nptr - 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(bptr);
|
||||
#endif
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > mb->start_subject)
|
||||
{
|
||||
bptr--;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(d, bptr);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
d = *bptr;
|
||||
if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
|
||||
if (rgb != ucp_gbExtend ||
|
||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
lgb = rgb;
|
||||
|
||||
ncount++;
|
||||
nptr += dlen;
|
||||
}
|
||||
PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject,
|
||||
end_subject, utf, &ncount);
|
||||
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
reset_could_continue = TRUE;
|
||||
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
|
||||
|
|
|
@ -0,0 +1,129 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains an internal function that is used to match a Unicode
|
||||
extended grapheme sequence. It is used by both pcre2_match() and
|
||||
pcre2_dfa_match(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/*************************************************
|
||||
* Match an extended grapheme sequence *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
c the first character
|
||||
eptr pointer to next character
|
||||
start_subject pointer to start of subject
|
||||
end_subject pointer to end of subject
|
||||
utf TRUE if in UTF mode
|
||||
xcount pointer to count of additional characters,
|
||||
or NULL if count not needed
|
||||
|
||||
Returns: pointer after the end of the sequence
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
||||
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
||||
{
|
||||
int lgb = UCD_GRAPHBREAK(c);
|
||||
|
||||
while (eptr < end_subject)
|
||||
{
|
||||
int rgb;
|
||||
int len = 1;
|
||||
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
|
||||
rgb = UCD_GRAPHBREAK(c);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if there
|
||||
are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = eptr - 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(bptr);
|
||||
#endif
|
||||
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > start_subject)
|
||||
{
|
||||
bptr--;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(c, bptr);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
c = *bptr;
|
||||
if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
|
||||
if (rgb != ucp_gbExtend ||
|
||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
lgb = rgb;
|
||||
|
||||
eptr += len;
|
||||
if (xcount != NULL) *xcount += 1;
|
||||
}
|
||||
|
||||
return eptr;
|
||||
}
|
||||
|
||||
/* End of pcre2_extuni.c */
|
|
@ -1926,6 +1926,7 @@ is available. */
|
|||
|
||||
#define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_)
|
||||
#define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_)
|
||||
#define _pcre2_extuni PCRE2_SUFFIX(_pcre2_extuni_)
|
||||
#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_)
|
||||
#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_)
|
||||
#define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
|
||||
|
@ -1949,6 +1950,8 @@ extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
|||
const compile_block *);
|
||||
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
|
||||
int *, uint32_t, BOOL, compile_block *);
|
||||
extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
|
||||
BOOL, int *);
|
||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||
uint32_t *, BOOL);
|
||||
|
|
|
@ -2440,55 +2440,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
}
|
||||
else
|
||||
{
|
||||
int lgb, rgb;
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
lgb = UCD_GRAPHBREAK(fc);
|
||||
while (Feptr < mb->end_subject)
|
||||
{
|
||||
int len = 1;
|
||||
if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
|
||||
rgb = UCD_GRAPHBREAK(fc);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if there
|
||||
are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = Feptr - 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(bptr);
|
||||
#endif
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > mb->start_subject)
|
||||
{
|
||||
bptr--;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(fc, bptr);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
fc = *bptr;
|
||||
if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
|
||||
if (rgb != ucp_gbExtend ||
|
||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
lgb = rgb;
|
||||
|
||||
Feptr += len;
|
||||
}
|
||||
Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
|
||||
NULL);
|
||||
}
|
||||
CHECK_PARTIAL();
|
||||
Fecode++;
|
||||
|
@ -2785,61 +2739,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
}
|
||||
else
|
||||
{
|
||||
int lgb, rgb;
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
lgb = UCD_GRAPHBREAK(fc);
|
||||
while (Feptr < mb->end_subject)
|
||||
{
|
||||
int len = 1;
|
||||
if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
|
||||
rgb = UCD_GRAPHBREAK(fc);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if
|
||||
there are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegionalIndicator &&
|
||||
rgb == ucp_gbRegionalIndicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = Feptr - 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(bptr);
|
||||
#endif
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > mb->start_subject)
|
||||
{
|
||||
bptr--;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(fc, bptr);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
fc = *bptr;
|
||||
if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
|
||||
if (rgb != ucp_gbExtend ||
|
||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
lgb = rgb;
|
||||
|
||||
Feptr += len;
|
||||
}
|
||||
Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
|
||||
mb->end_subject, utf, NULL);
|
||||
}
|
||||
CHECK_PARTIAL();
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
@ -3593,56 +3499,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
}
|
||||
else
|
||||
{
|
||||
int lgb, rgb;
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
lgb = UCD_GRAPHBREAK(fc);
|
||||
while (Feptr < mb->end_subject)
|
||||
{
|
||||
int len = 1;
|
||||
if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
|
||||
rgb = UCD_GRAPHBREAK(fc);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if
|
||||
there are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegionalIndicator &&
|
||||
rgb == ucp_gbRegionalIndicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = Feptr - 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(bptr);
|
||||
#endif
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > mb->start_subject)
|
||||
{
|
||||
bptr--;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(fc, bptr);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
fc = *bptr;
|
||||
if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
|
||||
if (rgb != ucp_gbExtend ||
|
||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
lgb = rgb;
|
||||
|
||||
Feptr += len;
|
||||
}
|
||||
Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
|
||||
utf, NULL);
|
||||
}
|
||||
CHECK_PARTIAL();
|
||||
}
|
||||
|
@ -4167,56 +4026,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
}
|
||||
else
|
||||
{
|
||||
int lgb, rgb;
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
lgb = UCD_GRAPHBREAK(fc);
|
||||
while (Feptr < mb->end_subject)
|
||||
{
|
||||
int len = 1;
|
||||
if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
|
||||
rgb = UCD_GRAPHBREAK(fc);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if
|
||||
there are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegionalIndicator &&
|
||||
rgb == ucp_gbRegionalIndicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = Feptr - 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(bptr);
|
||||
#endif
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > mb->start_subject)
|
||||
{
|
||||
bptr--;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(fc, bptr);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
fc = *bptr;
|
||||
if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
|
||||
if (rgb != ucp_gbExtend ||
|
||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
lgb = rgb;
|
||||
|
||||
Feptr += len;
|
||||
}
|
||||
Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
|
||||
utf, NULL);
|
||||
}
|
||||
CHECK_PARTIAL();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue