Follow ucp changes in JIT.
This commit is contained in:
parent
68f9c49517
commit
305e273e99
|
@ -476,7 +476,7 @@ typedef struct compiler_common {
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
BOOL utf;
|
BOOL utf;
|
||||||
BOOL invalid_utf;
|
BOOL invalid_utf;
|
||||||
BOOL use_ucp;
|
BOOL ucp;
|
||||||
/* Points to saving area for iref. */
|
/* Points to saving area for iref. */
|
||||||
sljit_s32 iref_ptr;
|
sljit_s32 iref_ptr;
|
||||||
jump_list *getucd;
|
jump_list *getucd;
|
||||||
|
@ -3226,16 +3226,19 @@ static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR
|
||||||
unsigned int c;
|
unsigned int c;
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if (common->utf)
|
if (common->utf || common->ucp)
|
||||||
{
|
{
|
||||||
GETCHAR(c, cc);
|
if (common->utf)
|
||||||
if (c > 127)
|
|
||||||
{
|
{
|
||||||
return c != UCD_OTHERCASE(c);
|
GETCHAR(c, cc);
|
||||||
}
|
}
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
else
|
||||||
|
c = *cc;
|
||||||
|
|
||||||
|
if (c > 127)
|
||||||
|
return c != UCD_OTHERCASE(c);
|
||||||
|
|
||||||
return common->fcc[c] != c;
|
return common->fcc[c] != c;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
@ -3247,10 +3250,8 @@ static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigne
|
||||||
{
|
{
|
||||||
/* Returns with the othercase. */
|
/* Returns with the othercase. */
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if (common->utf && c > 127)
|
if ((common->utf || common->ucp) && c > 127)
|
||||||
{
|
|
||||||
return UCD_OTHERCASE(c);
|
return UCD_OTHERCASE(c);
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
return TABLE_GET(c, common->fcc, c);
|
return TABLE_GET(c, common->fcc, c);
|
||||||
}
|
}
|
||||||
|
@ -3264,15 +3265,19 @@ int n;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if (common->utf)
|
if (common->utf || common->ucp)
|
||||||
{
|
{
|
||||||
GETCHAR(c, cc);
|
if (common->utf)
|
||||||
|
{
|
||||||
|
GETCHAR(c, cc);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
c = *cc;
|
||||||
|
|
||||||
if (c <= 127)
|
if (c <= 127)
|
||||||
oc = common->fcc[c];
|
oc = common->fcc[c];
|
||||||
else
|
else
|
||||||
{
|
|
||||||
oc = UCD_OTHERCASE(c);
|
oc = UCD_OTHERCASE(c);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -5493,7 +5498,12 @@ while (TRUE)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
chr = *cc;
|
chr = *cc;
|
||||||
othercase[0] = TABLE_GET(chr, common->fcc, chr);
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (common->ucp && chr > 127)
|
||||||
|
othercase[0] = UCD_OTHERCASE(chr);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
othercase[0] = TABLE_GET(chr, common->fcc, chr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -5922,8 +5932,8 @@ oc = first_char;
|
||||||
if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
|
if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||||
{
|
{
|
||||||
oc = TABLE_GET(first_char, common->fcc, first_char);
|
oc = TABLE_GET(first_char, common->fcc, first_char);
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UNICODE
|
||||||
if (first_char > 127 && common->utf)
|
if (first_char > 127 && (common->utf || common->ucp))
|
||||||
oc = UCD_OTHERCASE(first_char);
|
oc = UCD_OTHERCASE(first_char);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -6133,8 +6143,8 @@ oc = req_char;
|
||||||
if (caseless)
|
if (caseless)
|
||||||
{
|
{
|
||||||
oc = TABLE_GET(req_char, common->fcc, req_char);
|
oc = TABLE_GET(req_char, common->fcc, req_char);
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UNICODE
|
||||||
if (req_char > 127 && common->utf)
|
if (req_char > 127 && (common->utf || common->ucp))
|
||||||
oc = UCD_OTHERCASE(req_char);
|
oc = UCD_OTHERCASE(req_char);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -6288,7 +6298,7 @@ else
|
||||||
|
|
||||||
/* Testing char type. */
|
/* Testing char type. */
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if (common->use_ucp)
|
if (common->ucp)
|
||||||
{
|
{
|
||||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
|
||||||
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
|
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
|
||||||
|
@ -6334,7 +6344,7 @@ peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
|
||||||
|
|
||||||
valid_utf = LABEL();
|
valid_utf = LABEL();
|
||||||
|
|
||||||
if (common->use_ucp)
|
if (common->ucp)
|
||||||
{
|
{
|
||||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
|
||||||
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
|
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
|
||||||
|
@ -13216,7 +13226,7 @@ common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
|
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
|
||||||
common->utf = (re->overall_options & PCRE2_UTF) != 0;
|
common->utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||||
common->use_ucp = (re->overall_options & PCRE2_UCP) != 0;
|
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
|
||||||
if (common->utf)
|
if (common->utf)
|
||||||
{
|
{
|
||||||
if (common->nltype == NLTYPE_ANY)
|
if (common->nltype == NLTYPE_ANY)
|
||||||
|
|
|
@ -587,8 +587,6 @@
|
||||||
abc\x80\=startchar
|
abc\x80\=startchar
|
||||||
abc\x80\=startchar,offset=3
|
abc\x80\=startchar,offset=3
|
||||||
|
|
||||||
#subject no_jit
|
|
||||||
|
|
||||||
/\x{c1}+\x{e1}/iIB,ucp
|
/\x{c1}+\x{e1}/iIB,ucp
|
||||||
\x{c1}\x{c1}\x{c1}
|
\x{c1}\x{c1}\x{c1}
|
||||||
\x{e1}\x{e1}\x{e1}
|
\x{e1}\x{e1}\x{e1}
|
||||||
|
@ -612,6 +610,4 @@
|
||||||
/X(\x{e1})Y/replace=>\U$1<,substitute_extended
|
/X(\x{e1})Y/replace=>\U$1<,substitute_extended
|
||||||
X\x{e1}Y
|
X\x{e1}Y
|
||||||
|
|
||||||
#subject
|
|
||||||
|
|
||||||
# End of testinput10
|
# End of testinput10
|
||||||
|
|
|
@ -481,8 +481,6 @@
|
||||||
/[ab\x{120}]+/iB,ucp
|
/[ab\x{120}]+/iB,ucp
|
||||||
aABb\x{121}\x{120}
|
aABb\x{121}\x{120}
|
||||||
|
|
||||||
#subject no_jit
|
|
||||||
|
|
||||||
/\x{c1}/i,no_start_optimize
|
/\x{c1}/i,no_start_optimize
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
\x{e1}
|
\x{e1}
|
||||||
|
@ -532,8 +530,6 @@
|
||||||
/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
|
/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
|
||||||
X\x{121}Y
|
X\x{121}Y
|
||||||
|
|
||||||
#subject
|
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -1817,8 +1817,6 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
|
||||||
abc\x80\=startchar,offset=3
|
abc\x80\=startchar,offset=3
|
||||||
Error -36 (bad UTF-8 offset)
|
Error -36 (bad UTF-8 offset)
|
||||||
|
|
||||||
#subject no_jit
|
|
||||||
|
|
||||||
/\x{c1}+\x{e1}/iIB,ucp
|
/\x{c1}+\x{e1}/iIB,ucp
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
|
@ -1873,6 +1871,4 @@ Subject length lower bound = 1
|
||||||
X\x{e1}Y
|
X\x{e1}Y
|
||||||
1: >\xe1<
|
1: >\xe1<
|
||||||
|
|
||||||
#subject
|
|
||||||
|
|
||||||
# End of testinput10
|
# End of testinput10
|
||||||
|
|
|
@ -1670,8 +1670,6 @@ Subject length lower bound = 1
|
||||||
aABb\x{121}\x{120}
|
aABb\x{121}\x{120}
|
||||||
0: aABb\x{121}\x{120}
|
0: aABb\x{121}\x{120}
|
||||||
|
|
||||||
#subject no_jit
|
|
||||||
|
|
||||||
/\x{c1}/i,no_start_optimize
|
/\x{c1}/i,no_start_optimize
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
\x{e1}
|
\x{e1}
|
||||||
|
@ -1763,8 +1761,6 @@ Subject length lower bound = 1
|
||||||
X\x{121}Y
|
X\x{121}Y
|
||||||
1: >\x{120}<
|
1: >\x{120}<
|
||||||
|
|
||||||
#subject
|
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -1668,8 +1668,6 @@ Subject length lower bound = 1
|
||||||
aABb\x{121}\x{120}
|
aABb\x{121}\x{120}
|
||||||
0: aABb\x{121}\x{120}
|
0: aABb\x{121}\x{120}
|
||||||
|
|
||||||
#subject no_jit
|
|
||||||
|
|
||||||
/\x{c1}/i,no_start_optimize
|
/\x{c1}/i,no_start_optimize
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
\x{e1}
|
\x{e1}
|
||||||
|
@ -1761,8 +1759,6 @@ Subject length lower bound = 1
|
||||||
X\x{121}Y
|
X\x{121}Y
|
||||||
1: >\x{120}<
|
1: >\x{120}<
|
||||||
|
|
||||||
#subject
|
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
Loading…
Reference in New Issue