Follow the UCP (PCRE2_UCP) changes in the JIT compiler.

This commit is contained in:
Zoltán Herczeg 2020-02-26 10:18:43 +00:00
parent 68f9c49517
commit 305e273e99
6 changed files with 32 additions and 42 deletions

View File

@ -476,7 +476,7 @@ typedef struct compiler_common {
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
BOOL utf; BOOL utf;
BOOL invalid_utf; BOOL invalid_utf;
BOOL use_ucp; BOOL ucp;
/* Points to saving area for iref. */ /* Points to saving area for iref. */
sljit_s32 iref_ptr; sljit_s32 iref_ptr;
jump_list *getucd; jump_list *getucd;
@ -3226,16 +3226,19 @@ static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR
unsigned int c; unsigned int c;
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if (common->utf) if (common->utf || common->ucp)
{
if (common->utf)
{ {
GETCHAR(c, cc); GETCHAR(c, cc);
if (c > 127)
{
return c != UCD_OTHERCASE(c);
} }
#if PCRE2_CODE_UNIT_WIDTH != 8 else
c = *cc;
if (c > 127)
return c != UCD_OTHERCASE(c);
return common->fcc[c] != c; return common->fcc[c] != c;
#endif
} }
else else
#endif #endif
@ -3247,10 +3250,8 @@ static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigne
{ {
/* Returns with the othercase. */ /* Returns with the othercase. */
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if (common->utf && c > 127) if ((common->utf || common->ucp) && c > 127)
{
return UCD_OTHERCASE(c); return UCD_OTHERCASE(c);
}
#endif #endif
return TABLE_GET(c, common->fcc, c); return TABLE_GET(c, common->fcc, c);
} }
@ -3264,16 +3265,20 @@ int n;
#endif #endif
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if (common->utf) if (common->utf || common->ucp)
{
if (common->utf)
{ {
GETCHAR(c, cc); GETCHAR(c, cc);
}
else
c = *cc;
if (c <= 127) if (c <= 127)
oc = common->fcc[c]; oc = common->fcc[c];
else else
{
oc = UCD_OTHERCASE(c); oc = UCD_OTHERCASE(c);
} }
}
else else
{ {
c = *cc; c = *cc;
@ -5493,6 +5498,11 @@ while (TRUE)
#endif #endif
{ {
chr = *cc; chr = *cc;
#ifdef SUPPORT_UNICODE
if (common->ucp && chr > 127)
othercase[0] = UCD_OTHERCASE(chr);
else
#endif
othercase[0] = TABLE_GET(chr, common->fcc, chr); othercase[0] = TABLE_GET(chr, common->fcc, chr);
} }
} }
@ -5922,8 +5932,8 @@ oc = first_char;
if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0) if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
{ {
oc = TABLE_GET(first_char, common->fcc, first_char); oc = TABLE_GET(first_char, common->fcc, first_char);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8 #if defined SUPPORT_UNICODE
if (first_char > 127 && common->utf) if (first_char > 127 && (common->utf || common->ucp))
oc = UCD_OTHERCASE(first_char); oc = UCD_OTHERCASE(first_char);
#endif #endif
} }
@ -6133,8 +6143,8 @@ oc = req_char;
if (caseless) if (caseless)
{ {
oc = TABLE_GET(req_char, common->fcc, req_char); oc = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8 #if defined SUPPORT_UNICODE
if (req_char > 127 && common->utf) if (req_char > 127 && (common->utf || common->ucp))
oc = UCD_OTHERCASE(req_char); oc = UCD_OTHERCASE(req_char);
#endif #endif
} }
@ -6288,7 +6298,7 @@ else
/* Testing char type. */ /* Testing char type. */
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if (common->use_ucp) if (common->ucp)
{ {
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
@ -6334,7 +6344,7 @@ peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
valid_utf = LABEL(); valid_utf = LABEL();
if (common->use_ucp) if (common->ucp)
{ {
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
@ -13216,7 +13226,7 @@ common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
common->utf = (re->overall_options & PCRE2_UTF) != 0; common->utf = (re->overall_options & PCRE2_UTF) != 0;
common->use_ucp = (re->overall_options & PCRE2_UCP) != 0; common->ucp = (re->overall_options & PCRE2_UCP) != 0;
if (common->utf) if (common->utf)
{ {
if (common->nltype == NLTYPE_ANY) if (common->nltype == NLTYPE_ANY)

View File

@ -587,8 +587,6 @@
abc\x80\=startchar abc\x80\=startchar
abc\x80\=startchar,offset=3 abc\x80\=startchar,offset=3
#subject no_jit
/\x{c1}+\x{e1}/iIB,ucp /\x{c1}+\x{e1}/iIB,ucp
\x{c1}\x{c1}\x{c1} \x{c1}\x{c1}\x{c1}
\x{e1}\x{e1}\x{e1} \x{e1}\x{e1}\x{e1}
@ -612,6 +610,4 @@
/X(\x{e1})Y/replace=>\U$1<,substitute_extended /X(\x{e1})Y/replace=>\U$1<,substitute_extended
X\x{e1}Y X\x{e1}Y
#subject
# End of testinput10 # End of testinput10

View File

@ -481,8 +481,6 @@
/[ab\x{120}]+/iB,ucp /[ab\x{120}]+/iB,ucp
aABb\x{121}\x{120} aABb\x{121}\x{120}
#subject no_jit
/\x{c1}/i,no_start_optimize /\x{c1}/i,no_start_optimize
\= Expect no match \= Expect no match
\x{e1} \x{e1}
@ -532,8 +530,6 @@
/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended /X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
X\x{121}Y X\x{121}Y
#subject
# ---------------------------------------------------- # ----------------------------------------------------
# End of testinput12 # End of testinput12

View File

@ -1817,8 +1817,6 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
abc\x80\=startchar,offset=3 abc\x80\=startchar,offset=3
Error -36 (bad UTF-8 offset) Error -36 (bad UTF-8 offset)
#subject no_jit
/\x{c1}+\x{e1}/iIB,ucp /\x{c1}+\x{e1}/iIB,ucp
------------------------------------------------------------------ ------------------------------------------------------------------
Bra Bra
@ -1873,6 +1871,4 @@ Subject length lower bound = 1
X\x{e1}Y X\x{e1}Y
1: >\xe1< 1: >\xe1<
#subject
# End of testinput10 # End of testinput10

View File

@ -1670,8 +1670,6 @@ Subject length lower bound = 1
aABb\x{121}\x{120} aABb\x{121}\x{120}
0: aABb\x{121}\x{120} 0: aABb\x{121}\x{120}
#subject no_jit
/\x{c1}/i,no_start_optimize /\x{c1}/i,no_start_optimize
\= Expect no match \= Expect no match
\x{e1} \x{e1}
@ -1763,8 +1761,6 @@ Subject length lower bound = 1
X\x{121}Y X\x{121}Y
1: >\x{120}< 1: >\x{120}<
#subject
# ---------------------------------------------------- # ----------------------------------------------------
# End of testinput12 # End of testinput12

View File

@ -1668,8 +1668,6 @@ Subject length lower bound = 1
aABb\x{121}\x{120} aABb\x{121}\x{120}
0: aABb\x{121}\x{120} 0: aABb\x{121}\x{120}
#subject no_jit
/\x{c1}/i,no_start_optimize /\x{c1}/i,no_start_optimize
\= Expect no match \= Expect no match
\x{e1} \x{e1}
@ -1761,8 +1759,6 @@ Subject length lower bound = 1
X\x{121}Y X\x{121}Y
1: >\x{120}< 1: >\x{120}<
#subject
# ---------------------------------------------------- # ----------------------------------------------------
# End of testinput12 # End of testinput12