diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 8aa57dd..2de5538 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -476,7 +476,7 @@ typedef struct compiler_common { #ifdef SUPPORT_UNICODE BOOL utf; BOOL invalid_utf; - BOOL use_ucp; + BOOL ucp; /* Points to saving area for iref. */ sljit_s32 iref_ptr; jump_list *getucd; @@ -3226,16 +3226,19 @@ static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR unsigned int c; #ifdef SUPPORT_UNICODE -if (common->utf) +if (common->utf || common->ucp) { - GETCHAR(c, cc); - if (c > 127) + if (common->utf) { - return c != UCD_OTHERCASE(c); + GETCHAR(c, cc); } -#if PCRE2_CODE_UNIT_WIDTH != 8 + else + c = *cc; + + if (c > 127) + return c != UCD_OTHERCASE(c); + return common->fcc[c] != c; -#endif } else #endif @@ -3247,10 +3250,8 @@ static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigne { /* Returns with the othercase. */ #ifdef SUPPORT_UNICODE -if (common->utf && c > 127) - { +if ((common->utf || common->ucp) && c > 127) return UCD_OTHERCASE(c); - } #endif return TABLE_GET(c, common->fcc, c); } @@ -3264,15 +3265,19 @@ int n; #endif #ifdef SUPPORT_UNICODE -if (common->utf) +if (common->utf || common->ucp) { - GETCHAR(c, cc); + if (common->utf) + { + GETCHAR(c, cc); + } + else + c = *cc; + if (c <= 127) oc = common->fcc[c]; else - { oc = UCD_OTHERCASE(c); - } } else { @@ -5493,7 +5498,12 @@ while (TRUE) #endif { chr = *cc; - othercase[0] = TABLE_GET(chr, common->fcc, chr); +#ifdef SUPPORT_UNICODE + if (common->ucp && chr > 127) + othercase[0] = UCD_OTHERCASE(chr); + else +#endif + othercase[0] = TABLE_GET(chr, common->fcc, chr); } } else @@ -5922,8 +5932,8 @@ oc = first_char; if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0) { oc = TABLE_GET(first_char, common->fcc, first_char); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8 - if (first_char > 127 && common->utf) +#if defined SUPPORT_UNICODE + if (first_char > 127 && (common->utf || common->ucp)) oc = UCD_OTHERCASE(first_char); #endif } @@ -6133,8 +6143,8 @@ oc = req_char; if (caseless) { oc = TABLE_GET(req_char, common->fcc, req_char); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8 - if (req_char > 127 && common->utf) +#if defined SUPPORT_UNICODE + if (req_char > 127 && (common->utf || common->ucp)) oc = UCD_OTHERCASE(req_char); #endif } @@ -6288,7 +6298,7 @@ else /* Testing char type. */ #ifdef SUPPORT_UNICODE -if (common->use_ucp) +if (common->ucp) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); @@ -6334,7 +6344,7 @@ peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2); valid_utf = LABEL(); -if (common->use_ucp) +if (common->ucp) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); @@ -13216,7 +13226,7 @@ common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0; #ifdef SUPPORT_UNICODE /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ common->utf = (re->overall_options & PCRE2_UTF) != 0; -common->use_ucp = (re->overall_options & PCRE2_UCP) != 0; +common->ucp = (re->overall_options & PCRE2_UCP) != 0; if (common->utf) { if (common->nltype == NLTYPE_ANY) diff --git a/testdata/testinput10 b/testdata/testinput10 index be6d426..b3c3197 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 @@ -587,8 +587,6 @@ abc\x80\=startchar abc\x80\=startchar,offset=3 -#subject no_jit - /\x{c1}+\x{e1}/iIB,ucp \x{c1}\x{c1}\x{c1} \x{e1}\x{e1}\x{e1} @@ -612,6 +610,4 @@ /X(\x{e1})Y/replace=>\U$1<,substitute_extended X\x{e1}Y -#subject - # End of testinput10 diff --git a/testdata/testinput12 b/testdata/testinput12 index beaf643..fbfacc5 100644 --- a/testdata/testinput12 +++ b/testdata/testinput12 @@ -481,8 +481,6 @@ /[ab\x{120}]+/iB,ucp aABb\x{121}\x{120} -#subject no_jit - /\x{c1}/i,no_start_optimize \= Expect no match \x{e1} @@ -532,8 +530,6 @@ /X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended X\x{121}Y -#subject - # ---------------------------------------------------- # End of testinput12 diff --git a/testdata/testoutput10 b/testdata/testoutput10 index 9fe5ef6..59af535 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -1817,8 +1817,6 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3 abc\x80\=startchar,offset=3 Error -36 (bad UTF-8 offset) -#subject no_jit - /\x{c1}+\x{e1}/iIB,ucp ------------------------------------------------------------------ Bra @@ -1873,6 +1871,4 @@ Subject length lower bound = 1 X\x{e1}Y 1: >\xe1< -#subject - # End of testinput10 diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 index 6e545c3..9689ab1 100644 --- a/testdata/testoutput12-16 +++ b/testdata/testoutput12-16 @@ -1670,8 +1670,6 @@ Subject length lower bound = 1 aABb\x{121}\x{120} 0: aABb\x{121}\x{120} -#subject no_jit - /\x{c1}/i,no_start_optimize \= Expect no match \x{e1} @@ -1763,8 +1761,6 @@ Subject length lower bound = 1 X\x{121}Y 1: >\x{120}< -#subject - # ---------------------------------------------------- # End of testinput12 diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 index 1a0783a..c51c517 100644 --- a/testdata/testoutput12-32 +++ b/testdata/testoutput12-32 @@ -1668,8 +1668,6 @@ Subject length lower bound = 1 aABb\x{121}\x{120} 0: aABb\x{121}\x{120} -#subject no_jit - /\x{c1}/i,no_start_optimize \= Expect no match \x{e1} @@ -1761,8 +1759,6 @@ Subject length lower bound = 1 X\x{121}Y 1: >\x{120}< -#subject - # ---------------------------------------------------- # End of testinput12