Fix character type detection in JIT when 32-bit and UCP are enabled but UTF is not.
This commit is contained in:
parent
f7e077c746
commit
bdac9df4cf
|
@ -588,6 +588,8 @@ the start pointers when the end of the capturing group has not yet reached. */
|
||||||
|
|
||||||
#define READ_CHAR_MAX 0x7fffffff
|
#define READ_CHAR_MAX 0x7fffffff
|
||||||
|
|
||||||
|
#define INVALID_UTF_CHAR 888
|
||||||
|
|
||||||
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
|
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
|
||||||
{
|
{
|
||||||
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
|
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
|
||||||
|
@ -3558,10 +3560,30 @@ static void do_getucd(compiler_common *common)
|
||||||
/* Search the UCD record for the character that comes in TMP1.
|
/* Search the UCD record for the character that comes in TMP1.
|
||||||
Returns chartype in TMP1 and UCD offset in TMP2. */
|
Returns chartype in TMP1 and UCD offset in TMP2. */
|
||||||
DEFINE_COMPILER;
|
DEFINE_COMPILER;
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
struct sljit_jump *jump;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined SLJIT_DEBUG && SLJIT_DEBUG
|
||||||
|
/* dummy_ucd_record */
|
||||||
|
const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
|
||||||
|
SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
|
||||||
|
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
|
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
|
||||||
|
|
||||||
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
|
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
if (!common->utf)
|
||||||
|
{
|
||||||
|
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
|
||||||
|
JUMPHERE(jump);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||||
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
|
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
|
||||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
|
||||||
|
@ -5969,6 +5991,15 @@ if (needstype || needsscript)
|
||||||
if (needschar && !charsaved)
|
if (needschar && !charsaved)
|
||||||
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
|
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
if (!common->utf)
|
||||||
|
{
|
||||||
|
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
|
||||||
|
JUMPHERE(jump);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||||
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
|
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
|
||||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
|
||||||
|
|
|
@ -361,6 +361,6 @@
|
||||||
/[\s[:^ascii:]]/B,ucp
|
/[\s[:^ascii:]]/B,ucp
|
||||||
|
|
||||||
/\pP/ucp
|
/\pP/ucp
|
||||||
\x{7fffffff}\=no_jit
|
\x{7fffffff}
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -1416,7 +1416,7 @@ No match
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/\pP/ucp
|
/\pP/ucp
|
||||||
\x{7fffffff}\=no_jit
|
\x{7fffffff}
|
||||||
** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||||
** Truncation will probably give the wrong result.
|
** Truncation will probably give the wrong result.
|
||||||
No match
|
No match
|
||||||
|
|
|
@ -1410,7 +1410,7 @@ No match
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/\pP/ucp
|
/\pP/ucp
|
||||||
\x{7fffffff}\=no_jit
|
\x{7fffffff}
|
||||||
No match
|
No match
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
Loading…
Reference in New Issue