Improve character range optimization in JIT.
This commit is contained in:
parent
4404c8c419
commit
a16919ce6f
|
@ -362,7 +362,8 @@ typedef struct fast_forward_char_data {
|
||||||
PCRE2_UCHAR chars[MAX_DIFF_CHARS];
|
PCRE2_UCHAR chars[MAX_DIFF_CHARS];
|
||||||
} fast_forward_char_data;
|
} fast_forward_char_data;
|
||||||
|
|
||||||
#define MAX_RANGE_SIZE 4
|
#define MAX_CLASS_RANGE_SIZE 4
|
||||||
|
#define MAX_CLASS_CHARS_SIZE 3
|
||||||
|
|
||||||
typedef struct compiler_common {
|
typedef struct compiler_common {
|
||||||
/* The sljit ceneric compiler. */
|
/* The sljit ceneric compiler. */
|
||||||
|
@ -5318,7 +5319,7 @@ if (common->match_end_ptr != 0)
|
||||||
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
|
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
|
||||||
|
|
||||||
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
|
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
|
||||||
{
|
{
|
||||||
|
@ -5349,7 +5350,7 @@ if (common->mode == PCRE2_JIT_COMPLETE)
|
||||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
|
|
||||||
if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
|
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
|
||||||
{
|
{
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
if ((start_bits[31] & 0x80) != 0)
|
if ((start_bits[31] & 0x80) != 0)
|
||||||
|
@ -5618,11 +5619,11 @@ OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOC
|
||||||
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
|
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
|
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
|
||||||
{
|
{
|
||||||
/* May destroy TMP1. */
|
/* May destroy TMP1. */
|
||||||
DEFINE_COMPILER;
|
DEFINE_COMPILER;
|
||||||
int ranges[MAX_RANGE_SIZE];
|
int ranges[MAX_CLASS_RANGE_SIZE];
|
||||||
sljit_u8 bit, cbit, all;
|
sljit_u8 bit, cbit, all;
|
||||||
int i, byte, length = 0;
|
int i, byte, length = 0;
|
||||||
|
|
||||||
|
@ -5640,7 +5641,7 @@ for (i = 0; i < 256; )
|
||||||
cbit = (bits[byte] >> (i & 0x7)) & 0x1;
|
cbit = (bits[byte] >> (i & 0x7)) & 0x1;
|
||||||
if (cbit != bit)
|
if (cbit != bit)
|
||||||
{
|
{
|
||||||
if (length >= MAX_RANGE_SIZE)
|
if (length >= MAX_CLASS_RANGE_SIZE)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
ranges[length] = i;
|
ranges[length] = i;
|
||||||
length++;
|
length++;
|
||||||
|
@ -5653,7 +5654,7 @@ for (i = 0; i < 256; )
|
||||||
|
|
||||||
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
|
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
|
||||||
{
|
{
|
||||||
if (length >= MAX_RANGE_SIZE)
|
if (length >= MAX_CLASS_RANGE_SIZE)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
ranges[length] = 256;
|
ranges[length] = 256;
|
||||||
length++;
|
length++;
|
||||||
|
@ -5770,6 +5771,111 @@ switch(length)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
|
||||||
|
{
|
||||||
|
/* May destroy TMP1. */
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
|
||||||
|
uint8_t byte;
|
||||||
|
sljit_s32 type;
|
||||||
|
int i, j, k, len, c;
|
||||||
|
struct sljit_jump *jump;
|
||||||
|
jump_list *found = NULL;
|
||||||
|
|
||||||
|
if (invert)
|
||||||
|
nclass = !nclass;
|
||||||
|
|
||||||
|
len = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < 32; i++)
|
||||||
|
{
|
||||||
|
byte = bits[i];
|
||||||
|
|
||||||
|
if (nclass)
|
||||||
|
byte = ~byte;
|
||||||
|
|
||||||
|
j = 0;
|
||||||
|
while (byte != 0)
|
||||||
|
{
|
||||||
|
if (byte & 0x1)
|
||||||
|
{
|
||||||
|
c = i * 8 + j;
|
||||||
|
|
||||||
|
k = len;
|
||||||
|
|
||||||
|
if ((c & 0x20) != 0)
|
||||||
|
{
|
||||||
|
for (k = 0; k < len; k++)
|
||||||
|
if (char_list[k] == c - 0x20)
|
||||||
|
{
|
||||||
|
char_list[k] |= 0x120;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (k == len)
|
||||||
|
{
|
||||||
|
if (len >= MAX_CLASS_CHARS_SIZE)
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
char_list[len++] = (uint16_t) c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
byte >>= 1;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
jump = NULL;
|
||||||
|
j = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < len; i++)
|
||||||
|
{
|
||||||
|
if ((char_list[i] & 0x100) != 0)
|
||||||
|
j++;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
type = SLJIT_EQUAL;
|
||||||
|
if (!nclass && j == 0 && i + 1 == len)
|
||||||
|
type = SLJIT_NOT_EQUAL;
|
||||||
|
|
||||||
|
jump = CMP(type, TMP1, 0, SLJIT_IMM, char_list[i]);
|
||||||
|
|
||||||
|
add_jump(compiler, (nclass || type == SLJIT_NOT_EQUAL) ? backtracks : &found, jump);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (j != 0)
|
||||||
|
{
|
||||||
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);
|
||||||
|
|
||||||
|
for (i = 0; i < len; i++)
|
||||||
|
if ((char_list[i] & 0x100) != 0)
|
||||||
|
{
|
||||||
|
j--;
|
||||||
|
|
||||||
|
type = SLJIT_EQUAL;
|
||||||
|
if (!nclass && j == 0)
|
||||||
|
type = SLJIT_NOT_EQUAL;
|
||||||
|
|
||||||
|
jump = CMP(type, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
|
||||||
|
add_jump(compiler, (nclass || type == SLJIT_NOT_EQUAL) ? backtracks : &found, jump);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
set_jumps(found, LABEL());
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
|
||||||
|
{
|
||||||
|
/* May destroy TMP1. */
|
||||||
|
if (optimize_class_ranges(common, bits, nclass, invert, backtracks))
|
||||||
|
return TRUE;
|
||||||
|
return optimize_class_chars(common, bits, nclass, invert, backtracks);
|
||||||
|
}
|
||||||
|
|
||||||
static void check_anynewline(compiler_common *common)
|
static void check_anynewline(compiler_common *common)
|
||||||
{
|
{
|
||||||
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
|
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
|
||||||
|
@ -6288,7 +6394,7 @@ if ((cc[-1] & XCL_HASPROP) == 0)
|
||||||
if ((cc[-1] & XCL_MAP) != 0)
|
if ((cc[-1] & XCL_MAP) != 0)
|
||||||
{
|
{
|
||||||
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
|
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
|
||||||
if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
|
if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
|
||||||
{
|
{
|
||||||
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
|
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
|
||||||
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
|
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
|
||||||
|
@ -6315,7 +6421,7 @@ else if ((cc[-1] & XCL_MAP) != 0)
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
charsaved = TRUE;
|
charsaved = TRUE;
|
||||||
#endif
|
#endif
|
||||||
if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
|
if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
|
||||||
{
|
{
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
jump = NULL;
|
jump = NULL;
|
||||||
|
@ -7274,7 +7380,7 @@ switch(type)
|
||||||
read_char_range(common, 0, 255, type == OP_NCLASS);
|
read_char_range(common, 0, 255, type == OP_NCLASS);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
|
if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
|
||||||
return cc + 32 / sizeof(PCRE2_UCHAR);
|
return cc + 32 / sizeof(PCRE2_UCHAR);
|
||||||
|
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
|
Loading…
Reference in New Issue