diff --git a/ChangeLog b/ChangeLog index f3d54dc..fcc1799 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,9 @@ Version 10.21 xx-xxx-xxxx 1. Improve matching speed of patterns starting with + or * in JIT. +2. Use memchr() to find the first character in an unanchored match in 8-bit +mode in the interpreter. This gives a significant speed improvement. + Version 10.20 30-June-2015 -------------------------- diff --git a/configure.ac b/configure.ac index 20f3937..9069d0e 100644 --- a/configure.ac +++ b/configure.ac @@ -9,9 +9,9 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might dnl be defined as -RC2, for example. For real releases, it should be empty. m4_define(pcre2_major, [10]) -m4_define(pcre2_minor, [20]) -m4_define(pcre2_prerelease, []) -m4_define(pcre2_date, [2015-06-30]) +m4_define(pcre2_minor, [21]) +m4_define(pcre2_prerelease, [-RC1]) +m4_define(pcre2_date, [2015-07-06]) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. diff --git a/src/pcre2_match.c b/src/pcre2_match.c index d3d5c1d..5d3e694 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -6783,7 +6783,8 @@ for(;;) end_subject = t; } - /* Advance to a unique first code unit if there is one. */ + /* Advance to a unique first code unit if there is one. In 8-bit mode, the + use of memchr() gives a big speed-up. */ if (has_first_cu) { @@ -6793,8 +6794,15 @@ for(;;) (smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2) start_match++; else + { +#if PCRE2_CODE_UNIT_WIDTH != 8 while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu) start_match++; +#else + start_match = memchr(start_match, first_cu, end_subject - start_match); + if (start_match == NULL) start_match = end_subject; +#endif + } } /* Or to just after a linebreak for a multiline match */