From 54396a364d4189dc801b3dffa81d875d51a7d40d Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Fri, 6 Mar 2015 11:57:06 +0000 Subject: [PATCH] Fix bad compile of patterns like /[A-`]/i,utf where the range contains characters with multiple other cases and the ranges adjoin. --- ChangeLog | 5 +++++ src/pcre2_compile.c | 6 +++++- testdata/testinput4 | 3 +++ testdata/testinput5 | 3 +++ testdata/testoutput4 | 4 ++++ testdata/testoutput5 | 10 ++++++++++ 6 files changed, 30 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 34e1051..e9555b2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -115,6 +115,11 @@ of various issues, and new ones are still appear unfortunately. To fix existing and future issues, size computation is eliminated from the code, and replaced by on-demand memory allocation. +25. A pattern such as /(?i)[A-`]/, where characters in the other case are +adjacent to the end of the range, and the range contained characters with more +than one other case, caused incorrect behaviour when compiled in UTF mode. In +that example, the range a-j was left out of the class. + Version 10.00 05-January-2015 ----------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 280c3d0..a8defd1 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -2864,7 +2864,11 @@ if ((options & PCRE2_CASELESS) != 0) range. Otherwise, use a recursive call to add the additional range. */ else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ - else if (od > end && oc <= end + 1) end = od; /* Extend upwards */ + else if (od > end && oc <= end + 1) + { + end = od; /* Extend upwards */ + if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff); + } else n8 += add_to_class(classbits, uchardptr, options, cb, oc, od); } } diff --git a/testdata/testinput4 b/testdata/testinput4 index e121058..7f7d4ec 100644 --- a/testdata/testinput4 +++ b/testdata/testinput4 @@ -2216,4 +2216,7 @@ \x{23a}\x{2c65}\x{2c65}\x{2c65} \x{23a}\x{23a}\x{2c65}\x{23a} +/[A-`]/i,utf + abcdefghijklmno + # End of testinput4 diff --git a/testdata/testinput5 b/testdata/testinput5 index 4c8dfeb..9c3771b 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -1638,4 +1638,7 @@ /[^\xff]((?1))/utf,debug +/[A-`]/iB,utf + abcdefghijklmno + # End of testinput5 diff --git a/testdata/testoutput4 b/testdata/testoutput4 index 774594c..80b14c6 100644 --- a/testdata/testoutput4 +++ b/testdata/testoutput4 @@ -3735,4 +3735,8 @@ No match \x{23a}\x{23a}\x{2c65}\x{23a} No match +/[A-`]/i,utf + abcdefghijklmno + 0: a + # End of testinput4 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 0a331e1..46e66c5 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -4009,4 +4009,14 @@ Subject length lower bound = 1 /[^\xff]((?1))/utf,debug Failed: error 140 at offset 11: recursion could loop indefinitely +/[A-`]/iB,utf +------------------------------------------------------------------ + Bra + [A-z\x{212a}\x{17f}] + Ket + End +------------------------------------------------------------------ + abcdefghijklmno + 0: a + # End of testinput5