From 5ced14bf24ea90abc10d0a0bedbc5c1e516f4739 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sat, 14 Nov 2015 17:28:19 +0000 Subject: [PATCH] Implement PCRE2_INFO_HASBACKSLASHC. --- ChangeLog | 2 ++ doc/pcre2api.3 | 9 +++++++-- src/pcre2.h | 1 + src/pcre2.h.in | 1 + src/pcre2_compile.c | 1 + src/pcre2_internal.h | 1 + src/pcre2_pattern_info.c | 7 ++++++- src/pcre2test.c | 11 +++++++---- testdata/testinput2 | 2 ++ testdata/testinput5 | 2 ++ testdata/testoutput2 | 7 +++++++ testdata/testoutput5 | 8 ++++++++ 12 files changed, 45 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 78570ef..6fa2f53 100644 --- a/ChangeLog +++ b/ChangeLog @@ -307,6 +307,8 @@ as for link size 2. 91. Document that JIT has a limit on pattern size, and give more information about JIT compile failures in pcre2test. +92. Implement PCRE2_INFO_HASBACKSLASHC. + Version 10.20 30-June-2015 -------------------------- diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index a7452bf..aeb5bf2 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "13 November 2015" "PCRE2 10.21" +.TH PCRE2API 3 "14 November 2015" "PCRE2 10.21" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -1643,6 +1643,11 @@ greater than 255 are supported, the flag bit for 255 means "any code unit of value 255 or above". If such a table was constructed, a pointer to it is returned. Otherwise NULL is returned. The third argument should point to an \fBconst uint8_t *\fP variable. +.sp + PCRE2_INFO_HASBACKSLASHC +.sp +Return 1 if the pattern contains any instances of \eC, otherwise 0. The third +argument should point to an \fBuint32_t\fP variable. .sp PCRE2_INFO_HASCRORLF .sp @@ -3092,6 +3097,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 13 November 2015 +Last updated: 14 November 2015 Copyright (c) 1997-2015 University of Cambridge. .fi diff --git a/src/pcre2.h b/src/pcre2.h index cd23e95..e3caa46 100644 --- a/src/pcre2.h +++ b/src/pcre2.h @@ -268,6 +268,7 @@ numbers must not be changed. */ #define PCRE2_INFO_NEWLINE 20 #define PCRE2_INFO_RECURSIONLIMIT 21 #define PCRE2_INFO_SIZE 22 +#define PCRE2_INFO_HASBACKSLASHC 23 /* Request types for pcre2_config(). */ diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 0e8e932..31490bf 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -268,6 +268,7 @@ numbers must not be changed. */ #define PCRE2_INFO_NEWLINE 20 #define PCRE2_INFO_RECURSIONLIMIT 21 #define PCRE2_INFO_SIZE 22 +#define PCRE2_INFO_HASBACKSLASHC 23 /* Request types for pcre2_config(). */ diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 066921c..c6e84ce 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -7287,6 +7287,7 @@ for (;; ptr++) else { + if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */ if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && cb->max_lookbehind == 0) cb->max_lookbehind = 1; diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 3c9d954..9918560 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -531,6 +531,7 @@ bytes in a code unit in that mode. */ #define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */ #define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */ #define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */ +#define PCRE2_HASBKC 0x00400000 /* contains \C */ #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) diff --git a/src/pcre2_pattern_info.c b/src/pcre2_pattern_info.c index a0e734c..28c6869 100644 --- a/src/pcre2_pattern_info.c +++ b/src/pcre2_pattern_info.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2015 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -77,6 +77,7 @@ if (where == NULL) /* Requests field length */ case PCRE2_INFO_CAPTURECOUNT: case PCRE2_INFO_FIRSTCODETYPE: case PCRE2_INFO_FIRSTCODEUNIT: + case PCRE2_INFO_HASBACKSLASHC: case PCRE2_INFO_HASCRORLF: case PCRE2_INFO_JCHANGED: case PCRE2_INFO_LASTCODETYPE: @@ -151,6 +152,10 @@ switch(what) &(re->start_bitmap[0]) : NULL; break; + case PCRE2_INFO_HASBACKSLASHC: + *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0; + break; + case PCRE2_INFO_HASCRORLF: *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0; break; diff --git a/src/pcre2test.c b/src/pcre2test.c index edb9f0a..552da7f 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -3745,8 +3745,9 @@ if ((pat_patctl.control & CTL_INFO) != 0) const uint8_t *start_bits; BOOL match_limit_set, recursion_limit_set; uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit, - hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit, - minlength, nameentrysize, namecount, newline_convention, recursion_limit; + hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty, + match_limit, minlength, nameentrysize, namecount, newline_convention, + recursion_limit; /* These info requests may return PCRE2_ERROR_UNSET. */ @@ -3786,6 +3787,7 @@ if ((pat_patctl.control & CTL_INFO) != 0) pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) + pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) + pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) + + pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) + pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) + pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) + pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) + @@ -3840,8 +3842,9 @@ if ((pat_patctl.control & CTL_INFO) != 0) } } - if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); - if (match_empty) fprintf(outfile, "May match empty string\n"); + if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); + if (hasbackslashc) fprintf(outfile, "Contains \\C\n"); + if (match_empty) fprintf(outfile, "May match empty string\n"); pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE); pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE); diff --git a/testdata/testinput2 b/testdata/testinput2 index 36e0e04..796b59d 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -11,6 +11,8 @@ #forbid_utf #newline_default lf any anycrlf +/abc\Cdef/info + # Test binary zeroes in the pattern # /a\0B/ where 0 is a binary zero diff --git a/testdata/testinput5 b/testdata/testinput5 index fd5e6fd..a288459 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -1689,4 +1689,6 @@ /abc/utf,replace=xyz abc\=zero_terminate +/abc\Cdef/info,utf + # End of testinput5 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 8e4f9c0..fd56e34 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -11,6 +11,13 @@ #forbid_utf #newline_default lf any anycrlf +/abc\Cdef/info +Capturing subpattern count = 0 +Contains \C +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 7 + # Test binary zeroes in the pattern # /a\0B/ where 0 is a binary zero diff --git a/testdata/testoutput5 b/testdata/testoutput5 index fd68c87..ef93100 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -4062,4 +4062,12 @@ No match abc\=zero_terminate 1: xyz +/abc\Cdef/info,utf +Capturing subpattern count = 0 +Contains \C +Options: utf +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 0 + # End of testinput5