Implement PCRE2_INFO_HASBACKSLASHC.

This commit is contained in:
Philip.Hazel 2015-11-14 17:28:19 +00:00
parent 7c429c593c
commit 5ced14bf24
12 changed files with 45 additions and 7 deletions

View File

@ -307,6 +307,8 @@ as for link size 2.
91. Document that JIT has a limit on pattern size, and give more information 91. Document that JIT has a limit on pattern size, and give more information
about JIT compile failures in pcre2test. about JIT compile failures in pcre2test.
92. Implement PCRE2_INFO_HASBACKSLASHC.
Version 10.20 30-June-2015 Version 10.20 30-June-2015
-------------------------- --------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "13 November 2015" "PCRE2 10.21" .TH PCRE2API 3 "14 November 2015" "PCRE2 10.21"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.sp .sp
@ -1643,6 +1643,11 @@ greater than 255 are supported, the flag bit for 255 means "any code unit of
value 255 or above". If such a table was constructed, a pointer to it is value 255 or above". If such a table was constructed, a pointer to it is
returned. Otherwise NULL is returned. The third argument should point to an returned. Otherwise NULL is returned. The third argument should point to an
\fBconst uint8_t *\fP variable. \fBconst uint8_t *\fP variable.
.sp
PCRE2_INFO_HASBACKSLASHC
.sp
Return 1 if the pattern contains any instances of \eC, otherwise 0. The third
argument should point to a \fBuint32_t\fP variable.
.sp .sp
PCRE2_INFO_HASCRORLF PCRE2_INFO_HASCRORLF
.sp .sp
@ -3092,6 +3097,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 13 November 2015 Last updated: 14 November 2015
Copyright (c) 1997-2015 University of Cambridge. Copyright (c) 1997-2015 University of Cambridge.
.fi .fi

View File

@ -268,6 +268,7 @@ numbers must not be changed. */
#define PCRE2_INFO_NEWLINE 20 #define PCRE2_INFO_NEWLINE 20
#define PCRE2_INFO_RECURSIONLIMIT 21 #define PCRE2_INFO_RECURSIONLIMIT 21
#define PCRE2_INFO_SIZE 22 #define PCRE2_INFO_SIZE 22
#define PCRE2_INFO_HASBACKSLASHC 23
/* Request types for pcre2_config(). */ /* Request types for pcre2_config(). */

View File

@ -268,6 +268,7 @@ numbers must not be changed. */
#define PCRE2_INFO_NEWLINE 20 #define PCRE2_INFO_NEWLINE 20
#define PCRE2_INFO_RECURSIONLIMIT 21 #define PCRE2_INFO_RECURSIONLIMIT 21
#define PCRE2_INFO_SIZE 22 #define PCRE2_INFO_SIZE 22
#define PCRE2_INFO_HASBACKSLASHC 23
/* Request types for pcre2_config(). */ /* Request types for pcre2_config(). */

View File

@ -7287,6 +7287,7 @@ for (;; ptr++)
else else
{ {
if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */
if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
cb->max_lookbehind == 0) cb->max_lookbehind == 0)
cb->max_lookbehind = 1; cb->max_lookbehind = 1;

View File

@ -531,6 +531,7 @@ bytes in a code unit in that mode. */
#define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */ #define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */
#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */ #define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */
#define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */ #define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */
#define PCRE2_HASBKC 0x00400000 /* contains \C */
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2014 University of Cambridge New API code Copyright (c) 2015 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -77,6 +77,7 @@ if (where == NULL) /* Requests field length */
case PCRE2_INFO_CAPTURECOUNT: case PCRE2_INFO_CAPTURECOUNT:
case PCRE2_INFO_FIRSTCODETYPE: case PCRE2_INFO_FIRSTCODETYPE:
case PCRE2_INFO_FIRSTCODEUNIT: case PCRE2_INFO_FIRSTCODEUNIT:
case PCRE2_INFO_HASBACKSLASHC:
case PCRE2_INFO_HASCRORLF: case PCRE2_INFO_HASCRORLF:
case PCRE2_INFO_JCHANGED: case PCRE2_INFO_JCHANGED:
case PCRE2_INFO_LASTCODETYPE: case PCRE2_INFO_LASTCODETYPE:
@ -151,6 +152,10 @@ switch(what)
&(re->start_bitmap[0]) : NULL; &(re->start_bitmap[0]) : NULL;
break; break;
case PCRE2_INFO_HASBACKSLASHC:
*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
break;
case PCRE2_INFO_HASCRORLF: case PCRE2_INFO_HASCRORLF:
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0; *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
break; break;

View File

@ -3745,8 +3745,9 @@ if ((pat_patctl.control & CTL_INFO) != 0)
const uint8_t *start_bits; const uint8_t *start_bits;
BOOL match_limit_set, recursion_limit_set; BOOL match_limit_set, recursion_limit_set;
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit, uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit, hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
minlength, nameentrysize, namecount, newline_convention, recursion_limit; match_limit, minlength, nameentrysize, namecount, newline_convention,
recursion_limit;
/* These info requests may return PCRE2_ERROR_UNSET. */ /* These info requests may return PCRE2_ERROR_UNSET. */
@ -3786,6 +3787,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) + pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) + pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) + pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) + pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) + pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) + pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
@ -3841,6 +3843,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
} }
if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
if (match_empty) fprintf(outfile, "May match empty string\n"); if (match_empty) fprintf(outfile, "May match empty string\n");
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE); pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);

2
testdata/testinput2 vendored
View File

@ -11,6 +11,8 @@
#forbid_utf #forbid_utf
#newline_default lf any anycrlf #newline_default lf any anycrlf
/abc\Cdef/info
# Test binary zeroes in the pattern # Test binary zeroes in the pattern
# /a\0B/ where 0 is a binary zero # /a\0B/ where 0 is a binary zero

2
testdata/testinput5 vendored
View File

@ -1689,4 +1689,6 @@
/abc/utf,replace=xyz /abc/utf,replace=xyz
abc\=zero_terminate abc\=zero_terminate
/abc\Cdef/info,utf
# End of testinput5 # End of testinput5

View File

@ -11,6 +11,13 @@
#forbid_utf #forbid_utf
#newline_default lf any anycrlf #newline_default lf any anycrlf
/abc\Cdef/info
Capturing subpattern count = 0
Contains \C
First code unit = 'a'
Last code unit = 'f'
Subject length lower bound = 7
# Test binary zeroes in the pattern # Test binary zeroes in the pattern
# /a\0B/ where 0 is a binary zero # /a\0B/ where 0 is a binary zero

View File

@ -4062,4 +4062,12 @@ No match
abc\=zero_terminate abc\=zero_terminate
1: xyz 1: xyz
/abc\Cdef/info,utf
Capturing subpattern count = 0
Contains \C
Options: utf
First code unit = 'a'
Last code unit = 'f'
Subject length lower bound = 0
# End of testinput5 # End of testinput5