From 2f27ca131d4bc4d6f066899bc91651916ac07ed6 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Thu, 16 Oct 2014 16:49:23 +0000 Subject: [PATCH] Update pcre2_pattern_info to return a length if "where" is NULL. --- doc/pcre2api.3 | 62 +++++++++++++++++++++++----------------- src/pcre2_pattern_info.c | 54 ++++++++++++++++++++++++++++------ 2 files changed, 81 insertions(+), 35 deletions(-) diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 3800de8..f766417 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "14 October 2014" "PCRE2 10.00" +.TH PCRE2API 3 "16 October 2014" "PCRE2 10.00" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -757,13 +757,16 @@ documentation has more details about these optional features. .P The first argument for \fBpcre2_config()\fP specifies which information is required. The second argument is a pointer to memory into which the information -is placed. If NULL is passed, the function returns the number of bytes that are +is placed. If NULL is passed, the function returns the amount of memory that is needed for the requested information. For calls that return numerical values, -\fIwhere\fP should point to appropriately aligned memory. +the value is in bytes; when requesting these values, \fIwhere\fP should point +to appropriately aligned memory. For calls that return strings, the required +length is given in code units, not counting the terminating zero. .P -The returned value from \fBpcre2_config()\fP is non-negative on success, or the -negative error code PCRE2_ERROR_BADOPTION if the value in the first argument is -not recognized. The following information is available: +When requesting information, the returned value from \fBpcre2_config()\fP is +non-negative on success, or the negative error code PCRE2_ERROR_BADOPTION if +the value in the first argument is not recognized. 
The following information is +available: .sp PCRE2_CONFIG_BSR .sp @@ -779,12 +782,13 @@ compiling is available; otherwise it is set to zero. .sp PCRE2_CONFIG_JITTARGET .sp -The \fIwhere\fP argument should point to a buffer that is at least 64 code -units long. It is filled with a string that contains the name of the -architecture for which the JIT compiler is configured, for example "x86 32bit -(little endian + unaligned)". If JIT support is not available, -PCRE2_ERROR_BADOPTION is returned, otherwise the length of the string, in code -units, is returned. +The \fIwhere\fP argument should point to a buffer that is at least 48 code +units long. (The exact length needed can be found by calling +\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with a +string that contains the name of the architecture for which the JIT compiler is +configured, for example "x86 32bit (little endian + unaligned)". If JIT support +is not available, PCRE2_ERROR_BADOPTION is returned, otherwise the length of +the string, in code units, is returned. .sp PCRE2_CONFIG_LINKSIZE .sp @@ -847,10 +851,12 @@ heap instead of recursive function calls. PCRE2_CONFIG_UNICODE_VERSION .sp The \fIwhere\fP argument should point to a buffer that is at least 24 code -units long. If PCRE2 has been compiled without Unicode support, this is filled -with the text "Unicode not supported". Otherwise, the Unicode version string -(for example, "7.0.0") is inserted. The string is zero-terminated. The function -returns the length of the string in code units. +units long. (The exact length needed can be found by calling +\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) If PCRE2 has been compiled +without Unicode support, the buffer is filled with the text "Unicode not +supported". Otherwise, the Unicode version string (for example, "7.0.0") is +inserted. The string is zero-terminated. The function returns the length of the +string in code units. 
.sp PCRE2_CONFIG_UNICODE .sp @@ -859,9 +865,11 @@ otherwise it is set to zero. Unicode support implies UTF support. .sp PCRE2_CONFIG_VERSION .sp -The \fIwhere\fP argument should point to a buffer that is at least 12 code -units long. It is filled with the PCRE2 version string, zero-terminated. The -length of the string (in code units) is returned. +The \fIwhere\fP argument should point to a buffer that is at least 12 code +units long. (The exact length needed can be found by calling +\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with +the PCRE2 version string, zero-terminated. The length of the string (in code +units) is returned. . . .SH "COMPILING A PATTERN" @@ -1350,20 +1358,20 @@ can be processed in different locales. The \fBpcre2_pattern_info()\fP function returns information about a compiled pattern. The first argument is a pointer to the compiled pattern. The second argument specifies which piece of information is required, and the third -argument is a pointer to a variable to receive the data. The yield of the -function is zero for success, or one of the following negative numbers: +argument is a pointer to a variable to receive the data. If the third argument +is NULL, the first argument is ignored, and the function returns the size in +bytes of the variable that is required for the information requested. +Otherwise, the yield of the function is zero for success, or one of the +following negative numbers: .sp PCRE2_ERROR_NULL the argument \fIcode\fP was NULL - the argument \fIwhere\fP was NULL PCRE2_ERROR_BADMAGIC the "magic number" was not found PCRE2_ERROR_BADOPTION the value of \fIwhat\fP was invalid PCRE2_ERROR_UNSET the requested field is not set .sp The "magic number" is placed at the start of each compiled pattern as an simple -check against passing an arbitrary memory pointer. 
-Here is -a typical call of \fBpcre2_pattern_info()\fP, to obtain the length of the compiled -pattern: +check against passing an arbitrary memory pointer. Here is a typical call of +\fBpcre2_pattern_info()\fP, to obtain the length of the compiled pattern: .sp int rc; size_t length; @@ -2624,6 +2632,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 14 October 2014 +Last updated: 16 October 2014 Copyright (c) 1997-2014 University of Cambridge. .fi diff --git a/src/pcre2_pattern_info.c b/src/pcre2_pattern_info.c index 0fe58bb..1eb9a83 100644 --- a/src/pcre2_pattern_info.c +++ b/src/pcre2_pattern_info.c @@ -54,9 +54,11 @@ POSSIBILITY OF SUCH DAMAGE. Arguments: code points to compiled code what what information is required - where where to put the information + where where to put the information; if NULL, return length -Returns: 0 if data returned, negative on error or unset value +Returns: 0 when data returned + > 0 when length requested + < 0 on error or unset value */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION @@ -64,7 +66,44 @@ pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where) { const pcre2_real_code *re = (pcre2_real_code *)code; -if (re == NULL || where == NULL) return PCRE2_ERROR_NULL; +if (where == NULL) /* Requests field length */ + { + switch(what) + { + case PCRE2_INFO_ALLOPTIONS: + case PCRE2_INFO_ARGOPTIONS: + case PCRE2_INFO_BACKREFMAX: + case PCRE2_INFO_BSR: + case PCRE2_INFO_CAPTURECOUNT: + case PCRE2_INFO_FIRSTCODETYPE: + case PCRE2_INFO_FIRSTCODEUNIT: + case PCRE2_INFO_HASCRORLF: + case PCRE2_INFO_JCHANGED: + case PCRE2_INFO_LASTCODETYPE: + case PCRE2_INFO_LASTCODEUNIT: + case PCRE2_INFO_MATCHEMPTY: + case PCRE2_INFO_MATCHLIMIT: + case PCRE2_INFO_MAXLOOKBEHIND: + case PCRE2_INFO_MINLENGTH: + case PCRE2_INFO_NAMEENTRYSIZE: + case PCRE2_INFO_NAMECOUNT: + case PCRE2_INFO_NEWLINE: + case PCRE2_INFO_RECURSIONLIMIT: + return sizeof(uint32_t); + + case PCRE2_INFO_FIRSTBITMAP: + return sizeof(const uint8_t *); + + case 
PCRE2_INFO_JITSIZE: + case PCRE2_INFO_SIZE: + return sizeof(size_t); + + case PCRE2_INFO_NAMETABLE: + return sizeof(PCRE2_SPTR); + } + } + +if (re == NULL) return PCRE2_ERROR_NULL; /* Check that the first field in the block is the magic number. If it is not, return with PCRE2_ERROR_BADMAGIC. */ @@ -85,8 +124,7 @@ with different endianness. */ /* Check that this pattern was compiled in the correct bit mode */ -if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) - return PCRE2_ERROR_BADMODE; +if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE; switch(what) { @@ -157,7 +195,7 @@ switch(what) case PCRE2_INFO_MATCHLIMIT: *((uint32_t *)where) = re->limit_match; - if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET; + if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET; break; case PCRE2_INFO_MAXLOOKBEHIND: @@ -177,7 +215,7 @@ switch(what) break; case PCRE2_INFO_NAMETABLE: - *((PCRE2_SPTR*)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code)); + *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code)); break; case PCRE2_INFO_NEWLINE: @@ -186,7 +224,7 @@ switch(what) case PCRE2_INFO_RECURSIONLIMIT: *((uint32_t *)where) = re->limit_recursion; - if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET; + if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET; break; case PCRE2_INFO_SIZE: