From 1a8cc3dab6adcbab36f12248e70d44aa7d8d99b9 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Fri, 10 Aug 2018 16:27:44 +0000 Subject: [PATCH] Make bcopy() emulation of memmove() work properly. --- ChangeLog | 7 +++++ src/pcre2_compile.c | 16 +++++----- src/pcre2_dfa_match.c | 2 +- src/pcre2_internal.h | 61 +++++++++++++---------------------- src/pcre2_string_utils.c | 38 +++++++++++++++++++++- src/pcre2grep.c | 39 ++++++++++++++++++++++- src/pcre2test.c | 68 +++++++++++++++++++++++++++++++--------- 7 files changed, 167 insertions(+), 64 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9918619..9e84c3a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -147,6 +147,13 @@ matched "ab". (The (?m) setting lost the fact that (?i) should be reset at the end of its group during the parse process, but without another setting such as (?m) the compile phase got it right.) This bug was introduced by the refactoring in release 10.23. + +33. PCRE2 uses bcopy() if available when memmove() is not, and it used just to +define memmove() as function call to bcopy(). This hasn't been tested for a +long time because in pcre2test the result of memmove() was being used, whereas +bcopy() doesn't return a result. This feature is now refactored always to call +an emulation function when there is no memmove(). The emulation makes use of +bcopy() when available. Version 10.31 12-February-2018 diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 42adde7..d47225e 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -5655,7 +5655,7 @@ for (;; pptr++) if (class_has_8bitchar > 0) { *code++ |= XCL_MAP; - memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, + (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, CU2BYTES(class_uchardata - code)); if (negate_class && !xclass_has_prop) for (i = 0; i < 32; i++) classbits[i] = ~classbits[i]; @@ -6602,7 +6602,7 @@ for (;; pptr++) /* Wrap the recursion call in OP_BRA brackets. */ - memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE)); + (void)memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE)); op_previous = *previous = OP_BRA; PUT(previous, 1, 2 + 2*LINK_SIZE); previous[2 + 2*LINK_SIZE] = OP_KET; @@ -6682,7 +6682,7 @@ for (;; pptr++) if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED) { - memmove(previous + 1, previous, CU2BYTES(len)); + (void)memmove(previous + 1, previous, CU2BYTES(len)); code++; if (repeat_max == 0) { @@ -6703,7 +6703,7 @@ for (;; pptr++) else { int linkoffset; - memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len)); + (void)memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len)); code += 2 + LINK_SIZE; *previous++ = OP_BRAZERO + repeat_type; *previous++ = OP_BRA; @@ -6904,7 +6904,7 @@ for (;; pptr++) if (*bracode == OP_COND || *bracode == OP_SCOND) { int nlen = (int)(code - bracode); - memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen)); + (void)memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen)); code += 1 + LINK_SIZE; nlen += 1 + LINK_SIZE; *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS; @@ -7175,7 +7175,7 @@ for (;; pptr++) else { - memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len)); + (void)memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len)); code += 1 + LINK_SIZE; len += 1 + LINK_SIZE; tempcode[0] = OP_ONCE; @@ -7715,7 +7715,7 @@ for (;;) { if (cb->open_caps->flag) { - memmove(start_bracket + 1 + LINK_SIZE, start_bracket, + (void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket, CU2BYTES(code - start_bracket)); *start_bracket = OP_ONCE; code += 1 + LINK_SIZE; @@ -8315,7 +8315,7 @@ for (i = 0; i < tablecount; i++) if (crc < 0) { - memmove(slot + cb->name_entry_size, slot, + (void)memmove(slot + cb->name_entry_size, slot, CU2BYTES((tablecount - i) * cb->name_entry_size)); break; } diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index 3e80ecc..9b43237 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -875,7 +875,7 @@ for (;;) else if (match_count > 0 && ++match_count * 2 > (int)offsetcount) match_count = 0; count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2; - if (count > 0) memmove(offsets + 2, offsets, + if (count > 0) (void)memmove(offsets + 2, offsets, (size_t)count * sizeof(PCRE2_SIZE)); if (offsetcount >= 2) { diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 8063291..527ae12 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -165,6 +165,16 @@ by "configure". */ #define INT64_OR_DOUBLE double #endif +/* External (in the C sense) functions and tables that are private to the +libraries are always referenced using the PRIV macro. This makes it possible +for pcre2test.c to include some of the source files from the libraries using a +different PRIV definition to avoid name clashes. It also makes it clear in the +code that a non-static object is being referenced. */ + +#ifndef PRIV +#define PRIV(name) _pcre2_##name +#endif + /* When compiling for use with the Virtual Pascal compiler, these functions need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT option on the command line. */ @@ -178,50 +188,15 @@ option on the command line. */ #define memset(s,c,n) _memset(s,c,n) #else /* VPCOMPAT */ -/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), -define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY -is set. Otherwise, include an emulating function for those systems that have -neither (there some non-Unix environments where this is the case). */ +/* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define +a macro that calls an emulating function. */ #ifndef HAVE_MEMMOVE -#undef memmove /* some systems may have a macro */ -#ifdef HAVE_BCOPY -#define memmove(a, b, c) bcopy(b, a, c) -#else /* HAVE_BCOPY */ -static void * -pcre2_memmove(void *d, const void *s, size_t n) -{ -size_t i; -unsigned char *dest = (unsigned char *)d; -const unsigned char *src = (const unsigned char *)s; -if (dest > src) - { - dest += n; - src += n; - for (i = 0; i < n; ++i) *(--dest) = *(--src); - return (void *)dest; - } -else - { - for (i = 0; i < n; ++i) *dest++ = *src++; - return (void *)(dest - n); - } -} -#define memmove(a, b, c) pcre2_memmove(a, b, c) -#endif /* not HAVE_BCOPY */ +#undef memmove /* Some systems may have a macro */ +#define memmove(a, b, c) PRIV(memmove)(a, b, c) #endif /* not HAVE_MEMMOVE */ #endif /* not VPCOMPAT */ -/* External (in the C sense) functions and tables that are private to the -libraries are always referenced using the PRIV macro. This makes it possible -for pcre2test.c to include some of the source files from the libraries using a -different PRIV definition to avoid name clashes. It also makes it clear in the -code that a non-static object is being referenced. */ - -#ifndef PRIV -#define PRIV(name) _pcre2_##name -#endif - /* This is an unsigned int value that no UTF character can ever have, as Unicode doesn't go beyond 0x0010ffff. */ @@ -1985,6 +1960,14 @@ extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *); extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *, BOOL); extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL); + +/* This function is needed only when memmove() is not available. */ + +#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) +#define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove) +extern void * _pcre2_memmove(void *, const void *, size_t); +#endif + #endif /* PCRE2_CODE_UNIT_WIDTH */ #endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */ diff --git a/src/pcre2_string_utils.c b/src/pcre2_string_utils.c index 2a1f282..d6be01a 100644 --- a/src/pcre2_string_utils.c +++ b/src/pcre2_string_utils.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -50,6 +50,42 @@ functions work only on 8-bit data. */ #include "pcre2_internal.h" +/************************************************* +* Emulated memmove() for systems without it * +*************************************************/ + +/* This function can make use of bcopy() if it is available. Otherwise do it by +steam, as there some non-Unix environments that lack both memmove() and +bcopy(). */ + +#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) +void * +PRIV(memmove)(void *d, const void *s, size_t n) +{ +#ifdef HAVE_BCOPY +bcopy(s, d, n); +return d; +#else +size_t i; +unsigned char *dest = (unsigned char *)d; +const unsigned char *src = (const unsigned char *)s; +if (dest > src) + { + dest += n; + src += n; + for (i = 0; i < n; ++i) *(--dest) = *(--src); + return (void *)dest; + } +else + { + for (i = 0; i < n; ++i) *dest++ = *src++; + return (void *)(dest - n); + } +#endif /* not HAVE_BCOPY */ +} +#endif /* not VPCOMPAT && not HAVE_MEMMOVE */ + + /************************************************* * Compare two zero-terminated PCRE2 strings * *************************************************/ diff --git a/src/pcre2grep.c b/src/pcre2grep.c index f18693a..6c7c2a7 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -469,6 +469,43 @@ const char utf8_table4[] = { 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; +#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) +/************************************************* +* Emulated memmove() for systems without it * +*************************************************/ + +/* This function can make use of bcopy() if it is available. Otherwise do it by +steam, as there are some non-Unix environments that lack both memmove() and +bcopy(). */ + +static void * +emulated_memmove(void *d, const void *s, size_t n) +{ +#ifdef HAVE_BCOPY +bcopy(s, d, n); +return d; +#else +size_t i; +unsigned char *dest = (unsigned char *)d; +const unsigned char *src = (const unsigned char *)s; +if (dest > src) + { + dest += n; + src += n; + for (i = 0; i < n; ++i) *(--dest) = *(--src); + return (void *)dest; + } +else + { + for (i = 0; i < n; ++i) *dest++ = *src++; + return (void *)(dest - n); + } +#endif /* not HAVE_BCOPY */ +} +#undef memmove +#define memmove(d,s,n) emulated_memmove(d,s,n) +#endif /* not VPCOMPAT && not HAVE_MEMMOVE */ + /************************************************* * Case-independent string compare * @@ -2932,7 +2969,7 @@ while (ptr < endptr) /* Now do the shuffle */ - memmove(main_buffer, main_buffer + bufthird, 2*bufthird); + (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird); ptr -= bufthird; bufflength = 2*bufthird + fill_buffer(handle, frtype, diff --git a/src/pcre2test.c b/src/pcre2test.c index 43bd5c5..8cfb8e9 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -2595,6 +2595,46 @@ static const uint8_t tables2[] = { }; + +#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) +/************************************************* +* Emulated memmove() for systems without it * +*************************************************/ + +/* This function can make use of bcopy() if it is available. Otherwise do it by +steam, as there are some non-Unix environments that lack both memmove() and +bcopy(). */ + +static void * +emulated_memmove(void *d, const void *s, size_t n) +{ +#ifdef HAVE_BCOPY +bcopy(s, d, n); +return d; +#else +size_t i; +unsigned char *dest = (unsigned char *)d; +const unsigned char *src = (const unsigned char *)s; +if (dest > src) + { + dest += n; + src += n; + for (i = 0; i < n; ++i) *(--dest) = *(--src); + return (void *)dest; + } +else + { + for (i = 0; i < n; ++i) *dest++ = *src++; + return (void *)(dest - n); + } +#endif /* not HAVE_BCOPY */ +} +#undef memmove +#define memmove(d,s,n) emulated_memmove(d,s,n) +#endif /* not VPCOMPAT && not HAVE_MEMMOVE */ + + + #ifndef HAVE_STRERROR /************************************************* * Provide strerror() for non-ANSI libraries * @@ -6949,9 +6989,9 @@ if (dat_datctl.replacement[0] != 0) if (timeitm) fprintf(outfile, "** Timing is not supported with replace: ignored\n"); - + if ((dat_datctl.control & CTL_ALTGLOBAL) != 0) - fprintf(outfile, "** Altglobal is not supported with replace: ignored\n"); + fprintf(outfile, "** Altglobal is not supported with replace: ignored\n"); xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 : PCRE2_SUBSTITUTE_GLOBAL) | @@ -7259,7 +7299,7 @@ for (gmatched = 0;; gmatched++) } /* If this is not the first time round a global loop, check that the - returned string has changed. If it has not, check for an empty string match + returned string has changed. If it has not, check for an empty string match at different starting offset from the previous match. This is a failed test retry for null-matching patterns that don't match at their starting offset, for example /(?<=\G.)/. A repeated match at the same point is not such a @@ -7267,15 +7307,15 @@ for (gmatched = 0;; gmatched++) match at the current point. For any other repeated match, there is a bug somewhere and we must break the loop because it will go on for ever. We know that there are always at least two elements in the ovector. */ - + if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) { if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset) { g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; - ovecsave[2] = dat_datctl.offset; + ovecsave[2] = dat_datctl.offset; continue; /* Back to the top of the loop */ - } + } fprintf(outfile, "** PCRE2 error: global repeat returned the same string as previous\n"); fprintf(outfile, "** Global loop abandoned\n"); @@ -7591,11 +7631,11 @@ for (gmatched = 0;; gmatched++) subject. If so, the loop is over. Otherwise, mimic what Perl's /g option does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again at the same point. If this fails it will be picked up above, where a fake - match is set up so that at this point we advance to the next character. - - However, in order to cope with patterns that never match at their starting - offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater - than the starting offset. This means there will be a retry with the + match is set up so that at this point we advance to the next character. + + However, in order to cope with patterns that never match at their starting + offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater + than the starting offset. This means there will be a retry with the starting offset at the match offset. If this returns the same match again, it is picked up above and ignored, and the special action is then taken. */ @@ -7644,16 +7684,16 @@ for (gmatched = 0;; gmatched++) /* For a normal global (/g) iteration, save the current ovector[0,1] and the starting offset so that we can check that they do change each time. Otherwise a matching bug that returns the same string causes an infinite - loop. It has happened! Then update the start offset, leaving other + loop. It has happened! Then update the start offset, leaving other parameters alone. */ if ((dat_datctl.control & CTL_GLOBAL) != 0) { ovecsave[0] = ovector[0]; ovecsave[1] = ovector[1]; - ovecsave[2] = dat_datctl.offset; + ovecsave[2] = dat_datctl.offset; dat_datctl.offset = end_offset; - } + } /* For altglobal, just update the pointer and length. */