Make bcopy() emulation of memmove() work properly.

This commit is contained in:
Philip.Hazel 2018-08-10 16:27:44 +00:00
parent 9332d4be69
commit 1a8cc3dab6
7 changed files with 167 additions and 64 deletions

View File

@ -147,6 +147,13 @@ matched "ab". (The (?m) setting lost the fact that (?i) should be reset at the
end of its group during the parse process, but without another setting such as end of its group during the parse process, but without another setting such as
(?m) the compile phase got it right.) This bug was introduced by the (?m) the compile phase got it right.) This bug was introduced by the
refactoring in release 10.23. refactoring in release 10.23.
33. PCRE2 uses bcopy() if available when memmove() is not, and it used just to
define memmove() as a function call to bcopy(). This hasn't been tested for a
long time because in pcre2test the result of memmove() was being used, whereas
bcopy() doesn't return a result. This feature is now refactored always to call
an emulation function when there is no memmove(). The emulation makes use of
bcopy() when available.
Version 10.31 12-February-2018 Version 10.31 12-February-2018

View File

@ -5655,7 +5655,7 @@ for (;; pptr++)
if (class_has_8bitchar > 0) if (class_has_8bitchar > 0)
{ {
*code++ |= XCL_MAP; *code++ |= XCL_MAP;
memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
CU2BYTES(class_uchardata - code)); CU2BYTES(class_uchardata - code));
if (negate_class && !xclass_has_prop) if (negate_class && !xclass_has_prop)
for (i = 0; i < 32; i++) classbits[i] = ~classbits[i]; for (i = 0; i < 32; i++) classbits[i] = ~classbits[i];
@ -6602,7 +6602,7 @@ for (;; pptr++)
/* Wrap the recursion call in OP_BRA brackets. */ /* Wrap the recursion call in OP_BRA brackets. */
memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE)); (void)memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
op_previous = *previous = OP_BRA; op_previous = *previous = OP_BRA;
PUT(previous, 1, 2 + 2*LINK_SIZE); PUT(previous, 1, 2 + 2*LINK_SIZE);
previous[2 + 2*LINK_SIZE] = OP_KET; previous[2 + 2*LINK_SIZE] = OP_KET;
@ -6682,7 +6682,7 @@ for (;; pptr++)
if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED) if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED)
{ {
memmove(previous + 1, previous, CU2BYTES(len)); (void)memmove(previous + 1, previous, CU2BYTES(len));
code++; code++;
if (repeat_max == 0) if (repeat_max == 0)
{ {
@ -6703,7 +6703,7 @@ for (;; pptr++)
else else
{ {
int linkoffset; int linkoffset;
memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len)); (void)memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
code += 2 + LINK_SIZE; code += 2 + LINK_SIZE;
*previous++ = OP_BRAZERO + repeat_type; *previous++ = OP_BRAZERO + repeat_type;
*previous++ = OP_BRA; *previous++ = OP_BRA;
@ -6904,7 +6904,7 @@ for (;; pptr++)
if (*bracode == OP_COND || *bracode == OP_SCOND) if (*bracode == OP_COND || *bracode == OP_SCOND)
{ {
int nlen = (int)(code - bracode); int nlen = (int)(code - bracode);
memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen)); (void)memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
code += 1 + LINK_SIZE; code += 1 + LINK_SIZE;
nlen += 1 + LINK_SIZE; nlen += 1 + LINK_SIZE;
*bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS; *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
@ -7175,7 +7175,7 @@ for (;; pptr++)
else else
{ {
memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len)); (void)memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
code += 1 + LINK_SIZE; code += 1 + LINK_SIZE;
len += 1 + LINK_SIZE; len += 1 + LINK_SIZE;
tempcode[0] = OP_ONCE; tempcode[0] = OP_ONCE;
@ -7715,7 +7715,7 @@ for (;;)
{ {
if (cb->open_caps->flag) if (cb->open_caps->flag)
{ {
memmove(start_bracket + 1 + LINK_SIZE, start_bracket, (void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
CU2BYTES(code - start_bracket)); CU2BYTES(code - start_bracket));
*start_bracket = OP_ONCE; *start_bracket = OP_ONCE;
code += 1 + LINK_SIZE; code += 1 + LINK_SIZE;
@ -8315,7 +8315,7 @@ for (i = 0; i < tablecount; i++)
if (crc < 0) if (crc < 0)
{ {
memmove(slot + cb->name_entry_size, slot, (void)memmove(slot + cb->name_entry_size, slot,
CU2BYTES((tablecount - i) * cb->name_entry_size)); CU2BYTES((tablecount - i) * cb->name_entry_size));
break; break;
} }

View File

@ -875,7 +875,7 @@ for (;;)
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount) else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
match_count = 0; match_count = 0;
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2; count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
if (count > 0) memmove(offsets + 2, offsets, if (count > 0) (void)memmove(offsets + 2, offsets,
(size_t)count * sizeof(PCRE2_SIZE)); (size_t)count * sizeof(PCRE2_SIZE));
if (offsetcount >= 2) if (offsetcount >= 2)
{ {

View File

@ -165,6 +165,16 @@ by "configure". */
#define INT64_OR_DOUBLE double #define INT64_OR_DOUBLE double
#endif #endif
/* External (in the C sense) functions and tables that are private to the
libraries are always referenced using the PRIV macro. This makes it possible
for pcre2test.c to include some of the source files from the libraries using a
different PRIV definition to avoid name clashes. It also makes it clear in the
code that a non-static object is being referenced. */
#ifndef PRIV
#define PRIV(name) _pcre2_##name
#endif
/* When compiling for use with the Virtual Pascal compiler, these functions /* When compiling for use with the Virtual Pascal compiler, these functions
need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
option on the command line. */ option on the command line. */
@ -178,50 +188,15 @@ option on the command line. */
#define memset(s,c,n) _memset(s,c,n) #define memset(s,c,n) _memset(s,c,n)
#else /* VPCOMPAT */ #else /* VPCOMPAT */
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), /* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY a macro that calls an emulating function. */
is set. Otherwise, include an emulating function for those systems that have
neither (there are some non-Unix environments where this is the case). */
#ifndef HAVE_MEMMOVE #ifndef HAVE_MEMMOVE
#undef memmove /* some systems may have a macro */ #undef memmove /* Some systems may have a macro */
#ifdef HAVE_BCOPY #define memmove(a, b, c) PRIV(memmove)(a, b, c)
#define memmove(a, b, c) bcopy(b, a, c)
#else /* HAVE_BCOPY */
static void *
pcre2_memmove(void *d, const void *s, size_t n)
{
/* Byte-by-byte stand-in for memmove(): copies n bytes from s to d and returns
d. The copy direction is chosen from the relative position of the two pointers
so that a source byte is always read before the copy can overwrite it. */

unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t remaining = n;

if (to > from)
  {
  /* Destination lies above the source: start at the last byte and work
  downwards. */
  to += n;
  from += n;
  while (remaining-- > 0) *(--to) = *(--from);
  }
else
  {
  /* Destination is at or below the source: a plain ascending copy. */
  while (remaining-- > 0) *to++ = *from++;
  }

return d;
}
#define memmove(a, b, c) pcre2_memmove(a, b, c)
#endif /* not HAVE_BCOPY */
#endif /* not HAVE_MEMMOVE */ #endif /* not HAVE_MEMMOVE */
#endif /* not VPCOMPAT */ #endif /* not VPCOMPAT */
/* External (in the C sense) functions and tables that are private to the
libraries are always referenced using the PRIV macro. This makes it possible
for pcre2test.c to include some of the source files from the libraries using a
different PRIV definition to avoid name clashes. It also makes it clear in the
code that a non-static object is being referenced. */
#ifndef PRIV
#define PRIV(name) _pcre2_##name
#endif
/* This is an unsigned int value that no UTF character can ever have, as /* This is an unsigned int value that no UTF character can ever have, as
Unicode doesn't go beyond 0x0010ffff. */ Unicode doesn't go beyond 0x0010ffff. */
@ -1985,6 +1960,14 @@ extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
uint32_t *, BOOL); uint32_t *, BOOL);
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL); extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
/* This function is needed only when memmove() is not available. */
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
#define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove)
extern void * _pcre2_memmove(void *, const void *, size_t);
#endif
#endif /* PCRE2_CODE_UNIT_WIDTH */ #endif /* PCRE2_CODE_UNIT_WIDTH */
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */ #endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge New API code Copyright (c) 2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -50,6 +50,42 @@ functions work only on 8-bit data. */
#include "pcre2_internal.h" #include "pcre2_internal.h"
/*************************************************
* Emulated memmove() for systems without it *
*************************************************/
/* This function can make use of bcopy() if it is available. Otherwise do it by
steam, as there are some non-Unix environments that lack both memmove() and
bcopy(). */
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
void *
PRIV(memmove)(void *d, const void *s, size_t n)
{
/* Copies n bytes from s to d and returns d, which is the result the callers
of memmove() expect. */

#ifdef HAVE_BCOPY
/* bcopy() takes the source first and yields no result, so d is returned
explicitly. */
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t remaining = n;

if (to > from)
  {
  /* Destination lies above the source: start at the last byte and work
  downwards so that a source byte is read before it can be overwritten. */
  to += n;
  from += n;
  while (remaining-- > 0) *(--to) = *(--from);
  }
else
  {
  /* Destination is at or below the source: a plain ascending copy. */
  while (remaining-- > 0) *to++ = *from++;
  }

return d;
#endif /* not HAVE_BCOPY */
}
#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
/************************************************* /*************************************************
* Compare two zero-terminated PCRE2 strings * * Compare two zero-terminated PCRE2 strings *
*************************************************/ *************************************************/

View File

@ -469,6 +469,43 @@ const char utf8_table4[] = {
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
/*************************************************
* Emulated memmove() for systems without it *
*************************************************/
/* This function can make use of bcopy() if it is available. Otherwise do it by
steam, as there are some non-Unix environments that lack both memmove() and
bcopy(). */
static void *
emulated_memmove(void *d, const void *s, size_t n)
{
/* Copies n bytes from s to d and returns d, which is the result the callers
of memmove() expect. */

#ifdef HAVE_BCOPY
/* bcopy() takes the source first and yields no result, so d is returned
explicitly. */
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t remaining = n;

if (to > from)
  {
  /* Destination lies above the source: start at the last byte and work
  downwards so that a source byte is read before it can be overwritten. */
  to += n;
  from += n;
  while (remaining-- > 0) *(--to) = *(--from);
  }
else
  {
  /* Destination is at or below the source: a plain ascending copy. */
  while (remaining-- > 0) *to++ = *from++;
  }

return d;
#endif /* not HAVE_BCOPY */
}
#undef memmove
#define memmove(d,s,n) emulated_memmove(d,s,n)
#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
/************************************************* /*************************************************
* Case-independent string compare * * Case-independent string compare *
@ -2932,7 +2969,7 @@ while (ptr < endptr)
/* Now do the shuffle */ /* Now do the shuffle */
memmove(main_buffer, main_buffer + bufthird, 2*bufthird); (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
ptr -= bufthird; ptr -= bufthird;
bufflength = 2*bufthird + fill_buffer(handle, frtype, bufflength = 2*bufthird + fill_buffer(handle, frtype,

View File

@ -2595,6 +2595,46 @@ static const uint8_t tables2[] = {
}; };
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
/*************************************************
* Emulated memmove() for systems without it *
*************************************************/
/* This function can make use of bcopy() if it is available. Otherwise do it by
steam, as there are some non-Unix environments that lack both memmove() and
bcopy(). */
static void *
emulated_memmove(void *d, const void *s, size_t n)
{
/* Moves n bytes from s to d and hands back d, matching the value that users
of memmove() rely on. */

#ifdef HAVE_BCOPY
/* bcopy() wants its arguments as (source, destination) and has no return
value; supply the destination pointer ourselves. */
bcopy(s, d, n);
return d;
#else
unsigned char *out = (unsigned char *)d;
const unsigned char *in = (const unsigned char *)s;
size_t left = n;

if (out > in)
  {
  /* The destination starts after the source, so copy backwards from the end
  to avoid overwriting source bytes that have not yet been read. */
  out += n;
  in += n;
  while (left-- > 0) *(--out) = *(--in);
  }
else
  {
  /* Otherwise a forward copy is used. */
  while (left-- > 0) *out++ = *in++;
  }

return d;
#endif /* not HAVE_BCOPY */
}
#undef memmove
#define memmove(d,s,n) emulated_memmove(d,s,n)
#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
#ifndef HAVE_STRERROR #ifndef HAVE_STRERROR
/************************************************* /*************************************************
* Provide strerror() for non-ANSI libraries * * Provide strerror() for non-ANSI libraries *
@ -6949,9 +6989,9 @@ if (dat_datctl.replacement[0] != 0)
if (timeitm) if (timeitm)
fprintf(outfile, "** Timing is not supported with replace: ignored\n"); fprintf(outfile, "** Timing is not supported with replace: ignored\n");
if ((dat_datctl.control & CTL_ALTGLOBAL) != 0) if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
fprintf(outfile, "** Altglobal is not supported with replace: ignored\n"); fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 : xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
PCRE2_SUBSTITUTE_GLOBAL) | PCRE2_SUBSTITUTE_GLOBAL) |
@ -7259,7 +7299,7 @@ for (gmatched = 0;; gmatched++)
} }
/* If this is not the first time round a global loop, check that the /* If this is not the first time round a global loop, check that the
returned string has changed. If it has not, check for an empty string match returned string has changed. If it has not, check for an empty string match
at different starting offset from the previous match. This is a failed test at different starting offset from the previous match. This is a failed test
retry for null-matching patterns that don't match at their starting offset, retry for null-matching patterns that don't match at their starting offset,
for example /(?<=\G.)/. A repeated match at the same point is not such a for example /(?<=\G.)/. A repeated match at the same point is not such a
@ -7267,15 +7307,15 @@ for (gmatched = 0;; gmatched++)
match at the current point. For any other repeated match, there is a bug match at the current point. For any other repeated match, there is a bug
somewhere and we must break the loop because it will go on for ever. We somewhere and we must break the loop because it will go on for ever. We
know that there are always at least two elements in the ovector. */ know that there are always at least two elements in the ovector. */
if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
{ {
if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset) if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
{ {
g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
ovecsave[2] = dat_datctl.offset; ovecsave[2] = dat_datctl.offset;
continue; /* Back to the top of the loop */ continue; /* Back to the top of the loop */
} }
fprintf(outfile, fprintf(outfile,
"** PCRE2 error: global repeat returned the same string as previous\n"); "** PCRE2 error: global repeat returned the same string as previous\n");
fprintf(outfile, "** Global loop abandoned\n"); fprintf(outfile, "** Global loop abandoned\n");
@ -7591,11 +7631,11 @@ for (gmatched = 0;; gmatched++)
subject. If so, the loop is over. Otherwise, mimic what Perl's /g option subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
at the same point. If this fails it will be picked up above, where a fake at the same point. If this fails it will be picked up above, where a fake
match is set up so that at this point we advance to the next character. match is set up so that at this point we advance to the next character.
However, in order to cope with patterns that never match at their starting However, in order to cope with patterns that never match at their starting
offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
than the starting offset. This means there will be a retry with the than the starting offset. This means there will be a retry with the
starting offset at the match offset. If this returns the same match again, starting offset at the match offset. If this returns the same match again,
it is picked up above and ignored, and the special action is then taken. */ it is picked up above and ignored, and the special action is then taken. */
@ -7644,16 +7684,16 @@ for (gmatched = 0;; gmatched++)
/* For a normal global (/g) iteration, save the current ovector[0,1] and /* For a normal global (/g) iteration, save the current ovector[0,1] and
the starting offset so that we can check that they do change each time. the starting offset so that we can check that they do change each time.
Otherwise a matching bug that returns the same string causes an infinite Otherwise a matching bug that returns the same string causes an infinite
loop. It has happened! Then update the start offset, leaving other loop. It has happened! Then update the start offset, leaving other
parameters alone. */ parameters alone. */
if ((dat_datctl.control & CTL_GLOBAL) != 0) if ((dat_datctl.control & CTL_GLOBAL) != 0)
{ {
ovecsave[0] = ovector[0]; ovecsave[0] = ovector[0];
ovecsave[1] = ovector[1]; ovecsave[1] = ovector[1];
ovecsave[2] = dat_datctl.offset; ovecsave[2] = dat_datctl.offset;
dat_datctl.offset = end_offset; dat_datctl.offset = end_offset;
} }
/* For altglobal, just update the pointer and length. */ /* For altglobal, just update the pointer and length. */