From d5a61ee8917120ff518151f33862a0926dfc21eb Mon Sep 17 00:00:00 2001 From: Philip Hazel Date: Sat, 28 Aug 2021 17:37:33 +0100 Subject: [PATCH] Patch to detect (and ignore) symlink loops in pcre2grep. --- CMakeLists.txt | 2 ++ ChangeLog | 4 ++++ configure.ac | 8 ++++---- src/config.h.generic | 9 ++++++--- src/config.h.in | 3 +++ src/pcre2grep.c | 31 +++++++++++++++++++++++++++---- 6 files changed, 46 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c139c2c..f715aa5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,6 +98,7 @@ # 2021-07-05 JWSB modified such both the static and shared library can be # build in one go. # 2021-08-28 PH increased minimum version +# 2021-08-28 PH added test for realpath() PROJECT(PCRE2 C) @@ -143,6 +144,7 @@ CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H) CHECK_SYMBOL_EXISTS(bcopy "strings.h" HAVE_BCOPY) CHECK_SYMBOL_EXISTS(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE) CHECK_SYMBOL_EXISTS(memmove "string.h" HAVE_MEMMOVE) +CHECK_SYMBOL_EXISTS(realpath "stdlib.h" HAVE_REALPATH) CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h" HAVE_SECURE_GETENV) CHECK_SYMBOL_EXISTS(strerror "string.h" HAVE_STRERROR) diff --git a/ChangeLog b/ChangeLog index c320fde..b4e5985 100644 --- a/ChangeLog +++ b/ChangeLog @@ -37,6 +37,10 @@ adding "-static". For example, pcre2-8.lib has become pcre2-8-static.lib. 2.8.12 is deprecated (it was set to 2.8.5) and causes warnings. Even 3.0.0 is quote old; it was released in 2014. +4. Implemented a modified version of Thomas Tempelmann's pcre2grep patch for +detecting symlink loops. This is dependent on the availability of realpath(), +which is now tested for in ./configure and CMakeLists.txt. + Version 10.37 26-May-2021 diff --git a/configure.ac b/configure.ac index 69d940b..6940a03 100644 --- a/configure.ac +++ b/configure.ac @@ -9,9 +9,9 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might dnl be defined as -RC2, for example. 
For real releases, it should be empty. m4_define(pcre2_major, [10]) -m4_define(pcre2_minor, [37]) -m4_define(pcre2_prerelease, []) -m4_define(pcre2_date, [2021-05-26]) +m4_define(pcre2_minor, [38]) +m4_define(pcre2_prerelease, [-RC1]) +m4_define(pcre2_date, [2021-08-28]) # Libtool shared library interface versions (current:revision:age) m4_define(libpcre2_8_version, [10:2:10]) @@ -512,7 +512,7 @@ AC_TYPE_SIZE_T # Checks for library functions. -AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror) +AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp realpath secure_getenv strerror) # Check for the availability of libz (aka zlib) diff --git a/src/config.h.generic b/src/config.h.generic index e620bb0..9a290f1 100644 --- a/src/config.h.generic +++ b/src/config.h.generic @@ -103,6 +103,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the <readline/readline.h> header file. */ /* #undef HAVE_READLINE_READLINE_H */ +/* Define to 1 if you have the `realpath' function. */ +/* #undef HAVE_REALPATH */ + /* Define to 1 if you have the `secure_getenv' function. */ /* #undef HAVE_SECURE_GETENV */ @@ -230,7 +233,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_NAME "PCRE2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE2 10.37" +#define PACKAGE_STRING "PCRE2 10.38-RC1" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pcre2" @@ -239,7 +242,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "10.37" +#define PACKAGE_VERSION "10.38-RC1" /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system @@ -432,7 +435,7 @@ sure both macros are undefined; an emulation function will then be used.
*/ #endif /* Version number of package */ -#define VERSION "10.37" +#define VERSION "10.38-RC1" /* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */ diff --git a/src/config.h.in b/src/config.h.in index e7ab064..99add60 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -103,6 +103,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the <readline/readline.h> header file. */ #undef HAVE_READLINE_READLINE_H +/* Define to 1 if you have the `realpath' function. */ +#undef HAVE_REALPATH + /* Define to 1 if you have the `secure_getenv' function. */ #undef HAVE_SECURE_GETENV diff --git a/src/pcre2grep.c b/src/pcre2grep.c index b54229b..02d2e39 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -63,7 +63,7 @@ POSSIBILITY OF SUCH DAMAGE. #define WIN32 #endif -/* Some cmake's define it still */ +/* Some CMake's define it still */ #if defined(__CYGWIN__) && defined(WIN32) #undef WIN32 #endif @@ -3327,7 +3327,7 @@ if (isdirectory(pathname)) if (dee_action == dee_RECURSE) { - char buffer[FNBUFSIZ]; + char childpath[FNBUFSIZ]; char *nextfile; directory_type *dir = opendirectory(pathname); @@ -3349,8 +3349,31 @@ if (isdirectory(pathname)) rc = 2; break; } - sprintf(buffer, "%s%c%s", pathname, FILESEP, nextfile); - frc = grep_or_recurse(buffer, dir_recurse, FALSE); + sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile); + + /* If the realpath() function is available, we can try to prevent endless + recursion caused by a symlink pointing to a parent directory (GitHub + issue #2 (old Bugzilla #2794)). Original patch from Thomas Tempelmann. + Modified to avoid using strlcat() because that isn't a standard C + function, and also modified not to copy back the fully resolved path, + because that affects the output from pcre2grep.
*/ + +#ifdef HAVE_REALPATH + char resolvedpath[PATH_MAX]; + if (realpath(childpath, resolvedpath) == NULL) + continue; /* This path is invalid - we can skip processing this */ + BOOL isSame = strcmp(pathname, resolvedpath) == 0; + if (isSame) continue; /* We have a recursion */ + size_t rlen = strlen(resolvedpath); + if (rlen++ < sizeof(resolvedpath) - 3) + { + strcat(resolvedpath, "/"); + BOOL contained = strncmp(pathname, resolvedpath, rlen) == 0; + if (contained) continue; /* We have a recursion */ + } +#endif /* HAVE_REALPATH */ + + frc = grep_or_recurse(childpath, dir_recurse, FALSE); if (frc > 1) rc = frc; else if (frc == 0 && rc == 1) rc = 0; }