From e4dc97c81c06b997c653672f96277831624fedc7 Mon Sep 17 00:00:00 2001 From: Pavel Bibergal Date: Sat, 12 Dec 2015 01:35:38 +0200 Subject: [PATCH] faster recursive file collection on huge codebases (posix systems), was 47sec to collect all files, now its only 3 --- cli/filelister.cpp | 145 ++++++++++++++-------------------------- cli/filelister.h | 14 ---- test/testfilelister.cpp | 20 ------ 3 files changed, 52 insertions(+), 127 deletions(-) diff --git a/cli/filelister.cpp b/cli/filelister.cpp index 61e83076d..64a244f87 100644 --- a/cli/filelister.cpp +++ b/cli/filelister.cpp @@ -162,130 +162,89 @@ bool FileLister::fileExists(const std::string &path) #undef __STRICT_ANSI__ #endif -#include +#include #include #include #include #include -#include -// Get absolute path. Returns empty string if path does not exist or other error. -std::string FileLister::getAbsolutePath(const std::string& path) + +static void addFiles2(std::map &files, + const std::string &path, + const std::set &extra, + bool recursive, + const PathMatch& ignored + ) { - std::string absolute_path; + struct stat file_stat; + if (stat(path.c_str(), &file_stat) != -1) { + if ((file_stat.st_mode & S_IFMT) == S_IFDIR) { + DIR * dir = opendir(path.c_str()); -#ifdef PATH_MAX - char buf[PATH_MAX]; - if (realpath(path.c_str(), buf) != nullptr) - absolute_path = buf; -#else - char *dynamic_buf; - if ((dynamic_buf = realpath(path.c_str(), nullptr)) != nullptr) { - absolute_path = dynamic_buf; - free(dynamic_buf); - } -#endif + if (!dir) + return; - return absolute_path; -} + dirent entry; + dirent * dir_result; -void FileLister::addFiles2(std::set &seen_paths, - std::map &files, - const std::string &path, - const std::set &extra, - bool recursive, - const PathMatch& ignored - ) -{ - std::ostringstream oss; - oss << path; - if (path.length() > 0 && path.back() == '/') - oss << "*"; + std::string new_path; + new_path.reserve(path.length() + 100);// prealloc some memory to avoid constant new/deletes in loop - glob_t glob_results; - glob(oss.str().c_str(), GLOB_MARK, 0, &glob_results); - for (unsigned int i = 0; i < glob_results.gl_pathc; i++) { - const std::string filename = glob_results.gl_pathv[i]; - if (filename == "." || filename == ".." || filename.length() == 0) - continue; + while ((readdir_r(dir, &entry, &dir_result) == 0) && (dir_result != NULL)) { - // Determine absolute path. Empty filename if path does not exist - const std::string absolute_path = getAbsolutePath(filename); - if (absolute_path.empty()) - continue; + if ((std::strcmp(dir_result->d_name, ".") == 0) || + (std::strcmp(dir_result->d_name, "..") == 0)) + continue; - // Did we already process this entry? - if (seen_paths.find(absolute_path) != seen_paths.end()) - continue; + new_path = path + "/" + dir_result->d_name; - if (filename.back() != '/') { - // File - - if ((Path::sameFileName(path,filename) || Path::acceptFile(filename, extra)) && !ignored.Match(filename)) { - seen_paths.insert(absolute_path); - - struct stat sb; - if (stat(absolute_path.c_str(), &sb) == 0) { - // Limitation: file sizes are assumed to fit in a 'size_t' - files[filename] = static_cast(sb.st_size); - } else - files[filename] = 0; + if (dir_result->d_type == DT_DIR) { + if (recursive && !ignored.Match(new_path)) { + addFiles2(files, new_path, extra, recursive, ignored); + } + } else { + if (Path::acceptFile(new_path, extra) && !ignored.Match(new_path)) { + files[new_path] = file_stat.st_size; + } + } } - } else if (recursive) { - // Directory - if (!ignored.Match(filename)) { - seen_paths.insert(absolute_path); - addFiles2(seen_paths, files, filename, extra, recursive, ignored); - } - } + closedir(dir); + } else + files[path] = file_stat.st_size; } - globfree(&glob_results); } - void FileLister::recursiveAddFiles(std::map &files, const std::string &path, const std::set &extra, const PathMatch& ignored) { - std::set seen_paths; - addFiles2(seen_paths, files, path, extra, true, ignored); + addFiles(files, path, extra, true, ignored); } void FileLister::addFiles(std::map &files, const std::string &path, const std::set &extra, bool recursive, const PathMatch& ignored) { - std::set seen_paths; - addFiles2(seen_paths, files, path, extra, recursive, ignored); + if (!path.empty()) { + std::string corrected_path = path; + if (corrected_path.back() == '/') + corrected_path.erase(corrected_path.end() - 1); + + addFiles2(files, corrected_path, extra, recursive, ignored); + } } bool FileLister::isDirectory(const std::string &path) { - bool ret = false; - - glob_t glob_results; - glob(path.c_str(), GLOB_MARK, 0, &glob_results); - if (glob_results.gl_pathc == 1) { - const std::string glob_path = glob_results.gl_pathv[0]; - if (!glob_path.empty() && glob_path[glob_path.size() - 1] == '/') { - ret = true; - } - } - globfree(&glob_results); - - return ret; + struct stat file_stat; + if (stat(path.c_str(), &file_stat) != -1) + return ((file_stat.st_mode & S_IFMT) == S_IFDIR); + + return false; } bool FileLister::fileExists(const std::string &path) { - struct stat statinfo; - int result = stat(path.c_str(), &statinfo); - - if (result < 0) { // Todo: should check errno == ENOENT? - // File not found - return false; - } - - // Check if file is regular file - if ((statinfo.st_mode & S_IFMT) == S_IFREG) - return true; - + struct stat file_stat; + if (stat(path.c_str(), &file_stat) != -1) + return ((file_stat.st_mode & S_IFMT) == S_IFREG); + return false; } diff --git a/cli/filelister.h b/cli/filelister.h index b04cd050b..0b0f47a87 100644 --- a/cli/filelister.h +++ b/cli/filelister.h @@ -81,20 +81,6 @@ public: * @return true if path points to file and the file exists. */ static bool fileExists(const std::string &path); - -#ifndef _WIN32 - static std::string getAbsolutePath(const std::string& path); - -private: - - static void addFiles2(std::set &seen_paths, - std::map &files, - const std::string &path, - const std::set &extra, - bool recursive, - const PathMatch& ignored); -#endif - }; /// @} diff --git a/test/testfilelister.cpp b/test/testfilelister.cpp index 24b6fa143..3c9465990 100644 --- a/test/testfilelister.cpp +++ b/test/testfilelister.cpp @@ -46,9 +46,6 @@ private: } TEST_CASE(isDirectory); -#ifndef _WIN32 - TEST_CASE(absolutePath); -#endif TEST_CASE(recursiveAddFiles); } @@ -57,23 +54,6 @@ private: ASSERT_EQUALS(true, FileLister::isDirectory("lib")); } -#ifndef _WIN32 - void absolutePath() const { - std::vector current_dir; -#ifdef PATH_MAX - current_dir.resize(PATH_MAX); -#else - current_dir.resize(1024); -#endif - while (getcwd(¤t_dir[0], current_dir.size()) == nullptr && errno == ERANGE) { - current_dir.resize(current_dir.size() + 1024); - } - - std::string absolute_path = FileLister::getAbsolutePath("."); - ASSERT_EQUALS(¤t_dir[0], absolute_path); - } -#endif - void recursiveAddFiles() const { // Recursively add add files.. std::map files;