Faster recursive file collection on huge codebases (POSIX systems): collecting all files used to take 47 seconds, now it takes only 3.

This commit is contained in:
Pavel Bibergal 2015-12-12 01:35:38 +02:00
parent dbc7e2f338
commit e4dc97c81c
3 changed files with 52 additions and 127 deletions

View File

@ -162,130 +162,89 @@ bool FileLister::fileExists(const std::string &path)
#undef __STRICT_ANSI__
#endif
#include <glob.h>
#include <dirent.h>
#include <unistd.h>
#include <stdlib.h>
#include <limits.h>
#include <sys/stat.h>
#include <sstream>
// Get absolute path. Returns empty string if path does not exist or other error.
std::string FileLister::getAbsolutePath(const std::string& path)
static void addFiles2(std::map<std::string, std::size_t> &files,
const std::string &path,
const std::set<std::string> &extra,
bool recursive,
const PathMatch& ignored
)
{
std::string absolute_path;
struct stat file_stat;
if (stat(path.c_str(), &file_stat) != -1) {
if ((file_stat.st_mode & S_IFMT) == S_IFDIR) {
DIR * dir = opendir(path.c_str());
#ifdef PATH_MAX
char buf[PATH_MAX];
if (realpath(path.c_str(), buf) != nullptr)
absolute_path = buf;
#else
char *dynamic_buf;
if ((dynamic_buf = realpath(path.c_str(), nullptr)) != nullptr) {
absolute_path = dynamic_buf;
free(dynamic_buf);
}
#endif
if (!dir)
return;
return absolute_path;
}
dirent entry;
dirent * dir_result;
void FileLister::addFiles2(std::set<std::string> &seen_paths,
std::map<std::string, std::size_t> &files,
const std::string &path,
const std::set<std::string> &extra,
bool recursive,
const PathMatch& ignored
)
{
std::ostringstream oss;
oss << path;
if (path.length() > 0 && path.back() == '/')
oss << "*";
std::string new_path;
new_path.reserve(path.length() + 100);// prealloc some memory to avoid constant new/deletes in loop
glob_t glob_results;
glob(oss.str().c_str(), GLOB_MARK, 0, &glob_results);
for (unsigned int i = 0; i < glob_results.gl_pathc; i++) {
const std::string filename = glob_results.gl_pathv[i];
if (filename == "." || filename == ".." || filename.length() == 0)
continue;
while ((readdir_r(dir, &entry, &dir_result) == 0) && (dir_result != NULL)) {
// Determine absolute path. Empty filename if path does not exist
const std::string absolute_path = getAbsolutePath(filename);
if (absolute_path.empty())
continue;
if ((std::strcmp(dir_result->d_name, ".") == 0) ||
(std::strcmp(dir_result->d_name, "..") == 0))
continue;
// Did we already process this entry?
if (seen_paths.find(absolute_path) != seen_paths.end())
continue;
new_path = path + "/" + dir_result->d_name;
if (filename.back() != '/') {
// File
if ((Path::sameFileName(path,filename) || Path::acceptFile(filename, extra)) && !ignored.Match(filename)) {
seen_paths.insert(absolute_path);
struct stat sb;
if (stat(absolute_path.c_str(), &sb) == 0) {
// Limitation: file sizes are assumed to fit in a 'size_t'
files[filename] = static_cast<std::size_t>(sb.st_size);
} else
files[filename] = 0;
if (dir_result->d_type == DT_DIR) {
if (recursive && !ignored.Match(new_path)) {
addFiles2(files, new_path, extra, recursive, ignored);
}
} else {
if (Path::acceptFile(new_path, extra) && !ignored.Match(new_path)) {
files[new_path] = file_stat.st_size;
}
}
}
} else if (recursive) {
// Directory
if (!ignored.Match(filename)) {
seen_paths.insert(absolute_path);
addFiles2(seen_paths, files, filename, extra, recursive, ignored);
}
}
closedir(dir);
} else
files[path] = file_stat.st_size;
}
globfree(&glob_results);
}
// Collects the files found below `path` into `files` (a path -> size map).
// Both calls in the body pass `true` for the recursive flag; `extra` and `ignored`
// are forwarded unchanged.
// NOTE(review): this span comes from a diff view without +/- markers, so the
// addFiles2(seen_paths, ...) lines and the addFiles(...) line are presumably the
// removed and the added body respectively - confirm against the repository.
void FileLister::recursiveAddFiles(std::map<std::string, std::size_t> &files, const std::string &path, const std::set<std::string> &extra, const PathMatch& ignored)
{
std::set<std::string> seen_paths;
addFiles2(seen_paths, files, path, extra, true, ignored);
// Delegates to the public entry point with recursion switched on.
addFiles(files, path, extra, true, ignored);
}
// Adds the files found at `path` to `files` by forwarding to addFiles2; `extra`,
// `recursive` and `ignored` are passed through unchanged.
// NOTE(review): this span comes from a diff view without +/- markers, so the
// seen_paths form and the corrected_path form are presumably the removed and the
// added body respectively - confirm against the repository.
void FileLister::addFiles(std::map<std::string, std::size_t> &files, const std::string &path, const std::set<std::string> &extra, bool recursive, const PathMatch& ignored)
{
std::set<std::string> seen_paths;
addFiles2(seen_paths, files, path, extra, recursive, ignored);
// An empty path adds nothing; the guard also keeps back() below off an empty string.
if (!path.empty()) {
std::string corrected_path = path;
// Drop one trailing '/'. Presumably this is because the directory walk joins
// entries as path + "/" + name, which would otherwise produce "//".
if (corrected_path.back() == '/')
corrected_path.erase(corrected_path.end() - 1);
addFiles2(files, corrected_path, extra, recursive, ignored);
}
}
// Reports whether `path` names a directory.
// NOTE(review): this span comes from a diff view without +/- markers, so it holds
// two complete implementations back to back (glob-based, then stat-based) -
// presumably the removed and the added one; confirm against the repository.
bool FileLister::isDirectory(const std::string &path)
{
bool ret = false;
glob_t glob_results;
// GLOB_MARK makes glob() append a '/' to every match that is a directory.
glob(path.c_str(), GLOB_MARK, 0, &glob_results);
// Only a single match whose last character is '/' counts as a directory.
if (glob_results.gl_pathc == 1) {
const std::string glob_path = glob_results.gl_pathv[0];
if (!glob_path.empty() && glob_path[glob_path.size() - 1] == '/') {
ret = true;
}
}
// Release the match list allocated by glob() before returning.
globfree(&glob_results);
return ret;
// stat()-based form. Read as one body, nothing below the return above is reached.
struct stat file_stat;
// A failing stat() (-1) falls through to `return false`; otherwise the file-type
// bits selected by S_IFMT are compared with S_IFDIR.
if (stat(path.c_str(), &file_stat) != -1)
return ((file_stat.st_mode & S_IFMT) == S_IFDIR);
return false;
}
// Reports whether `path` names an existing regular file.
// NOTE(review): this span comes from a diff view without +/- markers, so it holds
// two stat()-based implementations back to back - presumably the removed and the
// added one; confirm against the repository.
bool FileLister::fileExists(const std::string &path)
{
struct stat statinfo;
int result = stat(path.c_str(), &statinfo);
if (result < 0) { // Todo: should check errno == ENOENT?
// File not found
return false;
}
// Check if file is regular file
if ((statinfo.st_mode & S_IFMT) == S_IFREG)
return true;
// Compact form of the same check: true only when stat() succeeds and the
// file-type bits equal S_IFREG; a failing stat() or any other type yields false.
struct stat file_stat;
if (stat(path.c_str(), &file_stat) != -1)
return ((file_stat.st_mode & S_IFMT) == S_IFREG);
return false;
}

View File

@ -81,20 +81,6 @@ public:
* @return true if path points to file and the file exists.
*/
static bool fileExists(const std::string &path);
#ifndef _WIN32
static std::string getAbsolutePath(const std::string& path);
private:
static void addFiles2(std::set<std::string> &seen_paths,
std::map<std::string, std::size_t> &files,
const std::string &path,
const std::set<std::string> &extra,
bool recursive,
const PathMatch& ignored);
#endif
};
/// @}

View File

@ -46,9 +46,6 @@ private:
}
TEST_CASE(isDirectory);
#ifndef _WIN32
TEST_CASE(absolutePath);
#endif
TEST_CASE(recursiveAddFiles);
}
@ -57,23 +54,6 @@ private:
ASSERT_EQUALS(true, FileLister::isDirectory("lib"));
}
#ifndef _WIN32
// Test: FileLister::getAbsolutePath(".") must equal the current working directory
// as reported by getcwd().
void absolutePath() const {
std::vector<char> current_dir;
// Start with PATH_MAX bytes when the platform defines it, otherwise 1024 bytes.
#ifdef PATH_MAX
current_dir.resize(PATH_MAX);
#else
current_dir.resize(1024);
#endif
// ERANGE means the buffer was too small: grow it by 1024 bytes and retry.
// Any other getcwd() failure ends the loop without further retries.
while (getcwd(&current_dir[0], current_dir.size()) == nullptr && errno == ERANGE) {
current_dir.resize(current_dir.size() + 1024);
}
std::string absolute_path = FileLister::getAbsolutePath(".");
// The buffer is compared as a C string against the returned std::string.
ASSERT_EQUALS(&current_dir[0], absolute_path);
}
#endif
void recursiveAddFiles() const {
// Recursively add add files..
std::map<std::string, std::size_t> files;