Faster recursive file collection on huge codebases (POSIX systems): collecting all files previously took 47 seconds, now it takes only 3.
This commit is contained in:
parent
dbc7e2f338
commit
e4dc97c81c
|
@ -162,130 +162,89 @@ bool FileLister::fileExists(const std::string &path)
|
|||
#undef __STRICT_ANSI__
|
||||
#endif
|
||||
|
||||
#include <glob.h>
|
||||
#include <dirent.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sstream>
|
||||
|
||||
// Get absolute path. Returns empty string if path does not exist or other error.
|
||||
std::string FileLister::getAbsolutePath(const std::string& path)
|
||||
|
||||
static void addFiles2(std::map<std::string, std::size_t> &files,
|
||||
const std::string &path,
|
||||
const std::set<std::string> &extra,
|
||||
bool recursive,
|
||||
const PathMatch& ignored
|
||||
)
|
||||
{
|
||||
std::string absolute_path;
|
||||
struct stat file_stat;
|
||||
if (stat(path.c_str(), &file_stat) != -1) {
|
||||
if ((file_stat.st_mode & S_IFMT) == S_IFDIR) {
|
||||
DIR * dir = opendir(path.c_str());
|
||||
|
||||
#ifdef PATH_MAX
|
||||
char buf[PATH_MAX];
|
||||
if (realpath(path.c_str(), buf) != nullptr)
|
||||
absolute_path = buf;
|
||||
#else
|
||||
char *dynamic_buf;
|
||||
if ((dynamic_buf = realpath(path.c_str(), nullptr)) != nullptr) {
|
||||
absolute_path = dynamic_buf;
|
||||
free(dynamic_buf);
|
||||
}
|
||||
#endif
|
||||
if (!dir)
|
||||
return;
|
||||
|
||||
return absolute_path;
|
||||
}
|
||||
dirent entry;
|
||||
dirent * dir_result;
|
||||
|
||||
void FileLister::addFiles2(std::set<std::string> &seen_paths,
|
||||
std::map<std::string, std::size_t> &files,
|
||||
const std::string &path,
|
||||
const std::set<std::string> &extra,
|
||||
bool recursive,
|
||||
const PathMatch& ignored
|
||||
)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << path;
|
||||
if (path.length() > 0 && path.back() == '/')
|
||||
oss << "*";
|
||||
std::string new_path;
|
||||
new_path.reserve(path.length() + 100);// prealloc some memory to avoid constant new/deletes in loop
|
||||
|
||||
glob_t glob_results;
|
||||
glob(oss.str().c_str(), GLOB_MARK, 0, &glob_results);
|
||||
for (unsigned int i = 0; i < glob_results.gl_pathc; i++) {
|
||||
const std::string filename = glob_results.gl_pathv[i];
|
||||
if (filename == "." || filename == ".." || filename.length() == 0)
|
||||
continue;
|
||||
while ((readdir_r(dir, &entry, &dir_result) == 0) && (dir_result != NULL)) {
|
||||
|
||||
// Determine absolute path. Empty filename if path does not exist
|
||||
const std::string absolute_path = getAbsolutePath(filename);
|
||||
if (absolute_path.empty())
|
||||
continue;
|
||||
if ((std::strcmp(dir_result->d_name, ".") == 0) ||
|
||||
(std::strcmp(dir_result->d_name, "..") == 0))
|
||||
continue;
|
||||
|
||||
// Did we already process this entry?
|
||||
if (seen_paths.find(absolute_path) != seen_paths.end())
|
||||
continue;
|
||||
new_path = path + "/" + dir_result->d_name;
|
||||
|
||||
if (filename.back() != '/') {
|
||||
// File
|
||||
|
||||
if ((Path::sameFileName(path,filename) || Path::acceptFile(filename, extra)) && !ignored.Match(filename)) {
|
||||
seen_paths.insert(absolute_path);
|
||||
|
||||
struct stat sb;
|
||||
if (stat(absolute_path.c_str(), &sb) == 0) {
|
||||
// Limitation: file sizes are assumed to fit in a 'size_t'
|
||||
files[filename] = static_cast<std::size_t>(sb.st_size);
|
||||
} else
|
||||
files[filename] = 0;
|
||||
if (dir_result->d_type == DT_DIR) {
|
||||
if (recursive && !ignored.Match(new_path)) {
|
||||
addFiles2(files, new_path, extra, recursive, ignored);
|
||||
}
|
||||
} else {
|
||||
if (Path::acceptFile(new_path, extra) && !ignored.Match(new_path)) {
|
||||
files[new_path] = file_stat.st_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (recursive) {
|
||||
// Directory
|
||||
if (!ignored.Match(filename)) {
|
||||
seen_paths.insert(absolute_path);
|
||||
addFiles2(seen_paths, files, filename, extra, recursive, ignored);
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
} else
|
||||
files[path] = file_stat.st_size;
|
||||
}
|
||||
globfree(&glob_results);
|
||||
}
|
||||
|
||||
|
||||
void FileLister::recursiveAddFiles(std::map<std::string, std::size_t> &files, const std::string &path, const std::set<std::string> &extra, const PathMatch& ignored)
|
||||
{
|
||||
std::set<std::string> seen_paths;
|
||||
addFiles2(seen_paths, files, path, extra, true, ignored);
|
||||
addFiles(files, path, extra, true, ignored);
|
||||
}
|
||||
|
||||
void FileLister::addFiles(std::map<std::string, std::size_t> &files, const std::string &path, const std::set<std::string> &extra, bool recursive, const PathMatch& ignored)
|
||||
{
|
||||
std::set<std::string> seen_paths;
|
||||
addFiles2(seen_paths, files, path, extra, recursive, ignored);
|
||||
if (!path.empty()) {
|
||||
std::string corrected_path = path;
|
||||
if (corrected_path.back() == '/')
|
||||
corrected_path.erase(corrected_path.end() - 1);
|
||||
|
||||
addFiles2(files, corrected_path, extra, recursive, ignored);
|
||||
}
|
||||
}
|
||||
|
||||
bool FileLister::isDirectory(const std::string &path)
|
||||
{
|
||||
bool ret = false;
|
||||
|
||||
glob_t glob_results;
|
||||
glob(path.c_str(), GLOB_MARK, 0, &glob_results);
|
||||
if (glob_results.gl_pathc == 1) {
|
||||
const std::string glob_path = glob_results.gl_pathv[0];
|
||||
if (!glob_path.empty() && glob_path[glob_path.size() - 1] == '/') {
|
||||
ret = true;
|
||||
}
|
||||
}
|
||||
globfree(&glob_results);
|
||||
|
||||
return ret;
|
||||
struct stat file_stat;
|
||||
if (stat(path.c_str(), &file_stat) != -1)
|
||||
return ((file_stat.st_mode & S_IFMT) == S_IFDIR);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool FileLister::fileExists(const std::string &path)
|
||||
{
|
||||
struct stat statinfo;
|
||||
int result = stat(path.c_str(), &statinfo);
|
||||
|
||||
if (result < 0) { // Todo: should check errno == ENOENT?
|
||||
// File not found
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if file is regular file
|
||||
if ((statinfo.st_mode & S_IFMT) == S_IFREG)
|
||||
return true;
|
||||
|
||||
struct stat file_stat;
|
||||
if (stat(path.c_str(), &file_stat) != -1)
|
||||
return ((file_stat.st_mode & S_IFMT) == S_IFREG);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -81,20 +81,6 @@ public:
|
|||
* @return true if path points to file and the file exists.
|
||||
*/
|
||||
static bool fileExists(const std::string &path);
|
||||
|
||||
#ifndef _WIN32
|
||||
static std::string getAbsolutePath(const std::string& path);
|
||||
|
||||
private:
|
||||
|
||||
static void addFiles2(std::set<std::string> &seen_paths,
|
||||
std::map<std::string, std::size_t> &files,
|
||||
const std::string &path,
|
||||
const std::set<std::string> &extra,
|
||||
bool recursive,
|
||||
const PathMatch& ignored);
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
/// @}
|
||||
|
|
|
@ -46,9 +46,6 @@ private:
|
|||
}
|
||||
|
||||
TEST_CASE(isDirectory);
|
||||
#ifndef _WIN32
|
||||
TEST_CASE(absolutePath);
|
||||
#endif
|
||||
TEST_CASE(recursiveAddFiles);
|
||||
}
|
||||
|
||||
|
@ -57,23 +54,6 @@ private:
|
|||
ASSERT_EQUALS(true, FileLister::isDirectory("lib"));
|
||||
}
|
||||
|
||||
#ifndef _WIN32
|
||||
void absolutePath() const {
|
||||
std::vector<char> current_dir;
|
||||
#ifdef PATH_MAX
|
||||
current_dir.resize(PATH_MAX);
|
||||
#else
|
||||
current_dir.resize(1024);
|
||||
#endif
|
||||
while (getcwd(&current_dir[0], current_dir.size()) == nullptr && errno == ERANGE) {
|
||||
current_dir.resize(current_dir.size() + 1024);
|
||||
}
|
||||
|
||||
std::string absolute_path = FileLister::getAbsolutePath(".");
|
||||
ASSERT_EQUALS(&current_dir[0], absolute_path);
|
||||
}
|
||||
#endif
|
||||
|
||||
void recursiveAddFiles() const {
|
||||
// Recursively add files..
|
||||
std::map<std::string, std::size_t> files;
|
||||
|
|
Loading…
Reference in New Issue