Merge branch 'lt/dir-cleanup'

* lt/dir-cleanup:
  Make index preloading check the whole path to the file
  Export thread-safe version of 'has_symlink_leading_path()'
  Prepare symlink caching for thread-safety
  Avoid using 'lstat()' to figure out directories
  Avoid doing extra 'lstat()'s for d_type if we have an up-to-date cache entry
  Simplify read_directory[_recursive]() arguments
  Add 'fill_directory()' helper function for directory traversal
This commit is contained in:
Junio C Hamano 2009-07-10 20:18:37 -07:00
commit 20f3749977
10 changed files with 167 additions and 121 deletions

View File

@ -97,35 +97,6 @@ static void treat_gitlinks(const char **pathspec)
}
}
static void fill_directory(struct dir_struct *dir, const char **pathspec,
int ignored_too)
{
const char *path, *base;
int baselen;
/* Set up the default git porcelain excludes */
memset(dir, 0, sizeof(*dir));
if (!ignored_too) {
dir->flags |= DIR_COLLECT_IGNORED;
setup_standard_excludes(dir);
}
/*
* Calculate common prefix for the pathspec, and
* use that to optimize the directory walk
*/
baselen = common_prefix(pathspec);
path = ".";
base = "";
if (baselen)
path = base = xmemdupz(*pathspec, baselen);
/* Read the directory and prune it */
read_directory(dir, path, base, baselen, pathspec);
if (pathspec)
prune_directory(dir, pathspec, baselen);
}
static void refresh(int verbose, const char **pathspec)
{
char *seen;
@ -343,9 +314,21 @@ int cmd_add(int argc, const char **argv, const char *prefix)
die("index file corrupt");
treat_gitlinks(pathspec);
if (add_new_files)
if (add_new_files) {
int baselen;
/* Set up the default git porcelain excludes */
memset(&dir, 0, sizeof(dir));
if (!ignored_too) {
dir.flags |= DIR_COLLECT_IGNORED;
setup_standard_excludes(&dir);
}
/* This picks up the paths that are not tracked */
fill_directory(&dir, pathspec, ignored_too);
baselen = fill_directory(&dir, pathspec);
if (pathspec)
prune_directory(&dir, pathspec, baselen);
}
if (refresh_only) {
refresh(verbose, pathspec);

View File

@ -33,7 +33,6 @@ int cmd_clean(int argc, const char **argv, const char *prefix)
int ignored_only = 0, baselen = 0, config_set = 0, errors = 0;
struct strbuf directory = STRBUF_INIT;
struct dir_struct dir;
const char *path, *base;
static const char **pathspec;
struct strbuf buf = STRBUF_INIT;
const char *qname;
@ -78,16 +77,7 @@ int cmd_clean(int argc, const char **argv, const char *prefix)
pathspec = get_pathspec(prefix, argv);
read_cache();
/*
* Calculate common prefix for the pathspec, and
* use that to optimize the directory walk
*/
baselen = common_prefix(pathspec);
path = ".";
base = "";
if (baselen)
path = base = xmemdupz(*pathspec, baselen);
read_directory(&dir, path, base, baselen, pathspec);
fill_directory(&dir, pathspec);
if (pathspec)
seen = xmalloc(argc > 0 ? argc : 1);

View File

@ -161,12 +161,7 @@ static void show_files(struct dir_struct *dir, const char *prefix)
/* For cached/deleted files we don't need to even do the readdir */
if (show_others || show_killed) {
const char *path = ".", *base = "";
int baselen = prefix_len;
if (baselen)
path = base = prefix;
read_directory(dir, path, base, baselen, pathspec);
fill_directory(dir, pathspec);
if (show_others)
show_other_files(dir);
if (show_killed)

10
cache.h
View File

@ -744,7 +744,17 @@ struct checkout {
};
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
struct cache_def {
char path[PATH_MAX + 1];
int len;
int flags;
int track_flags;
int prefix_len_stat_func;
};
extern int has_symlink_leading_path(const char *name, int len);
extern int threaded_has_symlink_leading_path(struct cache_def *, const char *, int);
extern int has_symlink_or_noent_leading_path(const char *name, int len);
extern int has_dirs_only_path(const char *name, int len, int prefix_len);
extern void invalidate_lstat_cache(const char *name, int len);

111
dir.c
View File

@ -14,12 +14,11 @@ struct path_simplify {
const char *path;
};
static int read_directory_recursive(struct dir_struct *dir,
const char *path, const char *base, int baselen,
static int read_directory_recursive(struct dir_struct *dir, const char *path, int len,
int check_only, const struct path_simplify *simplify);
static int get_dtype(struct dirent *de, const char *path);
static int get_dtype(struct dirent *de, const char *path, int len);
int common_prefix(const char **pathspec)
static int common_prefix(const char **pathspec)
{
const char *path, *slash, *next;
int prefix;
@ -52,6 +51,26 @@ int common_prefix(const char **pathspec)
return prefix;
}
int fill_directory(struct dir_struct *dir, const char **pathspec)
{
const char *path;
int len;
/*
* Calculate common prefix for the pathspec, and
* use that to optimize the directory walk
*/
len = common_prefix(pathspec);
path = "";
if (len)
path = xmemdupz(*pathspec, len);
/* Read the directory and prune it */
read_directory(dir, path, len, pathspec);
return len;
}
/*
* Does 'match' match the given name?
* A match is found if
@ -307,7 +326,7 @@ static int excluded_1(const char *pathname,
if (x->flags & EXC_FLAG_MUSTBEDIR) {
if (*dtype == DT_UNKNOWN)
*dtype = get_dtype(NULL, pathname);
*dtype = get_dtype(NULL, pathname, pathlen);
if (*dtype != DT_DIR)
continue;
}
@ -505,7 +524,7 @@ static enum directory_treatment treat_directory(struct dir_struct *dir,
/* This is the "show_other_directories" case */
if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
return show_directory;
if (!read_directory_recursive(dir, dirname, dirname, len, 1, simplify))
if (!read_directory_recursive(dir, dirname, len, 1, simplify))
return ignore_directory;
return show_directory;
}
@ -547,11 +566,52 @@ static int in_pathspec(const char *path, int len, const struct path_simplify *si
return 0;
}
static int get_dtype(struct dirent *de, const char *path)
static int get_index_dtype(const char *path, int len)
{
int pos;
struct cache_entry *ce;
ce = cache_name_exists(path, len, 0);
if (ce) {
if (!ce_uptodate(ce))
return DT_UNKNOWN;
if (S_ISGITLINK(ce->ce_mode))
return DT_DIR;
/*
* Nobody actually cares about the
* difference between DT_LNK and DT_REG
*/
return DT_REG;
}
/* Try to look it up as a directory */
pos = cache_name_pos(path, len);
if (pos >= 0)
return DT_UNKNOWN;
pos = -pos-1;
while (pos < active_nr) {
ce = active_cache[pos++];
if (strncmp(ce->name, path, len))
break;
if (ce->name[len] > '/')
break;
if (ce->name[len] < '/')
continue;
if (!ce_uptodate(ce))
break; /* continue? */
return DT_DIR;
}
return DT_UNKNOWN;
}
static int get_dtype(struct dirent *de, const char *path, int len)
{
int dtype = de ? DTYPE(de) : DT_UNKNOWN;
struct stat st;
if (dtype != DT_UNKNOWN)
return dtype;
dtype = get_index_dtype(path, len);
if (dtype != DT_UNKNOWN)
return dtype;
if (lstat(path, &st))
@ -574,15 +634,15 @@ static int get_dtype(struct dirent *de, const char *path)
* Also, we ignore the name ".git" (even if it is not a directory).
* That likely will not change.
*/
static int read_directory_recursive(struct dir_struct *dir, const char *path, const char *base, int baselen, int check_only, const struct path_simplify *simplify)
static int read_directory_recursive(struct dir_struct *dir, const char *base, int baselen, int check_only, const struct path_simplify *simplify)
{
DIR *fdir = opendir(*path ? path : ".");
DIR *fdir = opendir(*base ? base : ".");
int contents = 0;
if (fdir) {
struct dirent *de;
char fullname[PATH_MAX + 1];
memcpy(fullname, base, baselen);
char path[PATH_MAX + 1];
memcpy(path, base, baselen);
while ((de = readdir(fdir)) != NULL) {
int len, dtype;
@ -593,17 +653,18 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
continue;
len = strlen(de->d_name);
/* Ignore overly long pathnames! */
if (len + baselen + 8 > sizeof(fullname))
if (len + baselen + 8 > sizeof(path))
continue;
memcpy(fullname + baselen, de->d_name, len+1);
if (simplify_away(fullname, baselen + len, simplify))
memcpy(path + baselen, de->d_name, len+1);
len = baselen + len;
if (simplify_away(path, len, simplify))
continue;
dtype = DTYPE(de);
exclude = excluded(dir, fullname, &dtype);
exclude = excluded(dir, path, &dtype);
if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
&& in_pathspec(fullname, baselen + len, simplify))
dir_add_ignored(dir, fullname, baselen + len);
&& in_pathspec(path, len, simplify))
dir_add_ignored(dir, path,len);
/*
* Excluded? If we don't explicitly want to show
@ -613,7 +674,7 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
continue;
if (dtype == DT_UNKNOWN)
dtype = get_dtype(de, fullname);
dtype = get_dtype(de, path, len);
/*
* Do we want to see just the ignored files?
@ -630,9 +691,9 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
default:
continue;
case DT_DIR:
memcpy(fullname + baselen + len, "/", 2);
memcpy(path + len, "/", 2);
len++;
switch (treat_directory(dir, fullname, baselen + len, simplify)) {
switch (treat_directory(dir, path, len, simplify)) {
case show_directory:
if (exclude != !!(dir->flags
& DIR_SHOW_IGNORED))
@ -640,7 +701,7 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
break;
case recurse_into_directory:
contents += read_directory_recursive(dir,
fullname, fullname, baselen + len, 0, simplify);
path, len, 0, simplify);
continue;
case ignore_directory:
continue;
@ -654,7 +715,7 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
if (check_only)
goto exit_early;
else
dir_add_name(dir, fullname, baselen + len);
dir_add_name(dir, path, len);
}
exit_early:
closedir(fdir);
@ -717,15 +778,15 @@ static void free_simplify(struct path_simplify *simplify)
free(simplify);
}
int read_directory(struct dir_struct *dir, const char *path, const char *base, int baselen, const char **pathspec)
int read_directory(struct dir_struct *dir, const char *path, int len, const char **pathspec)
{
struct path_simplify *simplify;
if (has_symlink_leading_path(path, strlen(path)))
if (has_symlink_leading_path(path, len))
return dir->nr;
simplify = create_simplify(pathspec);
read_directory_recursive(dir, path, base, baselen, 0, simplify);
read_directory_recursive(dir, path, len, 0, simplify);
free_simplify(simplify);
qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);

5
dir.h
View File

@ -61,14 +61,13 @@ struct dir_struct {
char basebuf[PATH_MAX];
};
extern int common_prefix(const char **pathspec);
#define MATCHED_RECURSIVELY 1
#define MATCHED_FNMATCH 2
#define MATCHED_EXACTLY 3
extern int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen);
extern int read_directory(struct dir_struct *, const char *path, const char *base, int baselen, const char **pathspec);
extern int fill_directory(struct dir_struct *dir, const char **pathspec);
extern int read_directory(struct dir_struct *, const char *path, int len, const char **pathspec);
extern int excluded(struct dir_struct *, const char *, int *);
extern void add_excludes_from_file(struct dir_struct *, const char *fname);

View File

@ -34,7 +34,9 @@ static void *preload_thread(void *_data)
struct thread_data *p = _data;
struct index_state *index = p->index;
struct cache_entry **cep = index->cache + p->offset;
struct cache_def cache;
memset(&cache, 0, sizeof(cache));
nr = p->nr;
if (nr + p->offset > index->cache_nr)
nr = index->cache_nr - p->offset;
@ -49,6 +51,8 @@ static void *preload_thread(void *_data)
continue;
if (!ce_path_match(ce, p->pathspec))
continue;
if (threaded_has_symlink_leading_path(&cache, ce->name, ce_namelen(ce)))
continue;
if (lstat(ce->name, &st))
continue;
if (ie_match_stat(index, ce, &st, CE_MATCH_RACY_IS_DIRTY))

View File

@ -32,19 +32,13 @@ static int longest_path_match(const char *name_a, int len_a,
return match_len;
}
static struct cache_def {
char path[PATH_MAX + 1];
int len;
int flags;
int track_flags;
int prefix_len_stat_func;
} cache;
static struct cache_def default_cache;
static inline void reset_lstat_cache(void)
static inline void reset_lstat_cache(struct cache_def *cache)
{
cache.path[0] = '\0';
cache.len = 0;
cache.flags = 0;
cache->path[0] = '\0';
cache->len = 0;
cache->flags = 0;
/*
* The track_flags and prefix_len_stat_func members is only
* set by the safeguard rule inside lstat_cache()
@ -70,23 +64,23 @@ static inline void reset_lstat_cache(void)
* of the prefix, where the cache should use the stat() function
* instead of the lstat() function to test each path component.
*/
static int lstat_cache(const char *name, int len,
static int lstat_cache(struct cache_def *cache, const char *name, int len,
int track_flags, int prefix_len_stat_func)
{
int match_len, last_slash, last_slash_dir, previous_slash;
int match_flags, ret_flags, save_flags, max_len, ret;
struct stat st;
if (cache.track_flags != track_flags ||
cache.prefix_len_stat_func != prefix_len_stat_func) {
if (cache->track_flags != track_flags ||
cache->prefix_len_stat_func != prefix_len_stat_func) {
/*
* As a safeguard rule we clear the cache if the
* values of track_flags and/or prefix_len_stat_func
* does not match with the last supplied values.
*/
reset_lstat_cache();
cache.track_flags = track_flags;
cache.prefix_len_stat_func = prefix_len_stat_func;
reset_lstat_cache(cache);
cache->track_flags = track_flags;
cache->prefix_len_stat_func = prefix_len_stat_func;
match_len = last_slash = 0;
} else {
/*
@ -94,10 +88,10 @@ static int lstat_cache(const char *name, int len,
* the 2 "excluding" path types.
*/
match_len = last_slash =
longest_path_match(name, len, cache.path, cache.len,
longest_path_match(name, len, cache->path, cache->len,
&previous_slash);
match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK);
if (match_flags && match_len == cache.len)
match_flags = cache->flags & track_flags & (FL_NOENT|FL_SYMLINK);
if (match_flags && match_len == cache->len)
return match_flags;
/*
* If we now have match_len > 0, we would know that
@ -121,18 +115,18 @@ static int lstat_cache(const char *name, int len,
max_len = len < PATH_MAX ? len : PATH_MAX;
while (match_len < max_len) {
do {
cache.path[match_len] = name[match_len];
cache->path[match_len] = name[match_len];
match_len++;
} while (match_len < max_len && name[match_len] != '/');
if (match_len >= max_len && !(track_flags & FL_FULLPATH))
break;
last_slash = match_len;
cache.path[last_slash] = '\0';
cache->path[last_slash] = '\0';
if (last_slash <= prefix_len_stat_func)
ret = stat(cache.path, &st);
ret = stat(cache->path, &st);
else
ret = lstat(cache.path, &st);
ret = lstat(cache->path, &st);
if (ret) {
ret_flags = FL_LSTATERR;
@ -156,9 +150,9 @@ static int lstat_cache(const char *name, int len,
*/
save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK);
if (save_flags && last_slash > 0 && last_slash <= PATH_MAX) {
cache.path[last_slash] = '\0';
cache.len = last_slash;
cache.flags = save_flags;
cache->path[last_slash] = '\0';
cache->len = last_slash;
cache->flags = save_flags;
} else if ((track_flags & FL_DIR) &&
last_slash_dir > 0 && last_slash_dir <= PATH_MAX) {
/*
@ -172,11 +166,11 @@ static int lstat_cache(const char *name, int len,
* can still cache the path components before the last
* one (the found symlink or non-existing component).
*/
cache.path[last_slash_dir] = '\0';
cache.len = last_slash_dir;
cache.flags = FL_DIR;
cache->path[last_slash_dir] = '\0';
cache->len = last_slash_dir;
cache->flags = FL_DIR;
} else {
reset_lstat_cache();
reset_lstat_cache(cache);
}
return ret_flags;
}
@ -188,16 +182,17 @@ static int lstat_cache(const char *name, int len,
void invalidate_lstat_cache(const char *name, int len)
{
int match_len, previous_slash;
struct cache_def *cache = &default_cache; /* FIXME */
match_len = longest_path_match(name, len, cache.path, cache.len,
match_len = longest_path_match(name, len, cache->path, cache->len,
&previous_slash);
if (len == match_len) {
if ((cache.track_flags & FL_DIR) && previous_slash > 0) {
cache.path[previous_slash] = '\0';
cache.len = previous_slash;
cache.flags = FL_DIR;
if ((cache->track_flags & FL_DIR) && previous_slash > 0) {
cache->path[previous_slash] = '\0';
cache->len = previous_slash;
cache->flags = FL_DIR;
} else {
reset_lstat_cache();
reset_lstat_cache(cache);
}
}
}
@ -207,19 +202,26 @@ void invalidate_lstat_cache(const char *name, int len)
*/
void clear_lstat_cache(void)
{
reset_lstat_cache();
struct cache_def *cache = &default_cache; /* FIXME */
reset_lstat_cache(cache);
}
#define USE_ONLY_LSTAT 0
/*
* Return non-zero if path 'name' has a leading symlink component
*/
int threaded_has_symlink_leading_path(struct cache_def *cache, const char *name, int len)
{
return lstat_cache(cache, name, len, FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) & FL_SYMLINK;
}
/*
* Return non-zero if path 'name' has a leading symlink component
*/
int has_symlink_leading_path(const char *name, int len)
{
return lstat_cache(name, len,
FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) &
FL_SYMLINK;
return threaded_has_symlink_leading_path(&default_cache, name, len);
}
/*
@ -228,7 +230,8 @@ int has_symlink_leading_path(const char *name, int len)
*/
int has_symlink_or_noent_leading_path(const char *name, int len)
{
return lstat_cache(name, len,
struct cache_def *cache = &default_cache; /* FIXME */
return lstat_cache(cache, name, len,
FL_SYMLINK|FL_NOENT|FL_DIR, USE_ONLY_LSTAT) &
(FL_SYMLINK|FL_NOENT);
}
@ -242,7 +245,8 @@ int has_symlink_or_noent_leading_path(const char *name, int len)
*/
int has_dirs_only_path(const char *name, int len, int prefix_len)
{
return lstat_cache(name, len,
struct cache_def *cache = &default_cache; /* FIXME */
return lstat_cache(cache, name, len,
FL_DIR|FL_FULLPATH, prefix_len) &
FL_DIR;
}

View File

@ -551,7 +551,7 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action,
memset(&d, 0, sizeof(d));
if (o->dir)
d.exclude_per_dir = o->dir->exclude_per_dir;
i = read_directory(&d, ce->name, pathbuf, namelen+1, NULL);
i = read_directory(&d, pathbuf, namelen+1, NULL);
if (i)
return o->gently ? -1 :
error(ERRORMSG(o, not_uptodate_dir), ce->name);

View File

@ -255,7 +255,7 @@ static void wt_status_print_untracked(struct wt_status *s)
DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES;
setup_standard_excludes(&dir);
read_directory(&dir, ".", "", 0, NULL);
fill_directory(&dir, NULL);
for(i = 0; i < dir.nr; i++) {
struct dir_entry *ent = dir.entries[i];
if (!cache_name_is_other(ent->name, ent->len))