From 55fe6f51f41f254d3d87994d18bff04664aa013b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:24 +0700 Subject: [PATCH 01/24] dir.c: optionally compute sha-1 of a .gitignore file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is not used anywhere yet. But the goal is to compare quickly if a .gitignore file has changed when we have the SHA-1 of both old (cached somewhere) and new (from index or a tree) versions. Helped-by: Junio C Hamano Helped-by: Torsten Bögershausen Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++------- dir.h | 6 ++++++ 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/dir.c b/dir.c index 3f7a0256b6..f23bd7bebc 100644 --- a/dir.c +++ b/dir.c @@ -466,7 +466,8 @@ void add_exclude(const char *string, const char *base, x->el = el; } -static void *read_skip_worktree_file_from_index(const char *path, size_t *size) +static void *read_skip_worktree_file_from_index(const char *path, size_t *size, + struct sha1_stat *sha1_stat) { int pos, len; unsigned long sz; @@ -485,6 +486,10 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size) return NULL; } *size = xsize_t(sz); + if (sha1_stat) { + memset(&sha1_stat->stat, 0, sizeof(sha1_stat->stat)); + hashcpy(sha1_stat->sha1, active_cache[pos]->sha1); + } return data; } @@ -529,11 +534,18 @@ static void trim_trailing_spaces(char *buf) *last_space = '\0'; } -int add_excludes_from_file_to_list(const char *fname, - const char *base, - int baselen, - struct exclude_list *el, - int check_index) +/* + * Given a file with name "fname", read it (either from disk, or from + * the index if "check_index" is non-zero), parse it and store the + * exclude rules in "el". 
+ * + * If "ss" is not NULL, compute SHA-1 of the exclude file and fill + * stat data from disk (only valid if add_excludes returns zero). If + * ss_valid is non-zero, "ss" must contain good value as input. + */ +static int add_excludes(const char *fname, const char *base, int baselen, + struct exclude_list *el, int check_index, + struct sha1_stat *sha1_stat) { struct stat st; int fd, i, lineno = 1; @@ -547,7 +559,7 @@ int add_excludes_from_file_to_list(const char *fname, if (0 <= fd) close(fd); if (!check_index || - (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL) + (buf = read_skip_worktree_file_from_index(fname, &size, sha1_stat)) == NULL) return -1; if (size == 0) { free(buf); @@ -560,6 +572,11 @@ int add_excludes_from_file_to_list(const char *fname, } else { size = xsize_t(st.st_size); if (size == 0) { + if (sha1_stat) { + fill_stat_data(&sha1_stat->stat, &st); + hashcpy(sha1_stat->sha1, EMPTY_BLOB_SHA1_BIN); + sha1_stat->valid = 1; + } close(fd); return 0; } @@ -571,6 +588,22 @@ int add_excludes_from_file_to_list(const char *fname, } buf[size++] = '\n'; close(fd); + if (sha1_stat) { + int pos; + if (sha1_stat->valid && + !match_stat_data(&sha1_stat->stat, &st)) + ; /* no content change, ss->sha1 still good */ + else if (check_index && + (pos = cache_name_pos(fname, strlen(fname))) >= 0 && + !ce_stage(active_cache[pos]) && + ce_uptodate(active_cache[pos]) && + !would_convert_to_git(fname)) + hashcpy(sha1_stat->sha1, active_cache[pos]->sha1); + else + hash_sha1_file(buf, size, "blob", sha1_stat->sha1); + fill_stat_data(&sha1_stat->stat, &st); + sha1_stat->valid = 1; + } } el->filebuf = buf; @@ -589,6 +622,13 @@ int add_excludes_from_file_to_list(const char *fname, return 0; } +int add_excludes_from_file_to_list(const char *fname, const char *base, + int baselen, struct exclude_list *el, + int check_index) +{ + return add_excludes(fname, base, baselen, el, check_index, NULL); +} + struct exclude_list *add_exclude_list(struct dir_struct *dir, int 
group_type, const char *src) { diff --git a/dir.h b/dir.h index 6c45e9d4b9..cdca71b3b0 100644 --- a/dir.h +++ b/dir.h @@ -73,6 +73,12 @@ struct exclude_list_group { struct exclude_list *el; }; +struct sha1_stat { + struct stat_data stat; + unsigned char sha1[20]; + int valid; +}; + struct dir_struct { int nr, alloc; int ignored_nr, ignored_alloc; From 0dcb8d7fe0ec2687d4a6ae201ae72907d862437c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:25 +0700 Subject: [PATCH 02/24] untracked cache: record .gitignore information and dir hierarchy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The idea is if we can capture all input and (non-rescursive) output of read_directory_recursive(), and can verify later that all the input is the same, then the second r_d_r() should produce the same output as in the first run. The requirement for this to work is stat info of a directory MUST change if an entry is added to or removed from that directory (and should not change often otherwise). If your OS and filesystem do not meet this requirement, untracked cache is not for you. Most file systems on *nix should be fine. On Windows, NTFS is fine while FAT may not be [1] even though FAT on Linux seems to be fine. The list of input of r_d_r() is in the big comment block in dir.h. In short, the output of a directory (not counting subdirs) mainly depends on stat info of the directory in question, all .gitignore leading to it and the check_only flag when r_d_r() is called recursively. This patch records all this info (and the output) as r_d_r() runs. Two hash_sha1_file() are required for $GIT_DIR/info/exclude and core.excludesfile unless their stat data matches. hash_sha1_file() is only needed when .gitignore files in the worktree are modified, otherwise their SHA-1 in index is used (see the previous patch). 
We could store stat data for .gitignore files so we don't have to rehash them if their content is different from index, but I think .gitignore files are rarely modified, so not worth extra cache data (and hashing penalty read-cache.c:verify_hdr(), as we will be storing this as an index extension). The implication is, if you change .gitignore, you better add it to the index soon or you lose all the benefit of untracked cache because a modified .gitignore invalidates all subdirs recursively. This is especially bad for .gitignore at root. This cached output is about untracked files only, not ignored files because the number of tracked files is usually small, so small cache overhead, while the number of ignored files could go really high (e.g. *.o files mixing with source code). [1] "Description of NTFS date and time stamps for files and folders" http://support.microsoft.com/kb/299648 Helped-by: Torsten Bögershausen Helped-by: David Turner Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- dir.h | 60 +++++++++++++++++++++++++ 2 files changed, 183 insertions(+), 19 deletions(-) diff --git a/dir.c b/dir.c index f23bd7bebc..0e411c0802 100644 --- a/dir.c +++ b/dir.c @@ -32,7 +32,7 @@ enum path_treatment { }; static enum path_treatment read_directory_recursive(struct dir_struct *dir, - const char *path, int len, + const char *path, int len, struct untracked_cache_dir *untracked, int check_only, const struct path_simplify *simplify); static int get_dtype(struct dirent *de, const char *path, int len); @@ -534,6 +534,54 @@ static void trim_trailing_spaces(char *buf) *last_space = '\0'; } +/* + * Given a subdirectory name and "dir" of the current directory, + * search the subdir in "dir" and return it, or create a new one if it + * does not exist in "dir". + * + * If "name" has the trailing slash, it'll be excluded in the search. 
+ */ +static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, + struct untracked_cache_dir *dir, + const char *name, int len) +{ + int first, last; + struct untracked_cache_dir *d; + if (!dir) + return NULL; + if (len && name[len - 1] == '/') + len--; + first = 0; + last = dir->dirs_nr; + while (last > first) { + int cmp, next = (last + first) >> 1; + d = dir->dirs[next]; + cmp = strncmp(name, d->name, len); + if (!cmp && strlen(d->name) > len) + cmp = -1; + if (!cmp) + return d; + if (cmp < 0) { + last = next; + continue; + } + first = next+1; + } + + uc->dir_created++; + d = xmalloc(sizeof(*d) + len + 1); + memset(d, 0, sizeof(*d)); + memcpy(d->name, name, len); + d->name[len] = '\0'; + + ALLOC_GROW(dir->dirs, dir->dirs_nr + 1, dir->dirs_alloc); + memmove(dir->dirs + first + 1, dir->dirs + first, + (dir->dirs_nr - first) * sizeof(*dir->dirs)); + dir->dirs_nr++; + dir->dirs[first] = d; + return d; +} + /* * Given a file with name "fname", read it (either from disk, or from * the index if "check_index" is non-zero), parse it and store the @@ -646,14 +694,20 @@ struct exclude_list *add_exclude_list(struct dir_struct *dir, /* * Used to set up core.excludesfile and .git/info/exclude lists. 
*/ -void add_excludes_from_file(struct dir_struct *dir, const char *fname) +static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname, + struct sha1_stat *sha1_stat) { struct exclude_list *el; el = add_exclude_list(dir, EXC_FILE, fname); - if (add_excludes_from_file_to_list(fname, "", 0, el, 0) < 0) + if (add_excludes(fname, "", 0, el, 0, sha1_stat) < 0) die("cannot use %s as an exclude file", fname); } +void add_excludes_from_file(struct dir_struct *dir, const char *fname) +{ + add_excludes_from_file_1(dir, fname, NULL); +} + int match_basename(const char *basename, int basenamelen, const char *pattern, int prefix, int patternlen, int flags) @@ -828,6 +882,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) struct exclude_list_group *group; struct exclude_list *el; struct exclude_stack *stk = NULL; + struct untracked_cache_dir *untracked; int current; group = &dir->exclude_list_group[EXC_DIRS]; @@ -865,8 +920,14 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) /* Read from the parent directories and push them down. */ current = stk ? stk->baselen : -1; strbuf_setlen(&dir->basebuf, current < 0 ? 0 : current); + if (dir->untracked) + untracked = stk ? 
stk->ucd : dir->untracked->root; + else + untracked = NULL; + while (current < baselen) { const char *cp; + struct sha1_stat sha1_stat; stk = xcalloc(1, sizeof(*stk)); if (current < 0) { @@ -877,10 +938,15 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) if (!cp) die("oops in prep_exclude"); cp++; + untracked = + lookup_untracked(dir->untracked, untracked, + base + current, + cp - base - current); } stk->prev = dir->exclude_stack; stk->baselen = cp - base; stk->exclude_ix = group->nr; + stk->ucd = untracked; el = add_exclude_list(dir, EXC_DIRS, NULL); strbuf_add(&dir->basebuf, base + current, stk->baselen - current); assert(stk->baselen == dir->basebuf.len); @@ -903,6 +969,8 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) } /* Try to read per-directory file */ + hashclr(sha1_stat.sha1); + sha1_stat.valid = 0; if (dir->exclude_per_dir) { /* * dir->basebuf gets reused by the traversal, but we @@ -916,8 +984,11 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) strbuf_addbuf(&sb, &dir->basebuf); strbuf_addstr(&sb, dir->exclude_per_dir); el->src = strbuf_detach(&sb, NULL); - add_excludes_from_file_to_list(el->src, el->src, - stk->baselen, el, 1); + add_excludes(el->src, el->src, stk->baselen, el, 1, + untracked ? &sha1_stat : NULL); + } + if (untracked) { + hashcpy(untracked->exclude_sha1, sha1_stat.sha1); } dir->exclude_stack = stk; current = stk->baselen; @@ -1098,6 +1169,7 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len) * (c) otherwise, we recurse into it. */ static enum path_treatment treat_directory(struct dir_struct *dir, + struct untracked_cache_dir *untracked, const char *dirname, int len, int exclude, const struct path_simplify *simplify) { @@ -1125,7 +1197,9 @@ static enum path_treatment treat_directory(struct dir_struct *dir, if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) return exclude ? 
path_excluded : path_untracked; - return read_directory_recursive(dir, dirname, len, 1, simplify); + untracked = lookup_untracked(dir->untracked, untracked, dirname, len); + return read_directory_recursive(dir, dirname, len, + untracked, 1, simplify); } /* @@ -1241,6 +1315,7 @@ static int get_dtype(struct dirent *de, const char *path, int len) } static enum path_treatment treat_one_path(struct dir_struct *dir, + struct untracked_cache_dir *untracked, struct strbuf *path, const struct path_simplify *simplify, int dtype, struct dirent *de) @@ -1293,7 +1368,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, return path_none; case DT_DIR: strbuf_addch(path, '/'); - return treat_directory(dir, path->buf, path->len, exclude, + return treat_directory(dir, untracked, path->buf, path->len, exclude, simplify); case DT_REG: case DT_LNK: @@ -1302,6 +1377,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, } static enum path_treatment treat_path(struct dir_struct *dir, + struct untracked_cache_dir *untracked, struct dirent *de, struct strbuf *path, int baselen, @@ -1317,7 +1393,16 @@ static enum path_treatment treat_path(struct dir_struct *dir, return path_none; dtype = DTYPE(de); - return treat_one_path(dir, path, simplify, dtype, de); + return treat_one_path(dir, untracked, path, simplify, dtype, de); +} + +static void add_untracked(struct untracked_cache_dir *dir, const char *name) +{ + if (!dir) + return; + ALLOC_GROW(dir->untracked, dir->untracked_nr + 1, + dir->untracked_alloc); + dir->untracked[dir->untracked_nr++] = xstrdup(name); } /* @@ -1333,7 +1418,7 @@ static enum path_treatment treat_path(struct dir_struct *dir, */ static enum path_treatment read_directory_recursive(struct dir_struct *dir, const char *base, int baselen, - int check_only, + struct untracked_cache_dir *untracked, int check_only, const struct path_simplify *simplify) { DIR *fdir; @@ -1347,24 +1432,36 @@ static enum path_treatment read_directory_recursive(struct 
dir_struct *dir, if (!fdir) goto out; + if (untracked) + untracked->check_only = !!check_only; + while ((de = readdir(fdir)) != NULL) { /* check how the file or directory should be treated */ - state = treat_path(dir, de, &path, baselen, simplify); + state = treat_path(dir, untracked, de, &path, baselen, simplify); + if (state > dir_state) dir_state = state; /* recurse into subdir if instructed by treat_path */ if (state == path_recurse) { - subdir_state = read_directory_recursive(dir, path.buf, - path.len, check_only, simplify); + struct untracked_cache_dir *ud; + ud = lookup_untracked(dir->untracked, untracked, + path.buf + baselen, + path.len - baselen); + subdir_state = + read_directory_recursive(dir, path.buf, path.len, + ud, check_only, simplify); if (subdir_state > dir_state) dir_state = subdir_state; } if (check_only) { /* abort early if maximum state has been reached */ - if (dir_state == path_untracked) + if (dir_state == path_untracked) { + if (untracked) + add_untracked(untracked, path.buf + baselen); break; + } /* skip the dir_add_* part */ continue; } @@ -1382,8 +1479,11 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, break; case path_untracked: - if (!(dir->flags & DIR_SHOW_IGNORED)) - dir_add_name(dir, path.buf, path.len); + if (dir->flags & DIR_SHOW_IGNORED) + break; + dir_add_name(dir, path.buf, path.len); + if (untracked) + add_untracked(untracked, path.buf + baselen); break; default: @@ -1460,7 +1560,7 @@ static int treat_leading_path(struct dir_struct *dir, break; if (simplify_away(sb.buf, sb.len, simplify)) break; - if (treat_one_path(dir, &sb, simplify, + if (treat_one_path(dir, NULL, &sb, simplify, DT_DIR, NULL) == path_none) break; /* do not recurse into it */ if (len <= baselen) { @@ -1500,7 +1600,9 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru */ simplify = create_simplify(pathspec ? 
pathspec->_raw : NULL); if (!len || treat_leading_path(dir, path, len, simplify)) - read_directory_recursive(dir, path, len, 0, simplify); + read_directory_recursive(dir, path, len, + dir->untracked ? dir->untracked->root : NULL, + 0, simplify); free_simplify(simplify); qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name); qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name); @@ -1671,9 +1773,11 @@ void setup_standard_excludes(struct dir_struct *dir) excludes_file = xdg_path; } if (!access_or_warn(path, R_OK, 0)) - add_excludes_from_file(dir, path); + add_excludes_from_file_1(dir, path, + dir->untracked ? &dir->ss_info_exclude : NULL); if (excludes_file && !access_or_warn(excludes_file, R_OK, 0)) - add_excludes_from_file(dir, excludes_file); + add_excludes_from_file_1(dir, excludes_file, + dir->untracked ? &dir->ss_excludes_file : NULL); } int remove_path(const char *name) diff --git a/dir.h b/dir.h index cdca71b3b0..9ab74b4c16 100644 --- a/dir.h +++ b/dir.h @@ -66,6 +66,7 @@ struct exclude_stack { struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */ int baselen; int exclude_ix; /* index of exclude_list within EXC_DIRS exclude_list_group */ + struct untracked_cache_dir *ucd; }; struct exclude_list_group { @@ -79,6 +80,60 @@ struct sha1_stat { int valid; }; +/* + * Untracked cache + * + * The following inputs are sufficient to determine what files in a + * directory are excluded: + * + * - The list of files and directories of the directory in question + * - The $GIT_DIR/index + * - dir_struct flags + * - The content of $GIT_DIR/info/exclude + * - The content of core.excludesfile + * - The content (or the lack) of .gitignore of all parent directories + * from $GIT_WORK_TREE + * - The check_only flag in read_directory_recursive (for + * DIR_HIDE_EMPTY_DIRECTORIES) + * + * The first input can be checked using directory mtime. 
In many + * filesystems, directory mtime (stat_data field) is updated when its + * files or direct subdirs are added or removed. + * + * The second one can be hooked from cache_tree_invalidate_path(). + * Whenever a file (or a submodule) is added or removed from a + * directory, we invalidate that directory. + * + * The remaining inputs are easy, their SHA-1 could be used to verify + * their contents (exclude_sha1[], info_exclude_sha1[] and + * excludes_file_sha1[]) + */ +struct untracked_cache_dir { + struct untracked_cache_dir **dirs; + char **untracked; + struct stat_data stat_data; + unsigned int untracked_alloc, dirs_nr, dirs_alloc; + unsigned int untracked_nr; + unsigned int check_only : 1; + /* null SHA-1 means this directory does not have .gitignore */ + unsigned char exclude_sha1[20]; + char name[FLEX_ARRAY]; +}; + +struct untracked_cache { + struct sha1_stat ss_info_exclude; + struct sha1_stat ss_excludes_file; + const char *exclude_per_dir; + /* + * dir_struct#flags must match dir_flags or the untracked + * cache is ignored. + */ + unsigned dir_flags; + struct untracked_cache_dir *root; + /* Statistics */ + int dir_created; +}; + struct dir_struct { int nr, alloc; int ignored_nr, ignored_alloc; @@ -126,6 +181,11 @@ struct dir_struct { struct exclude_stack *exclude_stack; struct exclude *exclude; struct strbuf basebuf; + + /* Enable untracked file cache if set */ + struct untracked_cache *untracked; + struct sha1_stat ss_info_exclude; + struct sha1_stat ss_excludes_file; }; /* From ccad261f07900b55029f3fd42a9ec8f17229808f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:26 +0700 Subject: [PATCH 03/24] untracked cache: initial untracked cache validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the starting conditions and all global exclude files are good to go. 
If not, either disable untracked cache completely, or wipe out the cache and start fresh. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- dir.h | 4 +++ 2 files changed, 114 insertions(+), 3 deletions(-) diff --git a/dir.c b/dir.c index 0e411c0802..a0654885b6 100644 --- a/dir.c +++ b/dir.c @@ -582,6 +582,22 @@ static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, return d; } +static void do_invalidate_gitignore(struct untracked_cache_dir *dir) +{ + int i; + dir->valid = 0; + dir->untracked_nr = 0; + for (i = 0; i < dir->dirs_nr; i++) + do_invalidate_gitignore(dir->dirs[i]); +} + +static void invalidate_gitignore(struct untracked_cache *uc, + struct untracked_cache_dir *dir) +{ + uc->gitignore_invalidated++; + do_invalidate_gitignore(dir); +} + /* * Given a file with name "fname", read it (either from disk, or from * the index if "check_index" is non-zero), parse it and store the @@ -698,6 +714,13 @@ static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname, struct sha1_stat *sha1_stat) { struct exclude_list *el; + /* + * catch setup_standard_excludes() that's called before + * dir->untracked is assigned. That function behaves + * differently when dir->untracked is non-NULL. 
+ */ + if (!dir->untracked) + dir->unmanaged_exclude_files++; el = add_exclude_list(dir, EXC_FILE, fname); if (add_excludes(fname, "", 0, el, 0, sha1_stat) < 0) die("cannot use %s as an exclude file", fname); @@ -705,6 +728,7 @@ static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname, void add_excludes_from_file(struct dir_struct *dir, const char *fname) { + dir->unmanaged_exclude_files++; /* see validate_untracked_cache() */ add_excludes_from_file_1(dir, fname, NULL); } @@ -1573,9 +1597,87 @@ static int treat_leading_path(struct dir_struct *dir, return rc; } +static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *dir, + int base_len, + const struct pathspec *pathspec) +{ + struct untracked_cache_dir *root; + + if (!dir->untracked) + return NULL; + + /* + * We only support $GIT_DIR/info/exclude and core.excludesfile + * as the global ignore rule files. Any other additions + * (e.g. from command line) invalidate the cache. This + * condition also catches running setup_standard_excludes() + * before setting dir->untracked! + */ + if (dir->unmanaged_exclude_files) + return NULL; + + /* + * Optimize for the main use case only: whole-tree git + * status. More work involved in treat_leading_path() if we + * use cache on just a subset of the worktree. pathspec + * support could make the matter even worse. + */ + if (base_len || (pathspec && pathspec->nr)) + return NULL; + + /* Different set of flags may produce different results */ + if (dir->flags != dir->untracked->dir_flags || + /* + * See treat_directory(), case index_nonexistent. Without + * this flag, we may need to also cache .git file content + * for the resolve_gitlink_ref() call, which we don't. 
+ */ + !(dir->flags & DIR_SHOW_OTHER_DIRECTORIES) || + /* We don't support collecting ignore files */ + (dir->flags & (DIR_SHOW_IGNORED | DIR_SHOW_IGNORED_TOO | + DIR_COLLECT_IGNORED))) + return NULL; + + /* + * If we use .gitignore in the cache and now you change it to + * .gitexclude, everything will go wrong. + */ + if (dir->exclude_per_dir != dir->untracked->exclude_per_dir && + strcmp(dir->exclude_per_dir, dir->untracked->exclude_per_dir)) + return NULL; + + /* + * EXC_CMDL is not considered in the cache. If people set it, + * skip the cache. + */ + if (dir->exclude_list_group[EXC_CMDL].nr) + return NULL; + + if (!dir->untracked->root) { + const int len = sizeof(*dir->untracked->root); + dir->untracked->root = xmalloc(len); + memset(dir->untracked->root, 0, len); + } + + /* Validate $GIT_DIR/info/exclude and core.excludesfile */ + root = dir->untracked->root; + if (hashcmp(dir->ss_info_exclude.sha1, + dir->untracked->ss_info_exclude.sha1)) { + invalidate_gitignore(dir->untracked, root); + dir->untracked->ss_info_exclude = dir->ss_info_exclude; + } + if (hashcmp(dir->ss_excludes_file.sha1, + dir->untracked->ss_excludes_file.sha1)) { + invalidate_gitignore(dir->untracked, root); + dir->untracked->ss_excludes_file = dir->ss_excludes_file; + } + return root; +} + int read_directory(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec) { struct path_simplify *simplify; + struct untracked_cache_dir *untracked; /* * Check out create_simplify() @@ -1599,10 +1701,15 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru * create_simplify(). */ simplify = create_simplify(pathspec ? pathspec->_raw : NULL); + untracked = validate_untracked_cache(dir, len, pathspec); + if (!untracked) + /* + * make sure untracked cache code path is disabled, + * e.g. prep_exclude() + */ + dir->untracked = NULL; if (!len || treat_leading_path(dir, path, len, simplify)) - read_directory_recursive(dir, path, len, - dir->untracked ? 
dir->untracked->root : NULL, - 0, simplify); + read_directory_recursive(dir, path, len, untracked, 0, simplify); free_simplify(simplify); qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name); qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name); diff --git a/dir.h b/dir.h index 9ab74b4c16..1d7a9585fe 100644 --- a/dir.h +++ b/dir.h @@ -115,6 +115,8 @@ struct untracked_cache_dir { unsigned int untracked_alloc, dirs_nr, dirs_alloc; unsigned int untracked_nr; unsigned int check_only : 1; + /* all data in this struct are good */ + unsigned int valid : 1; /* null SHA-1 means this directory does not have .gitignore */ unsigned char exclude_sha1[20]; char name[FLEX_ARRAY]; @@ -132,6 +134,7 @@ struct untracked_cache { struct untracked_cache_dir *root; /* Statistics */ int dir_created; + int gitignore_invalidated; }; struct dir_struct { @@ -186,6 +189,7 @@ struct dir_struct { struct untracked_cache *untracked; struct sha1_stat ss_info_exclude; struct sha1_stat ss_excludes_file; + unsigned unmanaged_exclude_files; }; /* From 5ebf79ad4b308c678bd9623dd906c01bb0ab7e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:27 +0700 Subject: [PATCH 04/24] untracked cache: invalidate dirs recursively if .gitignore changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's easy to see that if an existing .gitignore changes, its SHA-1 would be different and invalidate_gitignore() is called. If .gitignore is removed, add_excludes() will treat it like an empty .gitignore, which again should invalidate the cached directory data. if .gitignore is added, lookup_untracked() already fills initial .gitignore SHA-1 as "empty file", so again invalidate_gitignore() is called. 
Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/dir.c b/dir.c index a0654885b6..ec7c49643a 100644 --- a/dir.c +++ b/dir.c @@ -1011,7 +1011,23 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) add_excludes(el->src, el->src, stk->baselen, el, 1, untracked ? &sha1_stat : NULL); } - if (untracked) { + /* + * NEEDSWORK: when untracked cache is enabled, prep_exclude() + * will first be called in valid_cached_dir() then maybe many + * times more in last_exclude_matching(). When the cache is + * used, last_exclude_matching() will not be called and + * reading .gitignore content will be a waste. + * + * So when it's called by valid_cached_dir() and we can get + * .gitignore SHA-1 from the index (i.e. .gitignore is not + * modified on work tree), we could delay reading the + * .gitignore content until we absolutely need it in + * last_exclude_matching(). Be careful about ignore rule + * order, though, if you do that. + */ + if (untracked && + hashcmp(sha1_stat.sha1, untracked->exclude_sha1)) { + invalidate_gitignore(dir->untracked, untracked); hashcpy(untracked->exclude_sha1, sha1_stat.sha1); } dir->exclude_stack = stk; From cf7c61484f77f6212b52c7e9fdce7bfbbb4eb854 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:28 +0700 Subject: [PATCH 05/24] untracked cache: make a wrapper around {open,read,close}dir() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to feed different info to read_directory_recursive() based on untracked cache in the next patch. 
Helped-by: Ramsay Jones Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/dir.c b/dir.c index ec7c49643a..86bf6e9311 100644 --- a/dir.c +++ b/dir.c @@ -31,6 +31,15 @@ enum path_treatment { path_untracked }; +/* + * Support data structure for our opendir/readdir/closedir wrappers + */ +struct cached_dir { + DIR *fdir; + struct untracked_cache_dir *untracked; + struct dirent *de; +}; + static enum path_treatment read_directory_recursive(struct dir_struct *dir, const char *path, int len, struct untracked_cache_dir *untracked, int check_only, const struct path_simplify *simplify); @@ -1418,12 +1427,13 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, static enum path_treatment treat_path(struct dir_struct *dir, struct untracked_cache_dir *untracked, - struct dirent *de, + struct cached_dir *cdir, struct strbuf *path, int baselen, const struct path_simplify *simplify) { int dtype; + struct dirent *de = cdir->de; if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git")) return path_none; @@ -1445,6 +1455,37 @@ static void add_untracked(struct untracked_cache_dir *dir, const char *name) dir->untracked[dir->untracked_nr++] = xstrdup(name); } +static int open_cached_dir(struct cached_dir *cdir, + struct dir_struct *dir, + struct untracked_cache_dir *untracked, + struct strbuf *path, + int check_only) +{ + memset(cdir, 0, sizeof(*cdir)); + cdir->untracked = untracked; + cdir->fdir = opendir(path->len ? path->buf : "."); + if (!cdir->fdir) + return -1; + return 0; +} + +static int read_cached_dir(struct cached_dir *cdir) +{ + if (cdir->fdir) { + cdir->de = readdir(cdir->fdir); + if (!cdir->de) + return -1; + return 0; + } + return -1; +} + +static void close_cached_dir(struct cached_dir *cdir) +{ + if (cdir->fdir) + closedir(cdir->fdir); +} + /* * Read a directory tree. 
We currently ignore anything but * directories, regular files and symlinks. That's because git @@ -1461,23 +1502,21 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, struct untracked_cache_dir *untracked, int check_only, const struct path_simplify *simplify) { - DIR *fdir; + struct cached_dir cdir; enum path_treatment state, subdir_state, dir_state = path_none; - struct dirent *de; struct strbuf path = STRBUF_INIT; strbuf_add(&path, base, baselen); - fdir = opendir(path.len ? path.buf : "."); - if (!fdir) + if (open_cached_dir(&cdir, dir, untracked, &path, check_only)) goto out; if (untracked) untracked->check_only = !!check_only; - while ((de = readdir(fdir)) != NULL) { + while (!read_cached_dir(&cdir)) { /* check how the file or directory should be treated */ - state = treat_path(dir, untracked, de, &path, baselen, simplify); + state = treat_path(dir, untracked, &cdir, &path, baselen, simplify); if (state > dir_state) dir_state = state; @@ -1530,7 +1569,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, break; } } - closedir(fdir); + close_cached_dir(&cdir); out: strbuf_release(&path); From 91a2288b5f63fba82e912dca475154d5b9dd233a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:29 +0700 Subject: [PATCH 06/24] untracked cache: record/validate dir mtime and reuse cached output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main readdir loop in read_directory_recursive() is replaced with a new one that checks if cached results of a directory is still valid. If a file is added or removed from the index, the containing directory is invalidated (but not its subdirs). If directory's mtime is changed, the same happens. If a .gitignore is updated, the containing directory and all subdirs are invalidated recursively. If dir_struct#flags or other conditions change, the cache is ignored. 
If a directory is invalidated, we opendir/readdir/closedir and run the exclude machinery on that directory listing as usual. If the untracked cache is also enabled, we'll update the cache along the way. If a directory is validated, we simply pull the untracked listing out from the cache. The cache also records the list of direct subdirs that we have to recurse into. Fully excluded directories are seen as "untracked files". In the best case when no dirs are invalidated, read_directory() becomes a series of stat(dir), open(.gitignore), fstat(), read(), close() and optionally hash_sha1_file(). For comparison, standard read_directory() is a sequence of opendir(), readdir(), open(.gitignore), fstat(), read(), close(), the expensive last_exclude_matching() and closedir(). We already try not to open(.gitignore) if we know it does not exist, so the open/fstat/read/close sequence does not apply to every directory. The sequence could be reduced further, as noted in prep_exclude() in another patch. So in theory, the entire best-case read_directory sequence could be reduced to a series of stat() and nothing else. This is not a silver bullet approach. When you compile a C file, for example, the old .o file is removed and a new one with the same name created, effectively invalidating the containing directory's cache (but not its subdirectories). If your build process touches every directory, this cache adds extra overhead for nothing, so it's a good idea to separate generated files from tracked files. Editors may use the same strategy for saving files. And of course you're out of luck running your repo on an unsupported filesystem and/or operating system.
Helped-by: Eric Sunshine Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- dir.h | 2 + 2 files changed, 121 insertions(+), 2 deletions(-) diff --git a/dir.c b/dir.c index 86bf6e9311..5415374105 100644 --- a/dir.c +++ b/dir.c @@ -37,7 +37,12 @@ enum path_treatment { struct cached_dir { DIR *fdir; struct untracked_cache_dir *untracked; + int nr_files; + int nr_dirs; + struct dirent *de; + const char *file; + struct untracked_cache_dir *ucd; }; static enum path_treatment read_directory_recursive(struct dir_struct *dir, @@ -607,6 +612,14 @@ static void invalidate_gitignore(struct untracked_cache *uc, do_invalidate_gitignore(dir); } +static void invalidate_directory(struct untracked_cache *uc, + struct untracked_cache_dir *dir) +{ + uc->dir_invalidated++; + dir->valid = 0; + dir->untracked_nr = 0; +} + /* * Given a file with name "fname", read it (either from disk, or from * the index if "check_index" is non-zero), parse it and store the @@ -1425,6 +1438,39 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, } } +static enum path_treatment treat_path_fast(struct dir_struct *dir, + struct untracked_cache_dir *untracked, + struct cached_dir *cdir, + struct strbuf *path, + int baselen, + const struct path_simplify *simplify) +{ + strbuf_setlen(path, baselen); + if (!cdir->ucd) { + strbuf_addstr(path, cdir->file); + return path_untracked; + } + strbuf_addstr(path, cdir->ucd->name); + /* treat_one_path() does this before it calls treat_directory() */ + if (path->buf[path->len - 1] != '/') + strbuf_addch(path, '/'); + if (cdir->ucd->check_only) + /* + * check_only is set as a result of treat_directory() getting + * to its bottom. Verify again the same set of directories + * with check_only set. 
+ */ + return read_directory_recursive(dir, path->buf, path->len, + cdir->ucd, 1, simplify); + /* + * We get path_recurse in the first run when + * directory_exists_in_index() returns index_nonexistent. We + * are sure that new changes in the index does not impact the + * outcome. Return now. + */ + return path_recurse; +} + static enum path_treatment treat_path(struct dir_struct *dir, struct untracked_cache_dir *untracked, struct cached_dir *cdir, @@ -1435,6 +1481,9 @@ static enum path_treatment treat_path(struct dir_struct *dir, int dtype; struct dirent *de = cdir->de; + if (!de) + return treat_path_fast(dir, untracked, cdir, path, + baselen, simplify); if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git")) return path_none; strbuf_setlen(path, baselen); @@ -1455,6 +1504,52 @@ static void add_untracked(struct untracked_cache_dir *dir, const char *name) dir->untracked[dir->untracked_nr++] = xstrdup(name); } +static int valid_cached_dir(struct dir_struct *dir, + struct untracked_cache_dir *untracked, + struct strbuf *path, + int check_only) +{ + struct stat st; + + if (!untracked) + return 0; + + if (stat(path->len ? path->buf : ".", &st)) { + invalidate_directory(dir->untracked, untracked); + memset(&untracked->stat_data, 0, sizeof(untracked->stat_data)); + return 0; + } + if (!untracked->valid || + match_stat_data(&untracked->stat_data, &st)) { + if (untracked->valid) + invalidate_directory(dir->untracked, untracked); + fill_stat_data(&untracked->stat_data, &st); + return 0; + } + + if (untracked->check_only != !!check_only) { + invalidate_directory(dir->untracked, untracked); + return 0; + } + + /* + * prep_exclude will be called eventually on this directory, + * but it's called much later in last_exclude_matching(). We + * need it now to determine the validity of the cache for this + * path. The next calls will be nearly no-op, the way + * prep_exclude() is designed. 
+ */ + if (path->len && path->buf[path->len - 1] != '/') { + strbuf_addch(path, '/'); + prep_exclude(dir, path->buf, path->len); + strbuf_setlen(path, path->len - 1); + } else + prep_exclude(dir, path->buf, path->len); + + /* hopefully prep_exclude() haven't invalidated this entry... */ + return untracked->valid; +} + static int open_cached_dir(struct cached_dir *cdir, struct dir_struct *dir, struct untracked_cache_dir *untracked, @@ -1463,7 +1558,11 @@ static int open_cached_dir(struct cached_dir *cdir, { memset(cdir, 0, sizeof(*cdir)); cdir->untracked = untracked; + if (valid_cached_dir(dir, untracked, path, check_only)) + return 0; cdir->fdir = opendir(path->len ? path->buf : "."); + if (dir->untracked) + dir->untracked->dir_opened++; if (!cdir->fdir) return -1; return 0; @@ -1477,6 +1576,18 @@ static int read_cached_dir(struct cached_dir *cdir) return -1; return 0; } + while (cdir->nr_dirs < cdir->untracked->dirs_nr) { + struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs]; + cdir->ucd = d; + cdir->nr_dirs++; + return 0; + } + cdir->ucd = NULL; + if (cdir->nr_files < cdir->untracked->untracked_nr) { + struct untracked_cache_dir *d = cdir->untracked; + cdir->file = d->untracked[cdir->nr_files++]; + return 0; + } return -1; } @@ -1484,6 +1595,12 @@ static void close_cached_dir(struct cached_dir *cdir) { if (cdir->fdir) closedir(cdir->fdir); + /* + * We have gone through this directory and found no untracked + * entries. Mark it valid. 
+ */ + if (cdir->untracked) + cdir->untracked->valid = 1; } /* @@ -1537,7 +1654,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, if (check_only) { /* abort early if maximum state has been reached */ if (dir_state == path_untracked) { - if (untracked) + if (cdir.fdir) add_untracked(untracked, path.buf + baselen); break; } @@ -1561,7 +1678,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, if (dir->flags & DIR_SHOW_IGNORED) break; dir_add_name(dir, path.buf, path.len); - if (untracked) + if (cdir.fdir) add_untracked(untracked, path.buf + baselen); break; diff --git a/dir.h b/dir.h index 1d7a9585fe..ff3d99bcb0 100644 --- a/dir.h +++ b/dir.h @@ -135,6 +135,8 @@ struct untracked_cache { /* Statistics */ int dir_created; int gitignore_invalidated; + int dir_invalidated; + int dir_opened; }; struct dir_struct { From 26cb0182b8b2e119f469750b3511fac4624f6667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:30 +0700 Subject: [PATCH 07/24] untracked cache: mark what dirs should be recursed/saved MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we redo this thing in a functional style, we would have one struct untracked_dir as input tree and another as output. The input is used for verification. The output is a brand new tree, reflecting current worktree. But that means recreate a lot of dir nodes even if a lot could be shared between input and output trees in good cases. So we go with the messy but efficient way, combining both input and output trees into one. We need a way to know which node in this combined tree belongs to the output. This is the purpose of this "recurse" flag. "valid" bit can't be used for this because it's about data of the node except the subdirs. 
When we invalidate a directory, we want to keep cached data of the subdirs intact even though we don't really know which subdirs still exist (yet). Then we check the worktree to see which subdirs actually remain on disk. Those will have their 'recurse' bit set again. If cached data for those are still valid, we may be able to avoid computing exclude files for them. Those subdirs that are deleted will have their 'recurse' bit remain clear and their 'valid' bits do not matter. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 14 +++++++++++++- dir.h | 3 ++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/dir.c b/dir.c index 5415374105..2d0582e8a8 100644 --- a/dir.c +++ b/dir.c @@ -615,9 +615,12 @@ static void invalidate_gitignore(struct untracked_cache *uc, static void invalidate_directory(struct untracked_cache *uc, struct untracked_cache_dir *dir) { + int i; uc->dir_invalidated++; dir->valid = 0; dir->untracked_nr = 0; + for (i = 0; i < dir->dirs_nr; i++) + dir->dirs[i]->recurse = 0; } /* @@ -1578,6 +1581,10 @@ static int read_cached_dir(struct cached_dir *cdir) } while (cdir->nr_dirs < cdir->untracked->dirs_nr) { struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs]; + if (!d->recurse) { + cdir->nr_dirs++; + continue; + } cdir->ucd = d; cdir->nr_dirs++; return 0; @@ -1599,8 +1606,10 @@ static void close_cached_dir(struct cached_dir *cdir) * We have gone through this directory and found no untracked * entries. Mark it valid.
*/ - if (cdir->untracked) + if (cdir->untracked) { cdir->untracked->valid = 1; + cdir->untracked->recurse = 1; + } } /* @@ -1843,6 +1852,9 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d invalidate_gitignore(dir->untracked, root); dir->untracked->ss_excludes_file = dir->ss_excludes_file; } + + /* Make sure this directory is not dropped out at saving phase */ + root->recurse = 1; return root; } diff --git a/dir.h b/dir.h index ff3d99bcb0..95baf014ca 100644 --- a/dir.h +++ b/dir.h @@ -115,8 +115,9 @@ struct untracked_cache_dir { unsigned int untracked_alloc, dirs_nr, dirs_alloc; unsigned int untracked_nr; unsigned int check_only : 1; - /* all data in this struct are good */ + /* all data except 'dirs' in this struct are good */ unsigned int valid : 1; + unsigned int recurse : 1; /* null SHA-1 means this directory does not have .gitignore */ unsigned char exclude_sha1[20]; char name[FLEX_ARRAY]; From 27b099ae8742f014700edba92036db288750e44a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:31 +0700 Subject: [PATCH 08/24] untracked cache: don't open non-existent .gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This cuts down a significant number of open(.gitignore) because most directories usually don't have .gitignore files. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/dir.c b/dir.c index 2d0582e8a8..f39024c639 100644 --- a/dir.c +++ b/dir.c @@ -1020,7 +1020,21 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) /* Try to read per-directory file */ hashclr(sha1_stat.sha1); sha1_stat.valid = 0; - if (dir->exclude_per_dir) { + if (dir->exclude_per_dir && + /* + * If we know that no files have been added in + * this directory (i.e.
valid_cached_dir() has + * been executed and set untracked->valid) .. + */ + (!untracked || !untracked->valid || + /* + * .. and .gitignore does not exist before + * (i.e. null exclude_sha1 and skip_worktree is + * not set). Then we can skip loading .gitignore, + * which would result in ENOENT anyway. + * skip_worktree is taken care in read_directory() + */ + !is_null_sha1(untracked->exclude_sha1))) { /* * dir->basebuf gets reused by the traversal, but we * need fname to remain unchanged to ensure the src @@ -1783,6 +1797,7 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d const struct pathspec *pathspec) { struct untracked_cache_dir *root; + int i; if (!dir->untracked) return NULL; @@ -1834,6 +1849,15 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d if (dir->exclude_list_group[EXC_CMDL].nr) return NULL; + /* + * An optimization in prep_exclude() does not play well with + * CE_SKIP_WORKTREE. It's a rare case anyway, if a single + * entry has that bit set, disable the whole untracked cache. 
+ */ + for (i = 0; i < active_nr; i++) + if (ce_skip_worktree(active_cache[i])) + return NULL; + if (!dir->untracked->root) { const int len = sizeof(*dir->untracked->root); dir->untracked->root = xmalloc(len); From be0d9d532326a81d761913e3ec9e2e7c62eeca7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:32 +0700 Subject: [PATCH 09/24] ewah: add convenient wrapper ewah_serialize_strbuf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- ewah/ewah_io.c | 13 +++++++++++++ ewah/ewok.h | 2 ++ split-index.c | 11 ++--------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/ewah/ewah_io.c b/ewah/ewah_io.c index 1c2d7afd4c..43481b9c60 100644 --- a/ewah/ewah_io.c +++ b/ewah/ewah_io.c @@ -19,6 +19,7 @@ */ #include "git-compat-util.h" #include "ewok.h" +#include "strbuf.h" int ewah_serialize_native(struct ewah_bitmap *self, int fd) { @@ -110,6 +111,18 @@ int ewah_serialize(struct ewah_bitmap *self, int fd) return ewah_serialize_to(self, write_helper, (void *)(intptr_t)fd); } +static int write_strbuf(void *user_data, const void *data, size_t len) +{ + struct strbuf *sb = user_data; + strbuf_add(sb, data, len); + return len; +} + +int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *sb) +{ + return ewah_serialize_to(self, write_strbuf, sb); +} + int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len) { const uint8_t *ptr = map; diff --git a/ewah/ewok.h b/ewah/ewok.h index 13c6e20412..e732525367 100644 --- a/ewah/ewok.h +++ b/ewah/ewok.h @@ -30,6 +30,7 @@ # define ewah_calloc xcalloc #endif +struct strbuf; typedef uint64_t eword_t; #define BITS_IN_WORD (sizeof(eword_t) * 8) @@ -98,6 +99,7 @@ int ewah_serialize_to(struct ewah_bitmap *self, void *out); int ewah_serialize(struct ewah_bitmap *self, int fd); int ewah_serialize_native(struct ewah_bitmap *self, int 
fd); +int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *); int ewah_deserialize(struct ewah_bitmap *self, int fd); int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len); diff --git a/split-index.c b/split-index.c index 21485e2066..968b780a06 100644 --- a/split-index.c +++ b/split-index.c @@ -41,13 +41,6 @@ int read_link_extension(struct index_state *istate, return 0; } -static int write_strbuf(void *user_data, const void *data, size_t len) -{ - struct strbuf *sb = user_data; - strbuf_add(sb, data, len); - return len; -} - int write_link_extension(struct strbuf *sb, struct index_state *istate) { @@ -55,8 +48,8 @@ int write_link_extension(struct strbuf *sb, strbuf_add(sb, si->base_sha1, 20); if (!si->delete_bitmap && !si->replace_bitmap) return 0; - ewah_serialize_to(si->delete_bitmap, write_strbuf, sb); - ewah_serialize_to(si->replace_bitmap, write_strbuf, sb); + ewah_serialize_strbuf(si->delete_bitmap, sb); + ewah_serialize_strbuf(si->replace_bitmap, sb); return 0; } From 83c094ad0dd2104adbbec034f802dceb1d052981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:33 +0700 Subject: [PATCH 10/24] untracked cache: save to an index extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Helped-by: Stefan Beller Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/technical/index-format.txt | 58 ++++++++++ cache.h | 3 + dir.c | 139 +++++++++++++++++++++++ dir.h | 1 + read-cache.c | 12 ++ 5 files changed, 213 insertions(+) diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index 35112e4966..db59a13600 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -233,3 +233,61 @@ Git index format The remaining index entries after replaced ones will be added to the final index. 
These added entries are also sorted by entry name then stage. + +== Untracked cache + + Untracked cache saves the untracked file list and necessary data to + verify the cache. The signature for this extension is { 'U', 'N', + 'T', 'R' }. + + The extension starts with + + - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from + ctime field until "file size". + + - Stat data of core.excludesfile + + - 32-bit dir_flags (see struct dir_struct) + + - 160-bit SHA-1 of $GIT_DIR/info/exclude. Null SHA-1 means the file + does not exist. + + - 160-bit SHA-1 of core.excludesfile. Null SHA-1 means the file does + not exist. + + - NUL-terminated string of per-dir exclude file name. This usually + is ".gitignore". + + - The number of following directory blocks, variable width + encoding. If this number is zero, the extension ends here with a + following NUL. + + - A number of directory blocks in depth-first-search order, each + consists of + + - The number of untracked entries, variable width encoding. + + - The number of sub-directory blocks, variable width encoding. + + - The directory name terminated by NUL. + + - A number of untracked file/dir names terminated by NUL. + +The remaining data of each directory block is grouped by type: + + - An ewah bitmap, the n-th bit marks whether the n-th directory has + valid untracked cache entries. + + - An ewah bitmap, the n-th bit records "check-only" bit of + read_directory_recursive() for the n-th directory. + + - An ewah bitmap, the n-th bit indicates whether SHA-1 and stat data + is valid for the n-th directory and exists in the next data. + + - An array of stat data. The n-th data corresponds with the n-th + "one" bit in the previous ewah bitmap. + + - An array of SHA-1. The n-th SHA-1 corresponds with the n-th "one" bit + in the previous ewah bitmap. + + - One NUL.
diff --git a/cache.h b/cache.h index 761c5704b2..811cc36547 100644 --- a/cache.h +++ b/cache.h @@ -291,6 +291,8 @@ static inline unsigned int canon_mode(unsigned int mode) #define SPLIT_INDEX_ORDERED (1 << 6) struct split_index; +struct untracked_cache; + struct index_state { struct cache_entry **cache; unsigned int version; @@ -304,6 +306,7 @@ struct index_state { struct hashmap name_hash; struct hashmap dir_hash; unsigned char sha1[20]; + struct untracked_cache *untracked; }; extern struct index_state the_index; diff --git a/dir.c b/dir.c index f39024c639..f695e0c685 100644 --- a/dir.c +++ b/dir.c @@ -12,6 +12,8 @@ #include "refs.h" #include "wildmatch.h" #include "pathspec.h" +#include "varint.h" +#include "ewah/ewok.h" struct path_simplify { int len; @@ -2144,3 +2146,140 @@ void clear_directory(struct dir_struct *dir) } strbuf_release(&dir->basebuf); } + +struct ondisk_untracked_cache { + struct stat_data info_exclude_stat; + struct stat_data excludes_file_stat; + uint32_t dir_flags; + unsigned char info_exclude_sha1[20]; + unsigned char excludes_file_sha1[20]; + char exclude_per_dir[FLEX_ARRAY]; +}; + +#define ouc_size(len) (offsetof(struct ondisk_untracked_cache, exclude_per_dir) + len + 1) + +struct write_data { + int index; /* number of written untracked_cache_dir */ + struct ewah_bitmap *check_only; /* from untracked_cache_dir */ + struct ewah_bitmap *valid; /* from untracked_cache_dir */ + struct ewah_bitmap *sha1_valid; /* set if exclude_sha1 is not null */ + struct strbuf out; + struct strbuf sb_stat; + struct strbuf sb_sha1; +}; + +static void stat_data_to_disk(struct stat_data *to, const struct stat_data *from) +{ + to->sd_ctime.sec = htonl(from->sd_ctime.sec); + to->sd_ctime.nsec = htonl(from->sd_ctime.nsec); + to->sd_mtime.sec = htonl(from->sd_mtime.sec); + to->sd_mtime.nsec = htonl(from->sd_mtime.nsec); + to->sd_dev = htonl(from->sd_dev); + to->sd_ino = htonl(from->sd_ino); + to->sd_uid = htonl(from->sd_uid); + to->sd_gid = htonl(from->sd_gid); + 
to->sd_size = htonl(from->sd_size); +} + +static void write_one_dir(struct untracked_cache_dir *untracked, + struct write_data *wd) +{ + struct stat_data stat_data; + struct strbuf *out = &wd->out; + unsigned char intbuf[16]; + unsigned int intlen, value; + int i = wd->index++; + + /* + * untracked_nr should be reset whenever valid is clear, but + * for safety.. + */ + if (!untracked->valid) { + untracked->untracked_nr = 0; + untracked->check_only = 0; + } + + if (untracked->check_only) + ewah_set(wd->check_only, i); + if (untracked->valid) { + ewah_set(wd->valid, i); + stat_data_to_disk(&stat_data, &untracked->stat_data); + strbuf_add(&wd->sb_stat, &stat_data, sizeof(stat_data)); + } + if (!is_null_sha1(untracked->exclude_sha1)) { + ewah_set(wd->sha1_valid, i); + strbuf_add(&wd->sb_sha1, untracked->exclude_sha1, 20); + } + + intlen = encode_varint(untracked->untracked_nr, intbuf); + strbuf_add(out, intbuf, intlen); + + /* skip non-recurse directories */ + for (i = 0, value = 0; i < untracked->dirs_nr; i++) + if (untracked->dirs[i]->recurse) + value++; + intlen = encode_varint(value, intbuf); + strbuf_add(out, intbuf, intlen); + + strbuf_add(out, untracked->name, strlen(untracked->name) + 1); + + for (i = 0; i < untracked->untracked_nr; i++) + strbuf_add(out, untracked->untracked[i], + strlen(untracked->untracked[i]) + 1); + + for (i = 0; i < untracked->dirs_nr; i++) + if (untracked->dirs[i]->recurse) + write_one_dir(untracked->dirs[i], wd); +} + +void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked) +{ + struct ondisk_untracked_cache *ouc; + struct write_data wd; + unsigned char varbuf[16]; + int len = 0, varint_len; + if (untracked->exclude_per_dir) + len = strlen(untracked->exclude_per_dir); + ouc = xmalloc(sizeof(*ouc) + len + 1); + stat_data_to_disk(&ouc->info_exclude_stat, &untracked->ss_info_exclude.stat); + stat_data_to_disk(&ouc->excludes_file_stat, &untracked->ss_excludes_file.stat); + hashcpy(ouc->info_exclude_sha1, 
untracked->ss_info_exclude.sha1); + hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.sha1); + ouc->dir_flags = htonl(untracked->dir_flags); + memcpy(ouc->exclude_per_dir, untracked->exclude_per_dir, len + 1); + strbuf_add(out, ouc, ouc_size(len)); + free(ouc); + ouc = NULL; + + if (!untracked->root) { + varint_len = encode_varint(0, varbuf); + strbuf_add(out, varbuf, varint_len); + return; + } + + wd.index = 0; + wd.check_only = ewah_new(); + wd.valid = ewah_new(); + wd.sha1_valid = ewah_new(); + strbuf_init(&wd.out, 1024); + strbuf_init(&wd.sb_stat, 1024); + strbuf_init(&wd.sb_sha1, 1024); + write_one_dir(untracked->root, &wd); + + varint_len = encode_varint(wd.index, varbuf); + strbuf_add(out, varbuf, varint_len); + strbuf_addbuf(out, &wd.out); + ewah_serialize_strbuf(wd.valid, out); + ewah_serialize_strbuf(wd.check_only, out); + ewah_serialize_strbuf(wd.sha1_valid, out); + strbuf_addbuf(out, &wd.sb_stat); + strbuf_addbuf(out, &wd.sb_sha1); + strbuf_addch(out, '\0'); /* safe guard for string lists */ + + ewah_free(wd.valid); + ewah_free(wd.check_only); + ewah_free(wd.sha1_valid); + strbuf_release(&wd.out); + strbuf_release(&wd.sb_stat); + strbuf_release(&wd.sb_sha1); +} diff --git a/dir.h b/dir.h index 95baf014ca..dc3ee0b2e5 100644 --- a/dir.h +++ b/dir.h @@ -298,4 +298,5 @@ static inline int dir_path_match(const struct dir_entry *ent, has_trailing_dir); } +void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked); #endif diff --git a/read-cache.c b/read-cache.c index 8d71860f69..3a058d008a 100644 --- a/read-cache.c +++ b/read-cache.c @@ -39,6 +39,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, #define CACHE_EXT_TREE 0x54524545 /* "TREE" */ #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */ #define CACHE_EXT_LINK 0x6c696e6b /* "link" */ +#define CACHE_EXT_UNTRACKED 0x554E5452 /* "UNTR" */ /* changes that can be kept in $GIT_DIR/index (basically all extensions) */ #define EXTMASK 
(RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ @@ -2047,6 +2048,17 @@ static int do_write_index(struct index_state *istate, int newfd, if (err) return -1; } + if (!strip_extensions && istate->untracked) { + struct strbuf sb = STRBUF_INIT; + + write_untracked_extension(&sb, istate->untracked); + err = write_index_ext_header(&c, newfd, CACHE_EXT_UNTRACKED, + sb.len) < 0 || + ce_write(&c, newfd, sb.buf, sb.len) < 0; + strbuf_release(&sb); + if (err) + return -1; + } if (ce_flush(&c, newfd, istate->sha1) || fstat(newfd, &st)) return -1; From f9e6c649589e0940ccb82821107fb658277ed86b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:34 +0700 Subject: [PATCH 11/24] untracked cache: load from UNTR index extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 219 +++++++++++++++++++++++++++++++++++++++++++++++++++ dir.h | 2 + read-cache.c | 5 ++ 3 files changed, 226 insertions(+) diff --git a/dir.c b/dir.c index f695e0c685..b39a034ae5 100644 --- a/dir.c +++ b/dir.c @@ -2283,3 +2283,222 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra strbuf_release(&wd.sb_stat); strbuf_release(&wd.sb_sha1); } + +static void free_untracked(struct untracked_cache_dir *ucd) +{ + int i; + if (!ucd) + return; + for (i = 0; i < ucd->dirs_nr; i++) + free_untracked(ucd->dirs[i]); + for (i = 0; i < ucd->untracked_nr; i++) + free(ucd->untracked[i]); + free(ucd->untracked); + free(ucd->dirs); + free(ucd); +} + +void free_untracked_cache(struct untracked_cache *uc) +{ + if (uc) + free_untracked(uc->root); + free(uc); +} + +struct read_data { + int index; + struct untracked_cache_dir **ucd; + struct ewah_bitmap *check_only; + struct ewah_bitmap *valid; + struct ewah_bitmap *sha1_valid; + const unsigned char *data; + const unsigned char *end; +}; + +static void 
stat_data_from_disk(struct stat_data *to, const struct stat_data *from) +{ + to->sd_ctime.sec = get_be32(&from->sd_ctime.sec); + to->sd_ctime.nsec = get_be32(&from->sd_ctime.nsec); + to->sd_mtime.sec = get_be32(&from->sd_mtime.sec); + to->sd_mtime.nsec = get_be32(&from->sd_mtime.nsec); + to->sd_dev = get_be32(&from->sd_dev); + to->sd_ino = get_be32(&from->sd_ino); + to->sd_uid = get_be32(&from->sd_uid); + to->sd_gid = get_be32(&from->sd_gid); + to->sd_size = get_be32(&from->sd_size); +} + +static int read_one_dir(struct untracked_cache_dir **untracked_, + struct read_data *rd) +{ + struct untracked_cache_dir ud, *untracked; + const unsigned char *next, *data = rd->data, *end = rd->end; + unsigned int value; + int i, len; + + memset(&ud, 0, sizeof(ud)); + + next = data; + value = decode_varint(&next); + if (next > end) + return -1; + ud.recurse = 1; + ud.untracked_alloc = value; + ud.untracked_nr = value; + if (ud.untracked_nr) + ud.untracked = xmalloc(sizeof(*ud.untracked) * ud.untracked_nr); + data = next; + + next = data; + ud.dirs_alloc = ud.dirs_nr = decode_varint(&next); + if (next > end) + return -1; + ud.dirs = xmalloc(sizeof(*ud.dirs) * ud.dirs_nr); + data = next; + + len = strlen((const char *)data); + next = data + len + 1; + if (next > rd->end) + return -1; + *untracked_ = untracked = xmalloc(sizeof(*untracked) + len); + memcpy(untracked, &ud, sizeof(ud)); + memcpy(untracked->name, data, len + 1); + data = next; + + for (i = 0; i < untracked->untracked_nr; i++) { + len = strlen((const char *)data); + next = data + len + 1; + if (next > rd->end) + return -1; + untracked->untracked[i] = xstrdup((const char*)data); + data = next; + } + + rd->ucd[rd->index++] = untracked; + rd->data = data; + + for (i = 0; i < untracked->dirs_nr; i++) { + len = read_one_dir(untracked->dirs + i, rd); + if (len < 0) + return -1; + } + return 0; +} + +static void set_check_only(size_t pos, void *cb) +{ + struct read_data *rd = cb; + struct untracked_cache_dir *ud = 
rd->ucd[pos]; + ud->check_only = 1; +} + +static void read_stat(size_t pos, void *cb) +{ + struct read_data *rd = cb; + struct untracked_cache_dir *ud = rd->ucd[pos]; + if (rd->data + sizeof(struct stat_data) > rd->end) { + rd->data = rd->end + 1; + return; + } + stat_data_from_disk(&ud->stat_data, (struct stat_data *)rd->data); + rd->data += sizeof(struct stat_data); + ud->valid = 1; +} + +static void read_sha1(size_t pos, void *cb) +{ + struct read_data *rd = cb; + struct untracked_cache_dir *ud = rd->ucd[pos]; + if (rd->data + 20 > rd->end) { + rd->data = rd->end + 1; + return; + } + hashcpy(ud->exclude_sha1, rd->data); + rd->data += 20; +} + +static void load_sha1_stat(struct sha1_stat *sha1_stat, + const struct stat_data *stat, + const unsigned char *sha1) +{ + stat_data_from_disk(&sha1_stat->stat, stat); + hashcpy(sha1_stat->sha1, sha1); + sha1_stat->valid = 1; +} + +struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz) +{ + const struct ondisk_untracked_cache *ouc; + struct untracked_cache *uc; + struct read_data rd; + const unsigned char *next = data, *end = (const unsigned char *)data + sz; + int len; + + if (sz <= 1 || end[-1] != '\0') + return NULL; + end--; + + ouc = (const struct ondisk_untracked_cache *)next; + if (next + ouc_size(0) > end) + return NULL; + + uc = xcalloc(1, sizeof(*uc)); + load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat, + ouc->info_exclude_sha1); + load_sha1_stat(&uc->ss_excludes_file, &ouc->excludes_file_stat, + ouc->excludes_file_sha1); + uc->dir_flags = get_be32(&ouc->dir_flags); + uc->exclude_per_dir = xstrdup(ouc->exclude_per_dir); + /* NUL after exclude_per_dir is covered by sizeof(*ouc) */ + next += ouc_size(strlen(ouc->exclude_per_dir)); + if (next >= end) + goto done2; + + len = decode_varint(&next); + if (next > end || len == 0) + goto done2; + + rd.valid = ewah_new(); + rd.check_only = ewah_new(); + rd.sha1_valid = ewah_new(); + rd.data = next; + rd.end = end; + rd.index = 0; + 
rd.ucd = xmalloc(sizeof(*rd.ucd) * len); + + if (read_one_dir(&uc->root, &rd) || rd.index != len) + goto done; + + next = rd.data; + len = ewah_read_mmap(rd.valid, next, end - next); + if (len < 0) + goto done; + + next += len; + len = ewah_read_mmap(rd.check_only, next, end - next); + if (len < 0) + goto done; + + next += len; + len = ewah_read_mmap(rd.sha1_valid, next, end - next); + if (len < 0) + goto done; + + ewah_each_bit(rd.check_only, set_check_only, &rd); + rd.data = next + len; + ewah_each_bit(rd.valid, read_stat, &rd); + ewah_each_bit(rd.sha1_valid, read_sha1, &rd); + next = rd.data; + +done: + free(rd.ucd); + ewah_free(rd.valid); + ewah_free(rd.check_only); + ewah_free(rd.sha1_valid); +done2: + if (next != end) { + free_untracked_cache(uc); + uc = NULL; + } + return uc; +} diff --git a/dir.h b/dir.h index dc3ee0b2e5..40a679a802 100644 --- a/dir.h +++ b/dir.h @@ -298,5 +298,7 @@ static inline int dir_path_match(const struct dir_entry *ent, has_trailing_dir); } +void free_untracked_cache(struct untracked_cache *); +struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz); void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked); #endif diff --git a/read-cache.c b/read-cache.c index 3a058d008a..ee0ef049b8 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1371,6 +1371,9 @@ static int read_index_extension(struct index_state *istate, if (read_link_extension(istate, data, sz)) return -1; break; + case CACHE_EXT_UNTRACKED: + istate->untracked = read_untracked_extension(data, sz); + break; default: if (*ext < 'A' || 'Z' < *ext) return error("index uses %.4s extension, which we do not understand", @@ -1662,6 +1665,8 @@ int discard_index(struct index_state *istate) istate->cache = NULL; istate->cache_alloc = 0; discard_split_index(istate); + free_untracked_cache(istate->untracked); + istate->untracked = NULL; return 0; } From e931371a8f1164185486a1f5fdaaa708b4a6217c Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:35 +0700 Subject: [PATCH 12/24] untracked cache: invalidate at index addition or removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ideally we should implement untracked_cache_remove_from_index() and untracked_cache_add_to_index() so that they update the untracked cache right away instead of invalidating it and waiting for the next read_directory() to deal with it. But that may need some more work in unpack-trees.c, so keep it simple as a first step. The new call in add_index_entry_with_check() may look strange because new calls usually stay close to cache_tree_invalidate_path(). We do it a bit later than c_t_i_p() in this function because if it's about replacing the entry with the same name, we don't care (but cache-tree does). Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 31 +++++++++++++++++++++++++++++++ dir.h | 4 ++++ read-cache.c | 4 ++++ unpack-trees.c | 7 +++++-- 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/dir.c b/dir.c index b39a034ae5..68b46d0acb 100644 --- a/dir.c +++ b/dir.c @@ -2502,3 +2502,34 @@ done2: } return uc; } + +void untracked_cache_invalidate_path(struct index_state *istate, + const char *path) +{ + const char *sep; + struct untracked_cache_dir *d; + if (!istate->untracked || !istate->untracked->root) + return; + sep = strrchr(path, '/'); + if (sep) + d = lookup_untracked(istate->untracked, + istate->untracked->root, + path, sep - path); + else + d = istate->untracked->root; + istate->untracked->dir_invalidated++; + d->valid = 0; + d->untracked_nr = 0; +} + +void untracked_cache_remove_from_index(struct index_state *istate, + const char *path) +{ + untracked_cache_invalidate_path(istate, path); +} + +void untracked_cache_add_to_index(struct index_state *istate, + const char *path) +{ + untracked_cache_invalidate_path(istate, path); +} diff --git a/dir.h b/dir.h index 
40a679a802..2ce7dd3d27 100644 --- a/dir.h +++ b/dir.h @@ -298,6 +298,10 @@ static inline int dir_path_match(const struct dir_entry *ent, has_trailing_dir); } +void untracked_cache_invalidate_path(struct index_state *, const char *); +void untracked_cache_remove_from_index(struct index_state *, const char *); +void untracked_cache_add_to_index(struct index_state *, const char *); + void free_untracked_cache(struct untracked_cache *); struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz); void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked); diff --git a/read-cache.c b/read-cache.c index ee0ef049b8..0d96c753b7 100644 --- a/read-cache.c +++ b/read-cache.c @@ -80,6 +80,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n memcpy(new->name, new_name, namelen + 1); cache_tree_invalidate_path(istate, old->name); + untracked_cache_remove_from_index(istate, old->name); remove_index_entry_at(istate, nr); add_index_entry(istate, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE); } @@ -539,6 +540,7 @@ int remove_file_from_index(struct index_state *istate, const char *path) if (pos < 0) pos = -pos-1; cache_tree_invalidate_path(istate, path); + untracked_cache_remove_from_index(istate, path); while (pos < istate->cache_nr && !strcmp(istate->cache[pos]->name, path)) remove_index_entry_at(istate, pos); return 0; @@ -981,6 +983,8 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e } pos = -pos-1; + untracked_cache_add_to_index(istate, ce->name); + /* * Inserting a merged entry ("stage 0") into the index * will always replace all non-merged entries.. 
diff --git a/unpack-trees.c b/unpack-trees.c index be84ba2607..2927660d92 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -9,6 +9,7 @@ #include "refs.h" #include "attr.h" #include "split-index.h" +#include "dir.h" /* * Error messages expected by scripts out of plumbing commands such as @@ -1259,8 +1260,10 @@ static int verify_uptodate_sparse(const struct cache_entry *ce, static void invalidate_ce_path(const struct cache_entry *ce, struct unpack_trees_options *o) { - if (ce) - cache_tree_invalidate_path(o->src_index, ce->name); + if (!ce) + return; + cache_tree_invalidate_path(o->src_index, ce->name); + untracked_cache_invalidate_path(o->src_index, ce->name); } /* From 2bb4cda1987afe6911a1c193283213babda328d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:36 +0700 Subject: [PATCH 13/24] read-cache.c: split racy stat test to a separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- read-cache.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/read-cache.c b/read-cache.c index 0d96c753b7..b5e9c3f8ac 100644 --- a/read-cache.c +++ b/read-cache.c @@ -272,20 +272,26 @@ static int ce_match_stat_basic(const struct cache_entry *ce, struct stat *st) return changed; } +static int is_racy_stat(const struct index_state *istate, + const struct stat_data *sd) +{ + return (istate->timestamp.sec && +#ifdef USE_NSEC + /* nanosecond timestamped files can also be racy! 
*/ + (istate->timestamp.sec < sd->sd_mtime.sec || + (istate->timestamp.sec == sd->sd_mtime.sec && + istate->timestamp.nsec <= sd->sd_mtime.nsec)) +#else + istate->timestamp.sec <= sd->sd_mtime.sec +#endif + ); +} + static int is_racy_timestamp(const struct index_state *istate, const struct cache_entry *ce) { return (!S_ISGITLINK(ce->ce_mode) && - istate->timestamp.sec && -#ifdef USE_NSEC - /* nanosecond timestamped files can also be racy! */ - (istate->timestamp.sec < ce->ce_stat_data.sd_mtime.sec || - (istate->timestamp.sec == ce->ce_stat_data.sd_mtime.sec && - istate->timestamp.nsec <= ce->ce_stat_data.sd_mtime.nsec)) -#else - istate->timestamp.sec <= ce->ce_stat_data.sd_mtime.sec -#endif - ); + is_racy_stat(istate, &ce->ce_stat_data)); } int ie_match_stat(const struct index_state *istate, From ed4efab1b17e883b761b4482c40c04a4529be8f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:37 +0700 Subject: [PATCH 14/24] untracked cache: avoid racy timestamps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a directory is updated within the same second that its timestamp is last saved, we cannot tell that the directory has been updated by checking timestamps. Assume the worst (something has been updated). See 29e4d36 (Racy GIT - 2005-12-20) for more information. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- cache.h | 2 ++ dir.c | 4 ++-- read-cache.c | 8 ++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/cache.h b/cache.h index 811cc36547..120d337bd4 100644 --- a/cache.h +++ b/cache.h @@ -555,6 +555,8 @@ extern void fill_stat_data(struct stat_data *sd, struct stat *st); * INODE_CHANGED, and DATA_CHANGED. 
*/ extern int match_stat_data(const struct stat_data *sd, struct stat *st); +extern int match_stat_data_racy(const struct index_state *istate, + const struct stat_data *sd, struct stat *st); extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st); diff --git a/dir.c b/dir.c index 68b46d0acb..741484aa97 100644 --- a/dir.c +++ b/dir.c @@ -682,7 +682,7 @@ static int add_excludes(const char *fname, const char *base, int baselen, if (sha1_stat) { int pos; if (sha1_stat->valid && - !match_stat_data(&sha1_stat->stat, &st)) + !match_stat_data_racy(&the_index, &sha1_stat->stat, &st)) ; /* no content change, ss->sha1 still good */ else if (check_index && (pos = cache_name_pos(fname, strlen(fname))) >= 0 && @@ -1539,7 +1539,7 @@ static int valid_cached_dir(struct dir_struct *dir, return 0; } if (!untracked->valid || - match_stat_data(&untracked->stat_data, &st)) { + match_stat_data_racy(&the_index, &untracked->stat_data, &st)) { if (untracked->valid) invalidate_directory(dir->untracked, untracked); fill_stat_data(&untracked->stat_data, &st); diff --git a/read-cache.c b/read-cache.c index b5e9c3f8ac..57828bb3f3 100644 --- a/read-cache.c +++ b/read-cache.c @@ -294,6 +294,14 @@ static int is_racy_timestamp(const struct index_state *istate, is_racy_stat(istate, &ce->ce_stat_data)); } +int match_stat_data_racy(const struct index_state *istate, + const struct stat_data *sd, struct stat *st) +{ + if (is_racy_stat(istate, sd)) + return MTIME_CHANGED; + return match_stat_data(sd, st); +} + int ie_match_stat(const struct index_state *istate, const struct cache_entry *ce, struct stat *st, unsigned int options) From c9ccb5d327bd9259ca6cceb9d87df9eb2cba2e9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:38 +0700 Subject: [PATCH 15/24] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This could be 
used to verify correct behavior in tests Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dir.c b/dir.c index 741484aa97..1cf1e3002e 100644 --- a/dir.c +++ b/dir.c @@ -1923,6 +1923,18 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru free_simplify(simplify); qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name); qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name); + if (dir->untracked) { + static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS); + trace_printf_key(&trace_untracked_stats, + "node creation: %u\n" + "gitignore invalidation: %u\n" + "directory invalidation: %u\n" + "opendir: %u\n", + dir->untracked->dir_created, + dir->untracked->gitignore_invalidated, + dir->untracked->dir_invalidated, + dir->untracked->dir_opened); + } return dir->nr; } From 1bbb3dba3fbf733db45f073ddafe89f5972c516a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:39 +0700 Subject: [PATCH 16/24] untracked cache: mark index dirty if untracked cache is updated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- cache.h | 1 + dir.c | 9 +++++++++ read-cache.c | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cache.h b/cache.h index 120d337bd4..1392be1030 100644 --- a/cache.h +++ b/cache.h @@ -289,6 +289,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define RESOLVE_UNDO_CHANGED (1 << 4) #define CACHE_TREE_CHANGED (1 << 5) #define SPLIT_INDEX_ORDERED (1 << 6) +#define UNTRACKED_CHANGED (1 << 7) struct split_index; struct untracked_cache; diff --git a/dir.c b/dir.c index 1cf1e3002e..592b5fa795 100644 --- a/dir.c +++ b/dir.c @@ -1934,6 +1934,15 @@ int read_directory(struct dir_struct *dir, const char 
*path, int len, const stru dir->untracked->gitignore_invalidated, dir->untracked->dir_invalidated, dir->untracked->dir_opened); + if (dir->untracked == the_index.untracked && + (dir->untracked->dir_opened || + dir->untracked->gitignore_invalidated || + dir->untracked->dir_invalidated)) + the_index.cache_changed |= UNTRACKED_CHANGED; + if (dir->untracked != the_index.untracked) { + free(dir->untracked); + dir->untracked = NULL; + } } return dir->nr; } diff --git a/read-cache.c b/read-cache.c index 57828bb3f3..705469eb7a 100644 --- a/read-cache.c +++ b/read-cache.c @@ -44,7 +44,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, /* changes that can be kept in $GIT_DIR/index (basically all extensions) */ #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \ - SPLIT_INDEX_ORDERED) + SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED) struct index_state the_index; static const char *alternate_index_output; From 76e6b090a0b9b1e83b8e467d1b64f250f66c3f54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:40 +0700 Subject: [PATCH 17/24] untracked-cache: temporarily disable with $GIT_DISABLE_UNTRACKED_CACHE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be used to double-check whether results with the untracked cache are correct, compared to the vanilla version. The untracked cache remains in the index, but is not used. 
Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dir.c b/dir.c index 592b5fa795..2cf8f35cc7 100644 --- a/dir.c +++ b/dir.c @@ -1801,7 +1801,7 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d struct untracked_cache_dir *root; int i; - if (!dir->untracked) + if (!dir->untracked || getenv("GIT_DISABLE_UNTRACKED_CACHE")) return NULL; /* From 226c051adb0f64772f807287e130a6a546b95fbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:41 +0700 Subject: [PATCH 18/24] status: enable untracked cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit update_index_if_able() is moved down so that the updated untracked cache could be written out. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- builtin/commit.c | 5 +++-- wt-status.c | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 961e467242..fe380a9b92 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1364,13 +1364,14 @@ int cmd_status(int argc, const char **argv, const char *prefix) refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL); fd = hold_locked_index(&index_lock, 0); - if (0 <= fd) - update_index_if_able(&the_index, &index_lock); s.is_initial = get_sha1(s.reference, sha1) ? 
1 : 0; s.ignore_submodule_arg = ignore_submodule_arg; wt_status_collect(&s); + if (0 <= fd) + update_index_if_able(&the_index, &index_lock); + if (s.relative_paths) s.prefix = prefix; diff --git a/wt-status.c b/wt-status.c index 29666d0dba..fc1b82e2c1 100644 --- a/wt-status.c +++ b/wt-status.c @@ -585,6 +585,8 @@ static void wt_status_collect_untracked(struct wt_status *s) DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; if (s->show_ignored_files) dir.flags |= DIR_SHOW_IGNORED_TOO; + else + dir.untracked = the_index.untracked; setup_standard_excludes(&dir); fill_directory(&dir, &s->pathspec); From 9e5972413b4873dc143c4046c6e74eb608ace32b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:42 +0700 Subject: [PATCH 19/24] update-index: manually enable or disable untracked cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Overall time saving on "git status" is about 40% in the best case scenario, removing ..collect_untracked() as the most time consuming function. read and refresh index operations are now at the top (which should drop when index-helper and/or watchman support is added). More numbers and analysis below. webkit.git ========== 169k files. 6k dirs. Lots of test data (i.e. not touched most of the time) Base status ----------- Index version 4 in split index mode and cache-tree populated. No untracked cache. It shows how time is consumed by "git status". The same settings are used for other repos below. 
18:28:10.199679 builtin/commit.c:1394 performance: 0.000000451 s: cmd_status:setup 18:28:10.474847 read-cache.c:1407 performance: 0.274873831 s: read_index 18:28:10.475295 read-cache.c:1407 performance: 0.000000656 s: read_index 18:28:10.728443 preload-index.c:131 performance: 0.253147487 s: read_index_preload 18:28:10.741422 read-cache.c:1254 performance: 0.012868340 s: refresh_index 18:28:10.752300 wt-status.c:623 performance: 0.010421357 s: wt_status_collect_changes_worktree 18:28:10.762069 wt-status.c:629 performance: 0.009644748 s: wt_status_collect_changes_index 18:28:11.601019 wt-status.c:632 performance: 0.838859547 s: wt_status_collect_untracked 18:28:11.605939 builtin/commit.c:1421 performance: 0.004835004 s: cmd_status:update_index 18:28:11.606580 trace.c:415 performance: 1.407878388 s: git command: 'git' 'status' Populating status ----------------- This is after enabling untracked cache and the cache is still empty. We see a slight increase in .._collect_untracked() and update_index (because new cache has to be written to $GIT_DIR/index). 
18:28:18.915213 builtin/commit.c:1394 performance: 0.000000326 s: cmd_status:setup 18:28:19.197364 read-cache.c:1407 performance: 0.281901416 s: read_index 18:28:19.197754 read-cache.c:1407 performance: 0.000000546 s: read_index 18:28:19.451355 preload-index.c:131 performance: 0.253599607 s: read_index_preload 18:28:19.464400 read-cache.c:1254 performance: 0.012935336 s: refresh_index 18:28:19.475115 wt-status.c:623 performance: 0.010236920 s: wt_status_collect_changes_worktree 18:28:19.486022 wt-status.c:629 performance: 0.010801685 s: wt_status_collect_changes_index 18:28:20.362660 wt-status.c:632 performance: 0.876551366 s: wt_status_collect_untracked 18:28:20.396199 builtin/commit.c:1421 performance: 0.033447969 s: cmd_status:update_index 18:28:20.396939 trace.c:415 performance: 1.482695902 s: git command: 'git' 'status' Populated status ---------------- After the cache is populated, wt_status_collect_untracked() drops 82% from 0.838s to 0.144s. Overall time drops 45%. Top offenders are now read_index() and read_index_preload(). 
18:28:20.408605 builtin/commit.c:1394 performance: 0.000000457 s: cmd_status:setup 18:28:20.692864 read-cache.c:1407 performance: 0.283980458 s: read_index 18:28:20.693273 read-cache.c:1407 performance: 0.000000661 s: read_index 18:28:20.958814 preload-index.c:131 performance: 0.265540254 s: read_index_preload 18:28:20.972375 read-cache.c:1254 performance: 0.013437429 s: refresh_index 18:28:20.983959 wt-status.c:623 performance: 0.011146646 s: wt_status_collect_changes_worktree 18:28:20.993948 wt-status.c:629 performance: 0.009879094 s: wt_status_collect_changes_index 18:28:21.138125 wt-status.c:632 performance: 0.144084737 s: wt_status_collect_untracked 18:28:21.173678 builtin/commit.c:1421 performance: 0.035463949 s: cmd_status:update_index 18:28:21.174251 trace.c:415 performance: 0.766707355 s: git command: 'git' 'status' gentoo-x86.git ============== This repository is a strange one with a balanced, wide and shallow worktree (about 100k files and 23k dirs) and no .gitignore in worktree. .._collect_untracked() time drops 88%, total time drops 56%. 
Base status ----------- 18:20:40.828642 builtin/commit.c:1394 performance: 0.000000496 s: cmd_status:setup 18:20:41.027233 read-cache.c:1407 performance: 0.198130532 s: read_index 18:20:41.027670 read-cache.c:1407 performance: 0.000000581 s: read_index 18:20:41.171716 preload-index.c:131 performance: 0.144045594 s: read_index_preload 18:20:41.179171 read-cache.c:1254 performance: 0.007320424 s: refresh_index 18:20:41.185785 wt-status.c:623 performance: 0.006144638 s: wt_status_collect_changes_worktree 18:20:41.192701 wt-status.c:629 performance: 0.006780184 s: wt_status_collect_changes_index 18:20:41.991723 wt-status.c:632 performance: 0.798927029 s: wt_status_collect_untracked 18:20:41.994664 builtin/commit.c:1421 performance: 0.002852772 s: cmd_status:update_index 18:20:41.995458 trace.c:415 performance: 1.168427502 s: git command: 'git' 'status' Populating status ----------------- 18:20:48.968848 builtin/commit.c:1394 performance: 0.000000380 s: cmd_status:setup 18:20:49.172918 read-cache.c:1407 performance: 0.203734214 s: read_index 18:20:49.173341 read-cache.c:1407 performance: 0.000000562 s: read_index 18:20:49.320013 preload-index.c:131 performance: 0.146671391 s: read_index_preload 18:20:49.328039 read-cache.c:1254 performance: 0.007921957 s: refresh_index 18:20:49.334680 wt-status.c:623 performance: 0.006172020 s: wt_status_collect_changes_worktree 18:20:49.342526 wt-status.c:629 performance: 0.007731746 s: wt_status_collect_changes_index 18:20:50.257510 wt-status.c:632 performance: 0.914864222 s: wt_status_collect_untracked 18:20:50.338371 builtin/commit.c:1421 performance: 0.080776477 s: cmd_status:update_index 18:20:50.338900 trace.c:415 performance: 1.371462446 s: git command: 'git' 'status' Populated status ---------------- 18:20:50.351160 builtin/commit.c:1394 performance: 0.000000571 s: cmd_status:setup 18:20:50.577358 read-cache.c:1407 performance: 0.225917338 s: read_index 18:20:50.577794 read-cache.c:1407 performance: 0.000000617 s: read_index 
18:20:50.734140 preload-index.c:131 performance: 0.156345564 s: read_index_preload 18:20:50.745717 read-cache.c:1254 performance: 0.011463075 s: refresh_index 18:20:50.755176 wt-status.c:623 performance: 0.008877929 s: wt_status_collect_changes_worktree 18:20:50.763768 wt-status.c:629 performance: 0.008471633 s: wt_status_collect_changes_index 18:20:50.854885 wt-status.c:632 performance: 0.090988721 s: wt_status_collect_untracked 18:20:50.857765 builtin/commit.c:1421 performance: 0.002789097 s: cmd_status:update_index 18:20:50.858411 trace.c:415 performance: 0.508647673 s: git command: 'git' 'status' linux-2.6 ========= Reference repo. Not too big. .._collect_status() drops 84%. Total time drops 42%. Base status ----------- 18:34:09.870122 builtin/commit.c:1394 performance: 0.000000385 s: cmd_status:setup 18:34:09.943218 read-cache.c:1407 performance: 0.072871177 s: read_index 18:34:09.943614 read-cache.c:1407 performance: 0.000000491 s: read_index 18:34:10.004364 preload-index.c:131 performance: 0.060748102 s: read_index_preload 18:34:10.008190 read-cache.c:1254 performance: 0.003714285 s: refresh_index 18:34:10.012087 wt-status.c:623 performance: 0.002775446 s: wt_status_collect_changes_worktree 18:34:10.016054 wt-status.c:629 performance: 0.003862140 s: wt_status_collect_changes_index 18:34:10.214747 wt-status.c:632 performance: 0.198604837 s: wt_status_collect_untracked 18:34:10.216102 builtin/commit.c:1421 performance: 0.001244166 s: cmd_status:update_index 18:34:10.216817 trace.c:415 performance: 0.347670735 s: git command: 'git' 'status' Populating status ----------------- 18:34:16.595102 builtin/commit.c:1394 performance: 0.000000456 s: cmd_status:setup 18:34:16.666600 read-cache.c:1407 performance: 0.070992413 s: read_index 18:34:16.667012 read-cache.c:1407 performance: 0.000000606 s: read_index 18:34:16.729375 preload-index.c:131 performance: 0.062362492 s: read_index_preload 18:34:16.732565 read-cache.c:1254 performance: 0.003075517 s: refresh_index 
18:34:16.736148 wt-status.c:623 performance: 0.002422201 s: wt_status_collect_changes_worktree 18:34:16.739990 wt-status.c:629 performance: 0.003746618 s: wt_status_collect_changes_index 18:34:16.948505 wt-status.c:632 performance: 0.208426710 s: wt_status_collect_untracked 18:34:16.961744 builtin/commit.c:1421 performance: 0.013151887 s: cmd_status:update_index 18:34:16.962233 trace.c:415 performance: 0.368537535 s: git command: 'git' 'status' Populated status ---------------- 18:34:16.970026 builtin/commit.c:1394 performance: 0.000000631 s: cmd_status:setup 18:34:17.046235 read-cache.c:1407 performance: 0.075904673 s: read_index 18:34:17.046644 read-cache.c:1407 performance: 0.000000681 s: read_index 18:34:17.113564 preload-index.c:131 performance: 0.066920253 s: read_index_preload 18:34:17.117281 read-cache.c:1254 performance: 0.003604055 s: refresh_index 18:34:17.121115 wt-status.c:623 performance: 0.002508345 s: wt_status_collect_changes_worktree 18:34:17.125089 wt-status.c:629 performance: 0.003871636 s: wt_status_collect_changes_index 18:34:17.156089 wt-status.c:632 performance: 0.030895703 s: wt_status_collect_untracked 18:34:17.169861 builtin/commit.c:1421 performance: 0.013686404 s: cmd_status:update_index 18:34:17.170391 trace.c:415 performance: 0.201474531 s: git command: 'git' 'status' Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/git-update-index.txt | 8 ++++++++ builtin/update-index.c | 16 ++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt index aff01798cd..6bc296787e 100644 --- a/Documentation/git-update-index.txt +++ b/Documentation/git-update-index.txt @@ -170,6 +170,14 @@ may not support it yet. the shared index file. This mode is designed for very large indexes that take a significant amount of time to read or write. +--untracked-cache:: +--no-untracked-cache:: + Enable or disable untracked cache extension. 
This could speed + up for commands that involve determining untracked files such + as `git status`. The underlying operating system and file + system must change `st_mtime` field of a directory if files + are added or deleted in that directory. + \--:: Do not interpret any more arguments as options. diff --git a/builtin/update-index.c b/builtin/update-index.c index 587898624c..6ea5c8dc20 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -741,6 +741,7 @@ static int reupdate_callback(struct parse_opt_ctx_t *ctx, int cmd_update_index(int argc, const char **argv, const char *prefix) { int newfd, entries, has_errors = 0, line_termination = '\n'; + int untracked_cache = -1; int read_from_stdin = 0; int prefix_length = prefix ? strlen(prefix) : 0; int preferred_index_format = 0; @@ -832,6 +833,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) N_("write index in this format")), OPT_BOOL(0, "split-index", &split_index, N_("enable or disable split index")), + OPT_BOOL(0, "untracked-cache", &untracked_cache, + N_("enable/disable untracked cache")), OPT_END() }; @@ -938,6 +941,19 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) the_index.split_index = NULL; the_index.cache_changed |= SOMETHING_CHANGED; } + if (untracked_cache > 0 && !the_index.untracked) { + struct untracked_cache *uc; + + uc = xcalloc(1, sizeof(*uc)); + uc->exclude_per_dir = ".gitignore"; + /* should be the same flags used by git-status */ + uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; + the_index.untracked = uc; + the_index.cache_changed |= UNTRACKED_CHANGED; + } else if (!untracked_cache && the_index.untracked) { + the_index.untracked = NULL; + the_index.cache_changed |= UNTRACKED_CHANGED; + } if (active_cache_changed) { if (newfd < 0) { From f64cb88d3521b64b2db9353d14148328063745dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:43 +0700 
Subject: [PATCH 20/24] update-index: test the system before enabling untracked cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Helped-by: Eric Sunshine Helped-by: Junio C Hamano Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/git-update-index.txt | 6 ++ builtin/update-index.c | 168 +++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+) diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt index 6bc296787e..1a296bc29a 100644 --- a/Documentation/git-update-index.txt +++ b/Documentation/git-update-index.txt @@ -178,6 +178,12 @@ may not support it yet. system must change `st_mtime` field of a directory if files are added or deleted in that directory. +--force-untracked-cache:: + For safety, `--untracked-cache` performs tests on the working + directory to make sure untracked cache can be used. These + tests can take a few seconds. `--force-untracked-cache` can be + used to skip the tests. + \--:: Do not interpret any more arguments as options. diff --git a/builtin/update-index.c b/builtin/update-index.c index 6ea5c8dc20..c6951ad3ce 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -33,6 +33,7 @@ static int mark_valid_only; static int mark_skip_worktree_only; #define MARK_FLAG 1 #define UNMARK_FLAG 2 +static struct strbuf mtime_dir = STRBUF_INIT; __attribute__((format (printf, 1, 2))) static void report(const char *fmt, ...) @@ -48,6 +49,166 @@ static void report(const char *fmt, ...) 
va_end(vp); } +static void remove_test_directory(void) +{ + if (mtime_dir.len) + remove_dir_recursively(&mtime_dir, 0); +} + +static const char *get_mtime_path(const char *path) +{ + static struct strbuf sb = STRBUF_INIT; + strbuf_reset(&sb); + strbuf_addf(&sb, "%s/%s", mtime_dir.buf, path); + return sb.buf; +} + +static void xmkdir(const char *path) +{ + path = get_mtime_path(path); + if (mkdir(path, 0700)) + die_errno(_("failed to create directory %s"), path); +} + +static int xstat_mtime_dir(struct stat *st) +{ + if (stat(mtime_dir.buf, st)) + die_errno(_("failed to stat %s"), mtime_dir.buf); + return 0; +} + +static int create_file(const char *path) +{ + int fd; + path = get_mtime_path(path); + fd = open(path, O_CREAT | O_RDWR, 0644); + if (fd < 0) + die_errno(_("failed to create file %s"), path); + return fd; +} + +static void xunlink(const char *path) +{ + path = get_mtime_path(path); + if (unlink(path)) + die_errno(_("failed to delete file %s"), path); +} + +static void xrmdir(const char *path) +{ + path = get_mtime_path(path); + if (rmdir(path)) + die_errno(_("failed to delete directory %s"), path); +} + +static void avoid_racy(void) +{ + /* + * not sure if we could usleep(10) if USE_NSEC is defined. The + * field nsec could be there, but the OS could choose to + * ignore it? 
+ */ + sleep(1); +} + +static int test_if_untracked_cache_is_supported(void) +{ + struct stat st; + struct stat_data base; + int fd, ret = 0; + + strbuf_addstr(&mtime_dir, "mtime-test-XXXXXX"); + if (!mkdtemp(mtime_dir.buf)) + die_errno("Could not make temporary directory"); + + fprintf(stderr, _("Testing ")); + atexit(remove_test_directory); + xstat_mtime_dir(&st); + fill_stat_data(&base, &st); + fputc('.', stderr); + + avoid_racy(); + fd = create_file("newfile"); + xstat_mtime_dir(&st); + if (!match_stat_data(&base, &st)) { + close(fd); + fputc('\n', stderr); + fprintf_ln(stderr,_("directory stat info does not " + "change after adding a new file")); + goto done; + } + fill_stat_data(&base, &st); + fputc('.', stderr); + + avoid_racy(); + xmkdir("new-dir"); + xstat_mtime_dir(&st); + if (!match_stat_data(&base, &st)) { + close(fd); + fputc('\n', stderr); + fprintf_ln(stderr, _("directory stat info does not change " + "after adding a new directory")); + goto done; + } + fill_stat_data(&base, &st); + fputc('.', stderr); + + avoid_racy(); + write_or_die(fd, "data", 4); + close(fd); + xstat_mtime_dir(&st); + if (match_stat_data(&base, &st)) { + fputc('\n', stderr); + fprintf_ln(stderr, _("directory stat info changes " + "after updating a file")); + goto done; + } + fputc('.', stderr); + + avoid_racy(); + close(create_file("new-dir/new")); + xstat_mtime_dir(&st); + if (match_stat_data(&base, &st)) { + fputc('\n', stderr); + fprintf_ln(stderr, _("directory stat info changes after " + "adding a file inside subdirectory")); + goto done; + } + fputc('.', stderr); + + avoid_racy(); + xunlink("newfile"); + xstat_mtime_dir(&st); + if (!match_stat_data(&base, &st)) { + fputc('\n', stderr); + fprintf_ln(stderr, _("directory stat info does not " + "change after deleting a file")); + goto done; + } + fill_stat_data(&base, &st); + fputc('.', stderr); + + avoid_racy(); + xunlink("new-dir/new"); + xrmdir("new-dir"); + xstat_mtime_dir(&st); + if (!match_stat_data(&base, &st)) { + 
fputc('\n', stderr); + fprintf_ln(stderr, _("directory stat info does not " + "change after deleting a directory")); + goto done; + } + + if (rmdir(mtime_dir.buf)) + die_errno(_("failed to delete directory %s"), mtime_dir.buf); + fprintf_ln(stderr, _(" OK")); + ret = 1; + +done: + strbuf_release(&mtime_dir); + return ret; +} + static int mark_ce_flags(const char *path, int flag, int mark) { int namelen = strlen(path); @@ -835,6 +996,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) N_("enable or disable split index")), OPT_BOOL(0, "untracked-cache", &untracked_cache, N_("enable/disable untracked cache")), + OPT_SET_INT(0, "force-untracked-cache", &untracked_cache, + N_("enable untracked cache without testing the filesystem"), 2), OPT_END() }; @@ -944,6 +1107,11 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) if (untracked_cache > 0 && !the_index.untracked) { struct untracked_cache *uc; + if (untracked_cache < 2) { + setup_work_tree(); + if (!test_if_untracked_cache_is_supported()) + return 1; + } uc = xcalloc(1, sizeof(*uc)); uc->exclude_per_dir = ".gitignore"; /* should be the same flags used by git-status */ From a3ddcefd97b0e033eca045f07e0a262e1e7bb483 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:44 +0700 Subject: [PATCH 21/24] t7063: tests for untracked cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- .gitignore | 1 + Makefile | 1 + t/t7063-status-untracked-cache.sh | 353 ++++++++++++++++++++++++++++++ test-dump-untracked-cache.c | 61 ++++++ 4 files changed, 416 insertions(+) create mode 100755 t/t7063-status-untracked-cache.sh create mode 100644 test-dump-untracked-cache.c diff --git a/.gitignore b/.gitignore index a05241916c..422c5382c1 100644 --- a/.gitignore +++ b/.gitignore @@ -184,6 +184,7 @@ /test-delta 
/test-dump-cache-tree /test-dump-split-index +/test-dump-untracked-cache /test-scrap-cache-tree /test-genrandom /test-hashmap diff --git a/Makefile b/Makefile index 44f1dd10ff..e63538a2e7 100644 --- a/Makefile +++ b/Makefile @@ -570,6 +570,7 @@ TEST_PROGRAMS_NEED_X += test-date TEST_PROGRAMS_NEED_X += test-delta TEST_PROGRAMS_NEED_X += test-dump-cache-tree TEST_PROGRAMS_NEED_X += test-dump-split-index +TEST_PROGRAMS_NEED_X += test-dump-untracked-cache TEST_PROGRAMS_NEED_X += test-genrandom TEST_PROGRAMS_NEED_X += test-hashmap TEST_PROGRAMS_NEED_X += test-index-version diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh new file mode 100755 index 0000000000..2b2ffd7d6b --- /dev/null +++ b/t/t7063-status-untracked-cache.sh @@ -0,0 +1,353 @@ +#!/bin/sh + +test_description='test untracked cache' + +. ./test-lib.sh + +avoid_racy() { + sleep 1 +} + +git update-index --untracked-cache +# It's fine if git update-index returns an error code other than one, +# it'll be caught in the first test. +if test $? 
-eq 1; then + skip_all='This system does not support untracked cache' + test_done +fi + +test_expect_success 'setup' ' + git init worktree && + cd worktree && + mkdir done dtwo dthree && + touch one two three done/one dtwo/two dthree/three && + git add one two done/one && + : >.git/info/exclude && + git update-index --untracked-cache +' + +test_expect_success 'untracked cache is empty' ' + test-dump-untracked-cache >../actual && + cat >../expect <../status.expect <../dump.expect <../trace && + GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + git status --porcelain >../actual && + test_cmp ../status.expect ../actual && + cat >../trace.expect <../actual && + test_cmp ../dump.expect ../actual +' + +test_expect_success 'status second time (fully populated cache)' ' + avoid_racy && + : >../trace && + GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + git status --porcelain >../actual && + test_cmp ../status.expect ../actual && + cat >../trace.expect <../actual && + test_cmp ../dump.expect ../actual +' + +test_expect_success 'modify in root directory, one dir invalidation' ' + avoid_racy && + : >four && + : >../trace && + GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + git status --porcelain >../actual && + cat >../status.expect <../trace.expect <../actual && + cat >../expect <.gitignore && + : >../trace && + GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + git status --porcelain >../actual && + cat >../status.expect <../trace.expect <../actual && + cat >../expect <>.git/info/exclude && + : >../trace && + GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + git status --porcelain >../actual && + cat >../status.expect <../trace.expect <../actual && + cat >../expect <../actual && + cat >../expect <../trace && + GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + git status --porcelain >../actual && + cat >../status.expect <../trace.expect <../actual && + cat >../expect <../actual && + cat >../expect <../trace && + 
GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + git status --porcelain >../actual && + cat >../status.expect <../trace.expect <../actual && + cat >../expect <name, (*b)->name); +} + +static void dump(struct untracked_cache_dir *ucd, struct strbuf *base) +{ + int i, len; + qsort(ucd->untracked, ucd->untracked_nr, sizeof(*ucd->untracked), + compare_untracked); + qsort(ucd->dirs, ucd->dirs_nr, sizeof(*ucd->dirs), + compare_dir); + len = base->len; + strbuf_addf(base, "%s/", ucd->name); + printf("%s %s", base->buf, + sha1_to_hex(ucd->exclude_sha1)); + if (ucd->recurse) + fputs(" recurse", stdout); + if (ucd->check_only) + fputs(" check_only", stdout); + if (ucd->valid) + fputs(" valid", stdout); + printf("\n"); + for (i = 0; i < ucd->untracked_nr; i++) + printf("%s\n", ucd->untracked[i]); + for (i = 0; i < ucd->dirs_nr; i++) + dump(ucd->dirs[i], base); + strbuf_setlen(base, len); +} + +int main(int ac, char **av) +{ + struct untracked_cache *uc; + struct strbuf base = STRBUF_INIT; + if (read_cache() < 0) + die("unable to read index file"); + uc = the_index.untracked; + if (!uc) { + printf("no untracked cache\n"); + return 0; + } + printf("info/exclude %s\n", sha1_to_hex(uc->ss_info_exclude.sha1)); + printf("core.excludesfile %s\n", sha1_to_hex(uc->ss_excludes_file.sha1)); + printf("exclude_per_dir %s\n", uc->exclude_per_dir); + printf("flags %08x\n", uc->dir_flags); + if (uc->root) + dump(uc->root, &base); + return 0; +} From 7b6aff0655c965959a59cc7fa3ed51c2a1fbcd44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:45 +0700 Subject: [PATCH 22/24] mingw32: add uname() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Helped-by: Eric Sunshine Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- compat/mingw.c | 11 +++++++++++ compat/mingw.h | 9 +++++++++ 2 files changed, 20 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 
70f3191a4f..496e6f8bb0 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2128,3 +2128,14 @@ void mingw_startup() /* initialize Unicode console */ winansi_init(); } + +int uname(struct utsname *buf) +{ + DWORD v = GetVersion(); + memset(buf, 0, sizeof(*buf)); + strcpy(buf->sysname, "Windows"); + sprintf(buf->release, "%u.%u", v & 0xff, (v >> 8) & 0xff); + /* assuming NT variants only.. */ + sprintf(buf->version, "%u", (v >> 16) & 0x7fff); + return 0; +} diff --git a/compat/mingw.h b/compat/mingw.h index 5e499cfb71..a6f7b9f1a7 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -76,6 +76,14 @@ struct itimerval { }; #define ITIMER_REAL 0 +struct utsname { + char sysname[16]; + char nodename[1]; + char release[16]; + char version[16]; + char machine[1]; +}; + /* * sanitize preprocessor namespace polluted by Windows headers defining * macros which collide with git local versions @@ -171,6 +179,7 @@ struct passwd *getpwuid(uid_t uid); int setitimer(int type, struct itimerval *in, struct itimerval *out); int sigaction(int sig, struct sigaction *in, struct sigaction *out); int link(const char *oldpath, const char *newpath); +int uname(struct utsname *buf); /* * replacements of existing functions From 1e8fef609e78110e276df633c5ba1fb1f1589fa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:46 +0700 Subject: [PATCH 23/24] untracked cache: guard and disable on system changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user enables untracked cache, then - move worktree to an unsupported filesystem - or simply upgrade OS - or move the whole (portable) disk from one machine to another - or access a shared fs from another machine there's no guarantee that untracked cache can still function properly. Record the worktree location and OS footprint in the cache. If it changes, err on the safe side and disable the cache. 
The user can 'update-index --untracked-cache' again to make sure all conditions are met. This adds a new requirement that setup_git_directory* must be called before read_cache() because we need worktree location by then, or the cache is dropped. This change does not cover all bases, you can fool it if you try hard. The point is to stop accidents. Helped-by: Eric Sunshine Helped-by: brian m. carlson Helped-by: Torsten Bögershausen Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/technical/index-format.txt | 4 ++ builtin/update-index.c | 16 ++++--- dir.c | 55 +++++++++++++++++++++++- dir.h | 2 + git-compat-util.h | 1 + test-dump-untracked-cache.c | 1 + 6 files changed, 72 insertions(+), 7 deletions(-) diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index db59a13600..b7093af8b2 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -242,6 +242,10 @@ Git index format The extension starts with + - A sequence of NUL-terminated strings, preceded by the size of the + sequence in variable width encoding. Each string describes the + environment where the cache can be used. + - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from ctime field until "file size". 
diff --git a/builtin/update-index.c b/builtin/update-index.c index c6951ad3ce..790a6aa9db 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -1104,7 +1104,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) the_index.split_index = NULL; the_index.cache_changed |= SOMETHING_CHANGED; } - if (untracked_cache > 0 && !the_index.untracked) { + if (untracked_cache > 0) { struct untracked_cache *uc; if (untracked_cache < 2) { @@ -1112,11 +1112,15 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) if (!test_if_untracked_cache_is_supported()) return 1; } - uc = xcalloc(1, sizeof(*uc)); - uc->exclude_per_dir = ".gitignore"; - /* should be the same flags used by git-status */ - uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; - the_index.untracked = uc; + if (!the_index.untracked) { + uc = xcalloc(1, sizeof(*uc)); + strbuf_init(&uc->ident, 100); + uc->exclude_per_dir = ".gitignore"; + /* should be the same flags used by git-status */ + uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; + the_index.untracked = uc; + } + add_untracked_ident(the_index.untracked); the_index.cache_changed |= UNTRACKED_CHANGED; } else if (!untracked_cache && the_index.untracked) { the_index.untracked = NULL; diff --git a/dir.c b/dir.c index 2cf8f35cc7..e9eaf97efe 100644 --- a/dir.c +++ b/dir.c @@ -1794,6 +1794,40 @@ static int treat_leading_path(struct dir_struct *dir, return rc; } +static const char *get_ident_string(void) +{ + static struct strbuf sb = STRBUF_INIT; + struct utsname uts; + + if (sb.len) + return sb.buf; + if (uname(&uts)) + die_errno(_("failed to get kernel name and information")); + strbuf_addf(&sb, "Location %s, system %s %s %s", get_git_work_tree(), + uts.sysname, uts.release, uts.version); + return sb.buf; +} + +static int ident_in_untracked(const struct untracked_cache *uc) +{ + const char *end = uc->ident.buf + uc->ident.len; + const char *p = uc->ident.buf; + + for (p 
= uc->ident.buf; p < end; p += strlen(p) + 1) + if (!strcmp(p, get_ident_string())) + return 1; + return 0; +} + +void add_untracked_ident(struct untracked_cache *uc) +{ + if (ident_in_untracked(uc)) + return; + strbuf_addstr(&uc->ident, get_ident_string()); + /* this strbuf contains a list of strings, save NUL too */ + strbuf_addch(&uc->ident, 0); +} + static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *dir, int base_len, const struct pathspec *pathspec) @@ -1860,6 +1894,11 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d if (ce_skip_worktree(active_cache[i])) return NULL; + if (!ident_in_untracked(dir->untracked)) { + warning(_("Untracked cache is disabled on this system.")); + return NULL; + } + if (!dir->untracked->root) { const int len = sizeof(*dir->untracked->root); dir->untracked->root = xmalloc(len); @@ -2268,6 +2307,11 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.sha1); ouc->dir_flags = htonl(untracked->dir_flags); memcpy(ouc->exclude_per_dir, untracked->exclude_per_dir, len + 1); + + varint_len = encode_varint(untracked->ident.len, varbuf); + strbuf_add(out, varbuf, varint_len); + strbuf_add(out, untracked->ident.buf, untracked->ident.len); + strbuf_add(out, ouc, ouc_size(len)); free(ouc); ouc = NULL; @@ -2453,17 +2497,26 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long struct untracked_cache *uc; struct read_data rd; const unsigned char *next = data, *end = (const unsigned char *)data + sz; - int len; + const char *ident; + int ident_len, len; if (sz <= 1 || end[-1] != '\0') return NULL; end--; + ident_len = decode_varint(&next); + if (next + ident_len > end) + return NULL; + ident = (const char *)next; + next += ident_len; + ouc = (const struct ondisk_untracked_cache *)next; if (next + ouc_size(0) > end) return NULL; uc = xcalloc(1, sizeof(*uc)); + 
strbuf_init(&uc->ident, ident_len); + strbuf_add(&uc->ident, ident, ident_len); load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat, ouc->info_exclude_sha1); load_sha1_stat(&uc->ss_excludes_file, &ouc->excludes_file_stat, diff --git a/dir.h b/dir.h index 2ce7dd3d27..6ccbc454ac 100644 --- a/dir.h +++ b/dir.h @@ -127,6 +127,7 @@ struct untracked_cache { struct sha1_stat ss_info_exclude; struct sha1_stat ss_excludes_file; const char *exclude_per_dir; + struct strbuf ident; /* * dir_struct#flags must match dir_flags or the untracked * cache is ignored. @@ -305,4 +306,5 @@ void untracked_cache_add_to_index(struct index_state *, const char *); void free_untracked_cache(struct untracked_cache *); struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz); void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked); +void add_untracked_ident(struct untracked_cache *); #endif diff --git a/git-compat-util.h b/git-compat-util.h index a3095be962..1663537791 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -134,6 +134,7 @@ #elif defined(_MSC_VER) #include "compat/msvc.h" #else +#include <sys/utsname.h> #include <sys/wait.h> #include <sys/resource.h> #include <sys/socket.h> diff --git a/test-dump-untracked-cache.c b/test-dump-untracked-cache.c index 710441ee05..25d855d98b 100644 --- a/test-dump-untracked-cache.c +++ b/test-dump-untracked-cache.c @@ -44,6 +44,7 @@ int main(int ac, char **av) { struct untracked_cache *uc; struct strbuf base = STRBUF_INIT; + setup_git_directory(); if (read_cache() < 0) die("unable to read index file"); uc = the_index.untracked; From aeb6f8b3a2bbfd8b48a967139fbf4581e5345182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:47 +0700 Subject: [PATCH 24/24] git-status.txt: advertisement for untracked cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a good user sees the "too long, consider -uno" advice when running `git 
status`, they should check out the man page to find out more. This change suggests they try untracked cache before -uno. Helped-by: brian m. carlson Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/git-status.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt index 4d8d530d35..56573bd86e 100644 --- a/Documentation/git-status.txt +++ b/Documentation/git-status.txt @@ -58,7 +58,10 @@ When `-u` option is not used, untracked files and directories are shown (i.e. the same as specifying `normal`), to help you avoid forgetting to add newly created files. Because it takes extra work to find untracked files in the filesystem, this mode may take some -time in a large working tree. You can use `no` to have `git status` +time in a large working tree. +Consider enabling untracked cache and split index if supported (see +`git update-index --untracked-cache` and `git update-index +--split-index`). Otherwise you can use `no` to have `git status` return more quickly without showing untracked files. + The default can be changed using the status.showUntrackedFiles