From 497c32136f80aca5f724bf70c2a0f44b63cb79f1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 26 Apr 2006 22:05:05 -0700 Subject: [PATCH 1/3] update-index: when --unresolve, smudge the relevant cache-tree entries. Signed-off-by: Junio C Hamano --- update-index.c | 1 + 1 file changed, 1 insertion(+) diff --git a/update-index.c b/update-index.c index 258a88cbea..1c1f13bd70 100644 --- a/update-index.c +++ b/update-index.c @@ -562,6 +562,7 @@ static int unresolve_one(const char *path) goto free_return; } + cache_tree_invalidate_path(active_cache_tree, path); remove_file_from_cache(path); if (add_cache_entry(ce_2, ADD_CACHE_OK_TO_ADD)) { error("%s: cannot add our version to the index.", path); From b34c39cf31e370dad3bcfba29ee8cd023c40fd6b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 27 Apr 2006 00:13:34 -0700 Subject: [PATCH 2/3] read-tree: teach 1 and 2 way merges about cache-tree. This teaches one-way and two-way "read-tree -m" (and its special form, "read-tree --reset" as well) not to discard cache-tree but invalidate only the changed parts of the tree. When switching between related branches, this helps the eventual commit (i.e. write-tree) by keeping cache-tree valid as much as possible. This does not prime cache-tree yet, but we ought to be able to do that for the no-merge (i.e. reading from a tree object) case, and also perhaps the 1 way merge case. With this patch applied, switching between the tip of Linux 2.6 kernel tree and a branch that touches one path (fs/ext3/Makefile) from it invalidates only 3 paths out of 1201 cache-tree entries in the index, and subsequent write-tree takes about half as much time as before. Signed-off-by: Junio C Hamano --- read-tree.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/read-tree.c b/read-tree.c index 1c65101291..ab516824ef 100644 --- a/read-tree.c +++ b/read-tree.c @@ -422,6 +422,12 @@ static void verify_uptodate(struct cache_entry *ce) die("Entry '%s' not uptodate. 
Cannot merge.", ce->name); } +static void invalidate_ce_path(struct cache_entry *ce) +{ + if (ce) + cache_tree_invalidate_path(active_cache_tree, ce->name); +} + static int merged_entry(struct cache_entry *merge, struct cache_entry *old) { merge->ce_flags |= htons(CE_UPDATE); @@ -437,6 +443,7 @@ static int merged_entry(struct cache_entry *merge, struct cache_entry *old) *merge = *old; } else { verify_uptodate(old); + invalidate_ce_path(old); } } merge->ce_flags &= ~htons(CE_STAGEMASK); @@ -450,6 +457,7 @@ static int deleted_entry(struct cache_entry *ce, struct cache_entry *old) verify_uptodate(old); ce->ce_mode = 0; add_cache_entry(ce, ADD_CACHE_OK_TO_ADD); + invalidate_ce_path(ce); return 1; } @@ -684,8 +692,10 @@ static int oneway_merge(struct cache_entry **src) return error("Cannot do a oneway merge of %d trees", merge_size); - if (!a) + if (!a) { + invalidate_ce_path(old); return 0; + } if (old && same(old, a)) { return keep_entry(old); } @@ -704,6 +714,7 @@ static int read_cache_unmerged(void) struct cache_entry *ce = active_cache[i]; if (ce_stage(ce)) { deleted++; + invalidate_ce_path(ce); continue; } if (deleted) @@ -815,10 +826,9 @@ int main(int argc, char **argv) fn = twoway_merge; break; case 3: - fn = threeway_merge; - break; default: fn = threeway_merge; + cache_tree_free(&active_cache_tree); break; } @@ -829,7 +839,6 @@ int main(int argc, char **argv) } unpack_trees(fn); - cache_tree_free(&active_cache_tree); if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("unable to write new index file"); From 7927a55d5bde25702dca4fb1a7d6eb7ef61110ba Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 27 Apr 2006 01:33:07 -0700 Subject: [PATCH 3/3] read-tree: teach 1-way merge and plain read to prime cache-tree. This teaches read-tree to fully populate valid cache-tree when reading a tree from scratch, or reading a single tree into an existing index, reusing only the cached stat information (i.e. one-way merge). 
We have already taught update-index about cache-tree, so "git checkout" followed by updates to a few paths followed by a "git commit" would become very efficient. Signed-off-by: Junio C Hamano --- cache-tree.c | 11 ++++++++--- cache-tree.h | 1 + read-tree.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index d8438d67d7..35740b3647 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -91,6 +91,12 @@ static struct cache_tree_sub *find_subtree(struct cache_tree *it, return down; } +struct cache_tree_sub *cache_tree_sub(struct cache_tree *it, const char *path) +{ + int pathlen = strlen(path); + return find_subtree(it, path, pathlen, 1); +} + void cache_tree_invalidate_path(struct cache_tree *it, const char *path) { /* a/b/c @@ -476,12 +482,11 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) struct cache_tree *sub; struct cache_tree_sub *subtree; const char *name = buf; - int namelen; + sub = read_one(&buf, &size); if (!sub) goto free_return; - namelen = strlen(name); - subtree = find_subtree(it, name, namelen, 1); + subtree = cache_tree_sub(it, name); subtree->cache_tree = sub; } if (subtree_nr != it->subtree_nr) diff --git a/cache-tree.h b/cache-tree.h index c70a7699a9..5d824df2ec 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -20,6 +20,7 @@ struct cache_tree { struct cache_tree *cache_tree(void); void cache_tree_free(struct cache_tree **); void cache_tree_invalidate_path(struct cache_tree *, const char *); +struct cache_tree_sub *cache_tree_sub(struct cache_tree *, const char *); void *cache_tree_write(struct cache_tree *root, unsigned long *size_p); struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); diff --git a/read-tree.c b/read-tree.c index ab516824ef..66c0120f13 100644 --- a/read-tree.c +++ b/read-tree.c @@ -725,6 +725,39 @@ static int read_cache_unmerged(void) return deleted; } +static void 
prime_cache_tree_rec(struct cache_tree *it, struct tree *tree) +{ + struct tree_entry_list *ent; + int cnt; + + memcpy(it->sha1, tree->object.sha1, 20); + for (cnt = 0, ent = tree->entries; ent; ent = ent->next) { + if (!ent->directory) + cnt++; + else { + struct cache_tree_sub *sub; + struct tree *subtree = (struct tree *)ent->item.tree; + if (!subtree->object.parsed) + parse_tree(subtree); + sub = cache_tree_sub(it, ent->name); + sub->cache_tree = cache_tree(); + prime_cache_tree_rec(sub->cache_tree, subtree); + cnt += sub->cache_tree->entry_count; + } + } + it->entry_count = cnt; +} + +static void prime_cache_tree(void) +{ + struct tree *tree = (struct tree *)trees->item; + if (!tree) + return; + active_cache_tree = cache_tree(); + prime_cache_tree_rec(active_cache_tree, tree); + +} + static const char read_tree_usage[] = "git-read-tree ( | -m [--aggressive] [-u | -i] [ []])"; static struct cache_file cache_file; @@ -839,6 +872,18 @@ int main(int argc, char **argv) } unpack_trees(fn); + + /* + * When reading only one tree (either the most basic form, + * "-m ent" or "--reset ent" form), we can obtain a fully + * valid cache-tree because the index must match exactly + * what came from the tree. + */ + if (trees->item && (!merge || (stage == 2))) { + cache_tree_free(&active_cache_tree); + prime_cache_tree(); + } + if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("unable to write new index file");