From 1af1c2b63db6a413fbeb9b08cd55dcb735d7597d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Apr 2006 16:52:08 -0700 Subject: [PATCH 1/5] read-cache/write-cache: optionally return cache checksum SHA1. read_cache_1() and write_cache_1() take an extra parameter *sha1 that returns the checksum of the index file when non-NULL. Signed-off-by: Junio C Hamano --- cache.h | 5 ++++- read-cache.c | 35 +++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/cache.h b/cache.h index 69801b02d5..8c9947ef49 100644 --- a/cache.h +++ b/cache.h @@ -138,8 +138,11 @@ extern const char *prefix_filename(const char *prefix, int len, const char *path #define alloc_nr(x) (((x)+16)*3/2) /* Initialize and use the cache information */ +extern int read_cache_1(unsigned char *); +extern int write_cache_1(int, struct cache_entry **, int, unsigned char *); extern int read_cache(void); -extern int write_cache(int newfd, struct cache_entry **cache, int entries); +extern int write_cache(int, struct cache_entry **, int); + extern int cache_name_pos(const char *name, int namelen); #define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */ #define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */ diff --git a/read-cache.c b/read-cache.c index f97f92d90a..50e094e053 100644 --- a/read-cache.c +++ b/read-cache.c @@ -496,10 +496,12 @@ int add_cache_entry(struct cache_entry *ce, int option) return 0; } -static int verify_hdr(struct cache_header *hdr, unsigned long size) +static int verify_hdr(struct cache_header *hdr, unsigned long size, unsigned char *sha1) { SHA_CTX c; - unsigned char sha1[20]; + unsigned char sha1_buf[20]; + if (!sha1) + sha1 = sha1_buf; if (hdr->hdr_signature != htonl(CACHE_SIGNATURE)) return error("bad signature"); @@ -513,7 +515,7 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size) return 0; } -int read_cache(void) +int read_cache_1(unsigned char *cache_sha1) { int fd, i; struct stat st; @@ -547,7 +549,7 @@ int 
read_cache(void) die("index file mmap failed (%s)", strerror(errno)); hdr = map; - if (verify_hdr(hdr, size) < 0) + if (verify_hdr(hdr, size, cache_sha1) < 0) goto unmap; active_nr = ntohl(hdr->hdr_entries); @@ -595,7 +597,7 @@ static int ce_write(SHA_CTX *context, int fd, void *data, unsigned int len) return 0; } -static int ce_flush(SHA_CTX *context, int fd) +static int ce_flush(SHA_CTX *context, int fd, unsigned char *sha1) { unsigned int left = write_buffer_len; @@ -612,7 +614,8 @@ static int ce_flush(SHA_CTX *context, int fd) } /* Append the SHA1 signature at the end */ - SHA1_Final(write_buffer + left, context); + SHA1_Final(sha1, context); + memcpy(write_buffer + left, sha1, 20); left += 20; if (write(fd, write_buffer, left) != left) return -1; @@ -663,11 +666,14 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce) } } -int write_cache(int newfd, struct cache_entry **cache, int entries) +int write_cache_1(int newfd, struct cache_entry **cache, int entries, + unsigned char *cache_sha1) { SHA_CTX c; struct cache_header hdr; int i, removed; + int status; + unsigned char sha1[20]; for (i = removed = 0; i < entries; i++) if (!cache[i]->ce_mode) @@ -691,5 +697,18 @@ int write_cache(int newfd, struct cache_entry **cache, int entries) if (ce_write(&c, newfd, ce, ce_size(ce)) < 0) return -1; } - return ce_flush(&c, newfd); + status = ce_flush(&c, newfd, sha1); + if (cache_sha1) + memcpy(cache_sha1, sha1, 20); + return status; +} + +int read_cache(void) +{ + return read_cache_1(NULL); +} + +int write_cache(int newfd, struct cache_entry **cache, int entries) +{ + return write_cache_1(newfd, cache, entries, NULL); } From 749864627c2d3c33bbc56d7ba0b5542af698cc40 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Apr 2006 16:52:20 -0700 Subject: [PATCH 2/5] Add cache-tree. The cache_tree data structure is to cache tree object names that would result from the current index file. 
The idea is to have an optional file to record each tree object name that corresponds to a directory path in the cache when we run write_cache(), and read it back when we run read_cache(). During various index manipulations, we selectively invalidate the parts so that the next write-tree can bypass regenerating tree objects for unchanged parts of the directory hierarchy. We could perhaps make the cache-tree data an optional part of the index file, but that would involve the index format updates, so unless we need it for performance reasons, the current plan is to use a separate file, $GIT_DIR/index.aux to store this information and link it with the index file with the checksum that is already used for index file integrity check. Signed-off-by: Junio C Hamano --- Makefile | 2 +- cache-tree.c | 519 +++++++++++++++++++++++++++++++++++++++++++++++++++ cache-tree.h | 29 +++ 3 files changed, 549 insertions(+), 1 deletion(-) create mode 100644 cache-tree.c create mode 100644 cache-tree.h diff --git a/Makefile b/Makefile index d9a3a82fe2..518c3c176b 100644 --- a/Makefile +++ b/Makefile @@ -204,7 +204,7 @@ DIFF_OBJS = \ diffcore-delta.o log-tree.o LIB_OBJS = \ - blob.o commit.o connect.o csum-file.o \ + blob.o commit.o connect.o csum-file.o cache-tree.o \ date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \ object.o pack-check.o patch-delta.o path.o pkt-line.o \ quote.o read-cache.o refs.o run-command.o \ diff --git a/cache-tree.c b/cache-tree.c new file mode 100644 index 0000000000..f6d1dd1d7f --- /dev/null +++ b/cache-tree.c @@ -0,0 +1,519 @@ +#include "cache.h" +#include "tree.h" +#include "cache-tree.h" + +#define DEBUG 0 + +struct cache_tree *cache_tree(void) +{ + struct cache_tree *it = xcalloc(1, sizeof(struct cache_tree)); + it->entry_count = -1; + return it; +} + +void cache_tree_free(struct cache_tree *it) +{ + int i; + + if (!it) + return; + for (i = 0; i < it->subtree_nr; i++) + cache_tree_free(it->down[i]->cache_tree); + free(it->down); + free(it); +} + 
+static struct cache_tree_sub *find_subtree(struct cache_tree *it, + const char *path, + int pathlen, + int create) +{ + int i; + struct cache_tree_sub *down; + for (i = 0; i < it->subtree_nr; i++) { + down = it->down[i]; + if (down->namelen == pathlen && + !memcmp(down->name, path, pathlen)) + return down; + } + if (!create) + return NULL; + if (it->subtree_alloc <= it->subtree_nr) { + it->subtree_alloc = alloc_nr(it->subtree_alloc); + it->down = xrealloc(it->down, it->subtree_alloc * + sizeof(*it->down)); + } + down = xmalloc(sizeof(*down) + pathlen + 1); + down->cache_tree = NULL; /* cache_tree(); */ + down->namelen = pathlen; + memcpy(down->name, path, pathlen); + down->name[pathlen] = 0; /* not strictly needed */ + it->down[it->subtree_nr++] = down; + return down; +} + +void cache_tree_invalidate_path(struct cache_tree *it, const char *path) +{ + /* a/b/c + * ==> invalidate self + * ==> find "a", have it invalidate "b/c" + * a + * ==> invalidate self + * ==> if "a" exists as a subtree, remove it. 
+ */ + const char *slash; + int namelen; + struct cache_tree_sub *down; + + if (!it) + return; + slash = strchr(path, '/'); + it->entry_count = -1; + if (!slash) { + int i; + namelen = strlen(path); + for (i = 0; i < it->subtree_nr; i++) { + if (it->down[i]->namelen == namelen && + !memcmp(it->down[i]->name, path, namelen)) + break; + } + if (i < it->subtree_nr) { + cache_tree_free(it->down[i]->cache_tree); + free(it->down[i]); + /* 0 1 2 3 4 5 + * ^ ^subtree_nr = 6 + * i + * move 4 and 5 up one place (2 entries) + * 2 = 6 - 3 - 1 = subtree_nr - i - 1 + */ + memmove(it->down+i, it->down+i+1, + sizeof(struct cache_tree_sub *) * + (it->subtree_nr - i - 1)); + it->subtree_nr--; + } + return; + } + namelen = slash - path; + down = find_subtree(it, path, namelen, 0); + if (down) + cache_tree_invalidate_path(down->cache_tree, slash + 1); +} + +static int verify_cache(struct cache_entry **cache, + int entries) +{ + int i, funny; + + /* Verify that the tree is merged */ + funny = 0; + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + if (ce_stage(ce)) { + if (10 < ++funny) { + fprintf(stderr, "...\n"); + break; + } + fprintf(stderr, "%s: unmerged (%s)\n", + ce->name, sha1_to_hex(ce->sha1)); + } + } + if (funny) + return -1; + + /* Also verify that the cache does not have path and path/file + * at the same time. At this point we know the cache has only + * stage 0 entries. + */ + funny = 0; + for (i = 0; i < entries - 1; i++) { + /* path/file always comes after path because of the way + * the cache is sorted. Also path can appear only once, + * which means conflicting one would immediately follow. 
+ */ + const char *this_name = cache[i]->name; + const char *next_name = cache[i+1]->name; + int this_len = strlen(this_name); + if (this_len < strlen(next_name) && + strncmp(this_name, next_name, this_len) == 0 && + next_name[this_len] == '/') { + if (10 < ++funny) { + fprintf(stderr, "...\n"); + break; + } + fprintf(stderr, "You have both %s and %s\n", + this_name, next_name); + } + } + if (funny) + return -1; + return 0; +} + +static void discard_unused_subtrees(struct cache_tree *it) +{ + struct cache_tree_sub **down = it->down; + int nr = it->subtree_nr; + int dst, src; + for (dst = src = 0; src < nr; src++) { + struct cache_tree_sub *s = down[src]; + if (s->used) + down[dst++] = s; + else { + cache_tree_free(s->cache_tree); + free(s); + it->subtree_nr--; + } + } +} + +static int update_one(struct cache_tree *it, + struct cache_entry **cache, + int entries, + const char *base, + int baselen, + int missing_ok) +{ + unsigned long size, offset; + char *buffer; + int i; + + if (0 <= it->entry_count) + return it->entry_count; + + /* + * We first scan for subtrees and update them; we start by + * marking existing subtrees -- the ones that are unmarked + * should not be in the result. + */ + for (i = 0; i < it->subtree_nr; i++) + it->down[i]->used = 0; + + /* + * Find the subtrees and update them. 
+ */ + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + struct cache_tree_sub *sub; + const char *path, *slash; + int pathlen, sublen, subcnt; + + path = ce->name; + pathlen = ce_namelen(ce); + if (pathlen <= baselen || memcmp(base, path, baselen)) + break; /* at the end of this level */ + + slash = strchr(path + baselen, '/'); + if (!slash) + continue; + /* + * a/bbb/c (base = a/, slash = /c) + * ==> + * path+baselen = bbb/c, sublen = 3 + */ + sublen = slash - (path + baselen); + sub = find_subtree(it, path + baselen, sublen, 1); + if (!sub->cache_tree) + sub->cache_tree = cache_tree(); + subcnt = update_one(sub->cache_tree, + cache + i, entries - i, + path, + baselen + sublen + 1, + missing_ok); + i += subcnt - 1; + sub->used = 1; + } + + discard_unused_subtrees(it); + + /* + * Then write out the tree object for this level. + */ + size = 8192; + buffer = xmalloc(size); + offset = 0; + + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + struct cache_tree_sub *sub; + const char *path, *slash; + int pathlen, entlen; + const unsigned char *sha1; + unsigned mode; + + path = ce->name; + pathlen = ce_namelen(ce); + if (pathlen <= baselen || memcmp(base, path, baselen)) + break; /* at the end of this level */ + + slash = strchr(path + baselen, '/'); + if (slash) { + entlen = slash - (path + baselen); + sub = find_subtree(it, path + baselen, entlen, 0); + if (!sub) + die("cache-tree.c: '%.*s' in '%s' not found", + entlen, path + baselen, path); + i += sub->cache_tree->entry_count - 1; + sha1 = sub->cache_tree->sha1; + mode = S_IFDIR; + } + else { + sha1 = ce->sha1; + mode = ntohl(ce->ce_mode); + entlen = pathlen - baselen; + } + if (!missing_ok && !has_sha1_file(sha1)) + return error("invalid object %s", sha1_to_hex(sha1)); + + if (!ce->ce_mode) + continue; /* entry being removed */ + + if (size < offset + entlen + 100) { + size = alloc_nr(offset + entlen + 100); + buffer = xrealloc(buffer, size); + } + offset += sprintf(buffer + 
offset, + "%o %.*s", mode, entlen, path + baselen); + buffer[offset++] = 0; + memcpy(buffer + offset, sha1, 20); + offset += 20; + +#if DEBUG + fprintf(stderr, "cache-tree %o %.*s\n", + mode, entlen, path + baselen); +#endif + } + + write_sha1_file(buffer, offset, tree_type, it->sha1); + free(buffer); + it->entry_count = i; +#if DEBUG + fprintf(stderr, "cache-tree (%d ent, %d subtree) %s\n", + it->entry_count, it->subtree_nr, + sha1_to_hex(it->sha1)); +#endif + return i; +} + +int cache_tree_update(struct cache_tree *it, + struct cache_entry **cache, + int entries, + int missing_ok) +{ + int i; + i = verify_cache(cache, entries); + if (i) + return i; + i = update_one(it, cache, entries, "", 0, missing_ok); + if (i < 0) + return i; + return 0; +} + +static void *write_one(struct cache_tree *it, + char *path, + int pathlen, + char *buffer, + unsigned long *size, + unsigned long *offset) +{ + int i; + + /* One "cache-tree" entry consists of the following: + * path (NUL terminated) + * entry_count, subtree_nr ("%d %d\n") + * tree-sha1 (missing if invalid) + * subtree_nr "cache-tree" entries for subtrees. 
+ */ + if (*size < *offset + pathlen + 100) { + *size = alloc_nr(*offset + pathlen + 100); + buffer = xrealloc(buffer, *size); + } + *offset += sprintf(buffer + *offset, "%.*s%c%d %d\n", + pathlen, path, 0, + it->entry_count, it->subtree_nr); + +#if DEBUG + if (0 <= it->entry_count) + fprintf(stderr, "cache-tree <%.*s> (%d ent, %d subtree) %s\n", + pathlen, path, it->entry_count, it->subtree_nr, + sha1_to_hex(it->sha1)); + else + fprintf(stderr, "cache-tree <%.*s> (%d subtree) invalid\n", + pathlen, path, it->subtree_nr); +#endif + + if (0 <= it->entry_count) { + memcpy(buffer + *offset, it->sha1, 20); + *offset += 20; + } + for (i = 0; i < it->subtree_nr; i++) { + struct cache_tree_sub *down = it->down[i]; + buffer = write_one(down->cache_tree, down->name, down->namelen, + buffer, size, offset); + } + return buffer; +} + +static void *cache_tree_write(const unsigned char *cache_sha1, + struct cache_tree *root, + unsigned long *offset_p) +{ + char path[PATH_MAX]; + unsigned long size = 8192; + char *buffer = xmalloc(size); + + /* the cache checksum of the corresponding index file. 
*/ + memcpy(buffer, cache_sha1, 20); + *offset_p = 20; + path[0] = 0; + return write_one(root, path, 0, buffer, &size, offset_p); +} + +static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) +{ + const char *buf = *buffer; + unsigned long size = *size_p; + struct cache_tree *it; + int i, subtree_nr; + + it = NULL; + /* skip name, but make sure name exists */ + while (size && *buf) { + size--; + buf++; + } + if (!size) + goto free_return; + buf++; size--; + it = cache_tree(); + if (sscanf(buf, "%d %d\n", &it->entry_count, &subtree_nr) != 2) + goto free_return; + while (size && *buf && *buf != '\n') { + size--; + buf++; + } + if (!size) + goto free_return; + buf++; size--; + if (0 <= it->entry_count) { + if (size < 20) + goto free_return; + memcpy(it->sha1, buf, 20); + buf += 20; + size -= 20; + } + +#if DEBUG + if (0 <= it->entry_count) + fprintf(stderr, "cache-tree <%s> (%d ent, %d subtree) %s\n", + *buffer, it->entry_count, subtree_nr, + sha1_to_hex(it->sha1)); + else + fprintf(stderr, "cache-tree <%s> (%d subtrees) invalid\n", + *buffer, subtree_nr); +#endif + + /* + * Just a heuristic -- we do not add directories that often but + * we do not want to have to extend it immediately when we do, + * hence +2. 
+ */ + it->subtree_alloc = subtree_nr + 2; + it->down = xcalloc(it->subtree_alloc, sizeof(struct cache_tree_sub *)); + for (i = 0; i < subtree_nr; i++) { + /* read each subtree */ + struct cache_tree *sub; + const char *name = buf; + int namelen; + sub = read_one(&buf, &size); + if (!sub) + goto free_return; + namelen = strlen(name); + it->down[i] = find_subtree(it, name, namelen, 1); + it->down[i]->cache_tree = sub; + } + if (subtree_nr != it->subtree_nr) + die("cache-tree: internal error"); + *buffer = buf; + *size_p = size; + return it; + + free_return: + cache_tree_free(it); + return NULL; +} + +static struct cache_tree *cache_tree_read(unsigned char *sha1, + const char *buffer, + unsigned long size) +{ + /* check the cache-tree matches the index */ + if (memcmp(buffer, sha1, 20)) + return NULL; /* checksum mismatch */ + if (buffer[20]) + return NULL; /* not the whole tree */ + buffer += 20; + size -= 20; + return read_one(&buffer, &size); +} + +struct cache_tree *read_cache_tree(unsigned char *sha1) +{ + int fd; + struct stat st; + char path[PATH_MAX]; + unsigned long size = 0; + void *map; + struct cache_tree *it; + + sprintf(path, "%s.aux", get_index_file()); + fd = open(path, O_RDONLY); + if (fd < 0) + return cache_tree(); + + if (fstat(fd, &st)) + return cache_tree(); + size = st.st_size; + map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (map == MAP_FAILED) + return cache_tree(); + it = cache_tree_read(sha1, map, size); + munmap(map, size); + if (!it) + return cache_tree(); + return it; +} + +int write_cache_tree(const unsigned char *sha1, struct cache_tree *root) +{ + char path[PATH_MAX]; + unsigned long size = 0; + void *buf, *buffer; + int fd, ret = -1; + + sprintf(path, "%s.aux", get_index_file()); + if (!root) { + unlink(path); + return -1; + } + fd = open(path, O_WRONLY|O_CREAT, 0666); + if (fd < 0) + return -1; + buffer = buf = cache_tree_write(sha1, root, &size); + while (size) { + int written = xwrite(fd, buf, size); + if 
(written <= 0) + goto fail; + buf += written; + size -= written; + } + ret = 0; + + fail: + close(fd); + free(buffer); + if (ret) + unlink(path); + return ret; +} diff --git a/cache-tree.h b/cache-tree.h new file mode 100644 index 0000000000..7b149afdc5 --- /dev/null +++ b/cache-tree.h @@ -0,0 +1,29 @@ +#ifndef CACHE_TREE_H +#define CACHE_TREE_H + +struct cache_tree; +struct cache_tree_sub { + struct cache_tree *cache_tree; + int namelen; + int used; + char name[FLEX_ARRAY]; +}; + +struct cache_tree { + int entry_count; /* negative means "invalid" */ + unsigned char sha1[20]; + int subtree_nr; + int subtree_alloc; + struct cache_tree_sub **down; +}; + +struct cache_tree *cache_tree(void); +void cache_tree_free(struct cache_tree *); +void cache_tree_invalidate_path(struct cache_tree *, const char *); + +int write_cache_tree(const unsigned char *, struct cache_tree *); +struct cache_tree *read_cache_tree(unsigned char *); +int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int); + + +#endif From a52139b47e505e74e23a02f5324485e11dfe4ef9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Apr 2006 16:52:35 -0700 Subject: [PATCH 3/5] Update write-tree to use cache-tree. The updated write-tree reads from $GIT_DIR/index.aux to pick up subtree objects information, updates the cache-tree with the index, and updates index.aux file after writing a tree out of the index file. Until update-index and other programs that modify the index are updated to maintain index.aux file, the index.aux file written by the last write-tree will become stale immediately after they update the index, which will result in the whole tree recomputation just like the original write-tree. The idea is to convert those commands to invalidate cache-tree whenever they touch the index entries, and write updated index.aux out. After the index is updated with them, write-tree will be able to reuse the parts of the cache-tree that have not been touched. 
Signed-off-by: Junio C Hamano --- write-tree.c | 137 +++++---------------------------------------------- 1 file changed, 12 insertions(+), 125 deletions(-) diff --git a/write-tree.c b/write-tree.c index dcad6e6670..cef0c5bb42 100644 --- a/write-tree.c +++ b/write-tree.c @@ -5,96 +5,23 @@ */ #include "cache.h" #include "tree.h" +#include "cache-tree.h" + +static unsigned char active_cache_sha1[20]; +static struct cache_tree *active_cache_tree; static int missing_ok = 0; -static int check_valid_sha1(unsigned char *sha1) -{ - int ret; - - /* If we were anal, we'd check that the sha1 of the contents actually matches */ - ret = has_sha1_file(sha1); - if (ret == 0) - perror(sha1_file_name(sha1)); - return ret ? 0 : -1; -} - -static int write_tree(struct cache_entry **cachep, int maxentries, const char *base, int baselen, unsigned char *returnsha1) -{ - unsigned char subdir_sha1[20]; - unsigned long size, offset; - char *buffer; - int nr; - - /* Guess at some random initial size */ - size = 8192; - buffer = xmalloc(size); - offset = 0; - - nr = 0; - while (nr < maxentries) { - struct cache_entry *ce = cachep[nr]; - const char *pathname = ce->name, *filename, *dirname; - int pathlen = ce_namelen(ce), entrylen; - unsigned char *sha1; - unsigned int mode; - - /* Did we hit the end of the directory? Return how many we wrote */ - if (baselen >= pathlen || memcmp(base, pathname, baselen)) - break; - - sha1 = ce->sha1; - mode = ntohl(ce->ce_mode); - - /* Do we have _further_ subdirectories? */ - filename = pathname + baselen; - dirname = strchr(filename, '/'); - if (dirname) { - int subdir_written; - - subdir_written = write_tree(cachep + nr, maxentries - nr, pathname, dirname-pathname+1, subdir_sha1); - nr += subdir_written; - - /* Now we need to write out the directory entry into this tree.. 
*/ - mode = S_IFDIR; - pathlen = dirname - pathname; - - /* ..but the directory entry doesn't count towards the total count */ - nr--; - sha1 = subdir_sha1; - } - - if (!missing_ok && check_valid_sha1(sha1) < 0) - exit(1); - - entrylen = pathlen - baselen; - if (offset + entrylen + 100 > size) { - size = alloc_nr(offset + entrylen + 100); - buffer = xrealloc(buffer, size); - } - offset += sprintf(buffer + offset, "%o %.*s", mode, entrylen, filename); - buffer[offset++] = 0; - memcpy(buffer + offset, sha1, 20); - offset += 20; - nr++; - } - - write_sha1_file(buffer, offset, tree_type, returnsha1); - free(buffer); - return nr; -} - static const char write_tree_usage[] = "git-write-tree [--missing-ok]"; int main(int argc, char **argv) { - int i, funny; int entries; - unsigned char sha1[20]; - + setup_git_directory(); - entries = read_cache(); + entries = read_cache_1(active_cache_sha1); + active_cache_tree = read_cache_tree(active_cache_sha1); if (argc == 2) { if (!strcmp(argv[1], "--missing-ok")) missing_ok = 1; @@ -108,51 +35,11 @@ int main(int argc, char **argv) if (entries < 0) die("git-write-tree: error reading cache"); - /* Verify that the tree is merged */ - funny = 0; - for (i = 0; i < entries; i++) { - struct cache_entry *ce = active_cache[i]; - if (ce_stage(ce)) { - if (10 < ++funny) { - fprintf(stderr, "...\n"); - break; - } - fprintf(stderr, "%s: unmerged (%s)\n", ce->name, sha1_to_hex(ce->sha1)); - } - } - if (funny) - die("git-write-tree: not able to write tree"); + if (cache_tree_update(active_cache_tree, active_cache, active_nr, + missing_ok)) + die("git-write-tree: error building trees"); + write_cache_tree(active_cache_sha1, active_cache_tree); - /* Also verify that the cache does not have path and path/file - * at the same time. At this point we know the cache has only - * stage 0 entries. - */ - funny = 0; - for (i = 0; i < entries - 1; i++) { - /* path/file always comes after path because of the way - * the cache is sorted. 
Also path can appear only once, - * which means conflicting one would immediately follow. - */ - const char *this_name = active_cache[i]->name; - const char *next_name = active_cache[i+1]->name; - int this_len = strlen(this_name); - if (this_len < strlen(next_name) && - strncmp(this_name, next_name, this_len) == 0 && - next_name[this_len] == '/') { - if (10 < ++funny) { - fprintf(stderr, "...\n"); - break; - } - fprintf(stderr, "You have both %s and %s\n", - this_name, next_name); - } - } - if (funny) - die("git-write-tree: not able to write tree"); - - /* Ok, write it out */ - if (write_tree(active_cache, entries, "", 0, sha1) != entries) - die("git-write-tree: internal error"); - printf("%s\n", sha1_to_hex(sha1)); + printf("%s\n", sha1_to_hex(active_cache_tree->sha1)); return 0; } From 03ac6e64651e4b5ca0c2164a23b5f345f2c03af4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Apr 2006 16:52:52 -0700 Subject: [PATCH 4/5] Invalidate cache-tree entries for touched paths in git-apply. This updates git-apply to maintain cache-tree information. With this and the previous write-tree patch, repeated "apply --index" followed by "write-tree" on a huge tree will hopefully become faster. 
Signed-off-by: Junio C Hamano --- apply.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/apply.c b/apply.c index 269210a578..e283df38aa 100644 --- a/apply.c +++ b/apply.c @@ -8,9 +8,14 @@ */ #include #include "cache.h" +#include "cache-tree.h" #include "quote.h" #include "blob.h" +static unsigned char active_cache_sha1[20]; +static struct cache_tree *active_cache_tree; + + // --check turns on checking that the working tree matches the // files that are being modified, but doesn't apply the patch // --stat does just a diffstat, and doesn't actually apply @@ -1717,6 +1722,7 @@ static void remove_file(struct patch *patch) if (write_index) { if (remove_file_from_cache(patch->old_name) < 0) die("unable to remove %s from index", patch->old_name); + cache_tree_invalidate_path(active_cache_tree, patch->old_name); } unlink(patch->old_name); } @@ -1813,8 +1819,9 @@ static void create_file(struct patch *patch) if (!mode) mode = S_IFREG | 0644; - create_one_file(path, mode, buf, size); + create_one_file(path, mode, buf, size); add_index_file(path, mode, buf, size); + cache_tree_invalidate_path(active_cache_tree, path); } static void write_out_one_result(struct patch *patch) @@ -1912,8 +1919,9 @@ static int apply_patch(int fd, const char *filename) if (write_index) newfd = hold_index_file_for_update(&cache_file, get_index_file()); if (check_index) { - if (read_cache() < 0) + if (read_cache_1(active_cache_sha1) < 0) die("unable to read index file"); + active_cache_tree = read_cache_tree(active_cache_sha1); } if ((check || apply) && check_patch_list(list) < 0) @@ -1923,9 +1931,11 @@ static int apply_patch(int fd, const char *filename) write_out_results(list, skipped_patch); if (write_index) { - if (write_cache(newfd, active_cache, active_nr) || + if (write_cache_1(newfd, active_cache, active_nr, + active_cache_sha1) || commit_index_file(&cache_file)) die("Unable to write new cachefile"); + write_cache_tree(active_cache_sha1, 
active_cache_tree); } if (show_index_info) From a6e5642f39db2113785c4f22add7e16d559b8d55 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 24 Apr 2006 00:23:54 -0700 Subject: [PATCH 5/5] Use cache-tree in update-index. Signed-off-by: Junio C Hamano --- update-index.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/update-index.c b/update-index.c index 1efac27c6b..86f53948fc 100644 --- a/update-index.c +++ b/update-index.c @@ -6,6 +6,11 @@ #include "cache.h" #include "strbuf.h" #include "quote.h" +#include "tree.h" +#include "cache-tree.h" + +static unsigned char active_cache_sha1[20]; +static struct cache_tree *active_cache_tree; /* * Default to not allowing changes to the list of files. The @@ -70,6 +75,7 @@ static int mark_valid(const char *path) active_cache[pos]->ce_flags &= ~htons(CE_VALID); break; } + cache_tree_invalidate_path(active_cache_tree, path); active_cache_changed = 1; return 0; } @@ -83,6 +89,12 @@ static int add_file_to_cache(const char *path) struct stat st; status = lstat(path, &st); + + /* We probably want to do this in remove_file_from_cache() and + * add_cache_entry() instead... 
+ */ + cache_tree_invalidate_path(active_cache_tree, path); + if (status < 0 || S_ISDIR(st.st_mode)) { /* When we used to have "path" and now we want to add * "path/file", we need a way to remove "path" before @@ -325,6 +337,7 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1, return error("%s: cannot add to the index - missing --add option?", path); report("add '%s'", path); + cache_tree_invalidate_path(active_cache_tree, path); return 0; } @@ -349,6 +362,7 @@ static int chmod_path(int flip, const char *path) default: return -1; } + cache_tree_invalidate_path(active_cache_tree, path); active_cache_changed = 1; return 0; } @@ -367,6 +381,7 @@ static void update_one(const char *path, const char *prefix, int prefix_length) die("Unable to mark file %s", path); return; } + cache_tree_invalidate_path(active_cache_tree, path); if (force_remove) { if (remove_file_from_cache(p)) @@ -442,6 +457,7 @@ static void read_index_info(int line_termination) free(path_name); continue; } + cache_tree_invalidate_path(active_cache_tree, path_name); if (!mode) { /* mode == 0 means there is no such path -- remove */ @@ -485,9 +501,10 @@ int main(int argc, const char **argv) if (newfd < 0) die("unable to create new cachefile"); - entries = read_cache(); + entries = read_cache_1(active_cache_sha1); if (entries < 0) die("cache corrupted"); + active_cache_tree = read_cache_tree(active_cache_sha1); for (i = 1 ; i < argc; i++) { const char *path = argv[i]; @@ -613,9 +630,11 @@ int main(int argc, const char **argv) } } if (active_cache_changed) { - if (write_cache(newfd, active_cache, active_nr) || + if (write_cache_1(newfd, active_cache, active_nr, + active_cache_sha1) || commit_index_file(&cache_file)) die("Unable to write new cachefile"); + write_cache_tree(active_cache_sha1, active_cache_tree); } return has_errors ? 1 : 0;