From 355885d531568e144ce5c9a2f65792a3aa8b5528 Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 25 Feb 2008 22:46:04 +0100 Subject: [PATCH 01/10] add generic, type aware object chain walker The requirements are: * it may not crash on NULL pointers * a callback function is needed, as index-pack/unpack-objects need to do different things * the type information is needed to check the expected <-> real type and print better error messages Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- Makefile | 4 +-- cache.h | 1 + fsck.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fsck.h | 23 ++++++++++++++ 4 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 fsck.c create mode 100644 fsck.h diff --git a/Makefile b/Makefile index 92341c4bbe..33c367dfb1 100644 --- a/Makefile +++ b/Makefile @@ -297,7 +297,7 @@ LIB_H = \ run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \ tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \ utf8.h reflog-walk.h patch-ids.h attr.h decorate.h progress.h \ - mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h + mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h fsck.h DIFF_OBJS = \ diff.o diff-lib.o diffcore-break.o diffcore-order.o \ @@ -320,7 +320,7 @@ LIB_OBJS = \ alloc.o merge-file.o path-list.o help.o unpack-trees.o $(DIFF_OBJS) \ color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \ convert.o attr.o decorate.o progress.o mailmap.o symlinks.o remote.o \ - transport.o bundle.o walker.o parse-options.o ws.o archive.o + transport.o bundle.o walker.o parse-options.o ws.o archive.o fsck.o BUILTIN_OBJS = \ builtin-add.o \ diff --git a/cache.h b/cache.h index 43ba6a3ba5..d3b898708b 100644 --- a/cache.h +++ b/cache.h @@ -189,6 +189,7 @@ enum object_type { /* 5 for future expansion */ OBJ_OFS_DELTA = 6, OBJ_REF_DELTA = 7, + OBJ_ANY, OBJ_MAX, }; diff --git a/fsck.c b/fsck.c new file mode 100644 index 0000000000..98fb41af68 --- /dev/null +++ b/fsck.c @@ -0,0 +1,91 @@ +#include "cache.h" +#include "object.h" +#include "blob.h" +#include "tree.h" +#include "tree-walk.h" +#include "commit.h" +#include "tag.h" +#include "fsck.h" + +static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data) +{ + struct tree_desc desc; + struct name_entry entry; + int res = 0; + + if (parse_tree(tree)) + return -1; + + init_tree_desc(&desc, tree->buffer, tree->size); + while (tree_entry(&desc, &entry)) { + int result; + + if (S_ISGITLINK(entry.mode)) + continue; + if (S_ISDIR(entry.mode)) + result = walk(&lookup_tree(entry.sha1)->object, OBJ_TREE, data); + else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) + result = walk(&lookup_blob(entry.sha1)->object, OBJ_BLOB, data); + else { + result = error("in tree %s: entry %s has bad mode %.6o\n", + sha1_to_hex(tree->object.sha1), entry.path, entry.mode); + } + if (result < 0) + return result; + if (!res) + res = result; + } + return res; +} + +static int fsck_walk_commit(struct commit *commit, fsck_walk_func walk, void *data) +{ + struct commit_list *parents; + int res; + int result; + + if (parse_commit(commit)) + return -1; + + result = walk((struct object *)commit->tree, OBJ_TREE, data); + if (result < 0) + return result; + res = result; + + parents = commit->parents; + while (parents) { + result = walk((struct object *)parents->item, OBJ_COMMIT, data); + if (result < 0) + return result; + if (!res) + res = result; + parents = parents->next; + } + return res; +} + +static int fsck_walk_tag(struct tag *tag, fsck_walk_func walk, void *data) +{ + if (parse_tag(tag)) + return -1; + return walk(tag->tagged, OBJ_ANY, data); +} + +int fsck_walk(struct object *obj, fsck_walk_func walk, void *data) +{ + if (!obj) + return -1; + switch (obj->type) { + case OBJ_BLOB: + return 0; + case OBJ_TREE: + return fsck_walk_tree((struct tree *)obj, walk, data); + case OBJ_COMMIT: + return fsck_walk_commit((struct commit *)obj, walk, data); + case OBJ_TAG: + return fsck_walk_tag((struct tag *)obj, walk, data); + default: + error("Unknown object type for %s", sha1_to_hex(obj->sha1)); + return -1; + } +} diff --git a/fsck.h b/fsck.h new file mode 100644 index 0000000000..ba5514a818 --- /dev/null +++ b/fsck.h @@ -0,0 +1,23 @@ +#ifndef GIT_FSCK_H +#define GIT_FSCK_H + +/* + * callback function for fsck_walk + * type is the expected type of the object or OBJ_ANY + * the return value is: + * 0 everything OK + * <0 error signaled and abort + * >0 error signaled and do not abort + */ +typedef int (*fsck_walk_func)(struct object *obj, int type, void *data); + +/* descend in all linked child objects + * the return value is: + * -1 error in processing the object + * <0 return value of the callback, which lead to an abort + * >0 return value of the first sigaled error >0 (in the case of no other errors) + * 0 everything OK + */ +int fsck_walk(struct object *obj, fsck_walk_func walk, void *data); + +#endif From 271b8d25b25e49b367087440e093e755e5f35aa9 Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 25 Feb 2008 22:46:05 +0100 Subject: [PATCH 02/10] builtin-fsck: move away from object-refs to fsck_walk Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- builtin-fsck.c | 99 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 28 deletions(-) diff --git a/builtin-fsck.c b/builtin-fsck.c index 2a6e94deaf..7321ab236b 100644 --- a/builtin-fsck.c +++ b/builtin-fsck.c @@ -8,6 +8,7 @@ #include "pack.h" #include "cache-tree.h" #include "tree-walk.h" +#include "fsck.h" #include "parse-options.h" #define REACHABLE 0x0001 @@ -63,13 +64,73 @@ static int objwarning(struct object *obj, const char *err, ...) return -1; } +static int mark_object(struct object *obj, int type, void *data) +{ + struct tree *tree = NULL; + struct object *parent = data; + int result; + + if (!obj) { + printf("broken link from %7s %s\n", + typename(parent->type), sha1_to_hex(parent->sha1)); + printf("broken link from %7s %s\n", + (type == OBJ_ANY ? "unknown" : typename(type)), "unknown"); + errors_found |= ERROR_REACHABLE; + return 1; + } + + if (type != OBJ_ANY && obj->type != type) + objerror(parent, "wrong object type in link"); + + if (obj->flags & REACHABLE) + return 0; + obj->flags |= REACHABLE; + if (!obj->parsed) { + if (parent && !has_sha1_file(obj->sha1)) { + printf("broken link from %7s %s\n", + typename(parent->type), sha1_to_hex(parent->sha1)); + printf(" to %7s %s\n", + typename(obj->type), sha1_to_hex(obj->sha1)); + errors_found |= ERROR_REACHABLE; + } + return 1; + } + + if (obj->type == OBJ_TREE) { + obj->parsed = 0; + tree = (struct tree *)obj; + if (parse_tree(tree) < 0) + return 1; /* error already displayed */ + } + result = fsck_walk(obj, mark_object, obj); + if (tree) { + free(tree->buffer); + tree->buffer = NULL; + } + if (result < 0) + result = 1; + + return result; +} + +static void mark_object_reachable(struct object *obj) +{ + mark_object(obj, OBJ_ANY, 0); +} + +static int mark_used(struct object *obj, int type, void *data) +{ + if (!obj) + return 1; + obj->used = 1; + return 0; +} + /* * Check a single reachable object */ static void check_reachable_object(struct object *obj) { - const struct object_refs *refs; - /* * We obviously want the object to be parsed, * except if it was in a pack-file and we didn't @@ -82,25 +143,6 @@ static void check_reachable_object(struct object *obj) errors_found |= ERROR_REACHABLE; return; } - - /* - * Check that everything that we try to reference is also good. - */ - refs = lookup_object_refs(obj); - if (refs) { - unsigned j; - for (j = 0; j < refs->count; j++) { - struct object *ref = refs->ref[j]; - if (ref->parsed || - (has_sha1_file(ref->sha1))) - continue; - printf("broken link from %7s %s\n", - typename(obj->type), sha1_to_hex(obj->sha1)); - printf(" to %7s %s\n", - typename(ref->type), sha1_to_hex(ref->sha1)); - errors_found |= ERROR_REACHABLE; - } - } } /* @@ -414,6 +456,8 @@ static int fsck_sha1(const unsigned char *sha1) if (obj->flags & SEEN) return 0; obj->flags |= SEEN; + if (fsck_walk(obj, mark_used, 0)) + objerror(obj, "broken links"); if (obj->type == OBJ_BLOB) return 0; if (obj->type == OBJ_TREE) @@ -538,13 +582,13 @@ static int fsck_handle_reflog_ent(unsigned char *osha1, unsigned char *nsha1, obj = lookup_object(osha1); if (obj) { obj->used = 1; - mark_reachable(obj, REACHABLE); + mark_object_reachable(obj); } } obj = lookup_object(nsha1); if (obj) { obj->used = 1; - mark_reachable(obj, REACHABLE); + mark_object_reachable(obj); } return 0; } @@ -574,7 +618,7 @@ static int fsck_handle_ref(const char *refname, const unsigned char *sha1, int f error("%s: not a commit", refname); default_refs++; obj->used = 1; - mark_reachable(obj, REACHABLE); + mark_object_reachable(obj); return 0; } @@ -660,7 +704,7 @@ static int fsck_cache_tree(struct cache_tree *it) sha1_to_hex(it->sha1)); return 1; } - mark_reachable(obj, REACHABLE); + mark_object_reachable(obj); obj->used = 1; if (obj->type != OBJ_TREE) err |= objerror(obj, "non-tree in cache-tree"); @@ -693,7 +737,6 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) { int i, heads; - track_object_refs = 1; errors_found = 0; argc = parse_options(argc, argv, fsck_opts, fsck_usage, 0); @@ -741,7 +784,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) continue; obj->used = 1; - mark_reachable(obj, REACHABLE); + mark_object_reachable(obj); heads++; continue; } @@ -773,7 +816,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) continue; obj = &blob->object; obj->used = 1; - mark_reachable(obj, REACHABLE); + mark_object_reachable(obj); } if (active_cache_tree) fsck_cache_tree(active_cache_tree); From 7914053ba9901be1f1530f46e8e2e6ee6f4ae5b1 Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 25 Feb 2008 22:46:06 +0100 Subject: [PATCH 03/10] Remove unused object-ref code Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- Makefile | 2 +- builtin-fetch-pack.c | 1 - builtin-pack-objects.c | 1 - builtin-rev-list.c | 1 - commit.c | 11 ------ object-refs.c | 87 ------------------------------------------ object.h | 8 ---- tag.c | 6 --- tree.c | 48 ----------------------- upload-pack.c | 1 - walker.c | 1 - 11 files changed, 1 insertion(+), 166 deletions(-) delete mode 100644 object-refs.c diff --git a/Makefile b/Makefile index 33c367dfb1..8ff0c772f1 100644 --- a/Makefile +++ b/Makefile @@ -312,7 +312,7 @@ LIB_OBJS = \ patch-ids.o \ object.o pack-check.o pack-write.o patch-delta.o path.o pkt-line.o \ sideband.o reachable.o reflog-walk.o \ - quote.o read-cache.o refs.o run-command.o dir.o object-refs.o \ + quote.o read-cache.o refs.o run-command.o dir.o \ server-info.o setup.o sha1_file.o sha1_name.o strbuf.o \ tag.o tree.o usage.o config.o environment.o ctype.o copy.o \ revision.o pager.o tree-walk.o xdiff-interface.o \ diff --git a/builtin-fetch-pack.c b/builtin-fetch-pack.c index e68e01592d..410414d91f 100644 --- a/builtin-fetch-pack.c +++ b/builtin-fetch-pack.c @@ -385,7 +385,6 @@ static int everything_local(struct ref **refs, int nr_match, char **match) int retval; unsigned long cutoff = 0; - track_object_refs = 0; save_commit_buffer = 0; for (ref = *refs; ref; ref = ref->next) { diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index d3efeff03f..6f8f388bdf 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -2009,7 +2009,6 @@ static void get_object_list(int ac, const char **av) init_revisions(&revs, NULL); save_commit_buffer = 0; - track_object_refs = 0; setup_revisions(ac, av, &revs, NULL); while (fgets(line, sizeof(line), stdin) != NULL) { diff --git a/builtin-rev-list.c b/builtin-rev-list.c index de80158fd4..14e86ceabc 100644 --- a/builtin-rev-list.c +++ b/builtin-rev-list.c @@ -605,7 +605,6 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) usage(rev_list_usage); save_commit_buffer = revs.verbose_header || revs.grep_filter; - track_object_refs = 0; if (bisect_list) revs.limited = 1; diff --git a/commit.c b/commit.c index 22ce776863..6684c4e731 100644 --- a/commit.c +++ b/commit.c @@ -290,17 +290,6 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size) } item->date = parse_commit_date(bufptr, tail); - if (track_object_refs) { - unsigned i = 0; - struct commit_list *p; - struct object_refs *refs = alloc_object_refs(n_refs); - if (item->tree) - refs->ref[i++] = &item->tree->object; - for (p = item->parents; p; p = p->next) - refs->ref[i++] = &p->item->object; - set_object_refs(&item->object, refs); - } - return 0; } diff --git a/object-refs.c b/object-refs.c deleted file mode 100644 index 5345671569..0000000000 --- a/object-refs.c +++ /dev/null @@ -1,87 +0,0 @@ -#include "cache.h" -#include "object.h" -#include "decorate.h" - -int track_object_refs = 0; - -static struct decoration ref_decorate; - -struct object_refs *lookup_object_refs(struct object *base) -{ - return lookup_decoration(&ref_decorate, base); -} - -static void add_object_refs(struct object *obj, struct object_refs *refs) -{ - if (add_decoration(&ref_decorate, obj, refs)) - die("object %s tried to add refs twice!", sha1_to_hex(obj->sha1)); -} - -struct object_refs *alloc_object_refs(unsigned count) -{ - struct object_refs *refs; - size_t size = sizeof(*refs) + count*sizeof(struct object *); - - refs = xcalloc(1, size); - refs->count = count; - return refs; -} - -static int compare_object_pointers(const void *a, const void *b) -{ - const struct object * const *pa = a; - const struct object * const *pb = b; - if (*pa == *pb) - return 0; - else if (*pa < *pb) - return -1; - else - return 1; -} - -void set_object_refs(struct object *obj, struct object_refs *refs) -{ - unsigned int i, j; - - /* Do not install empty list of references */ - if (refs->count < 1) { - free(refs); - return; - } - - /* Sort the list and filter out duplicates */ - qsort(refs->ref, refs->count, sizeof(refs->ref[0]), - compare_object_pointers); - for (i = j = 1; i < refs->count; i++) { - if (refs->ref[i] != refs->ref[i - 1]) - refs->ref[j++] = refs->ref[i]; - } - if (j < refs->count) { - /* Duplicates were found - reallocate list */ - size_t size = sizeof(*refs) + j*sizeof(struct object *); - refs->count = j; - refs = xrealloc(refs, size); - } - - for (i = 0; i < refs->count; i++) - refs->ref[i]->used = 1; - add_object_refs(obj, refs); -} - -void mark_reachable(struct object *obj, unsigned int mask) -{ - const struct object_refs *refs; - - if (!track_object_refs) - die("cannot do reachability with object refs turned off"); - /* If we've been here already, don't bother */ - if (obj->flags & mask) - return; - obj->flags |= mask; - refs = lookup_object_refs(obj); - if (refs) { - unsigned i; - for (i = 0; i < refs->count; i++) - mark_reachable(refs->ref[i], mask); - } -} diff --git a/object.h b/object.h index 397bbfa090..036bd66fe9 100644 --- a/object.h +++ b/object.h @@ -35,14 +35,11 @@ struct object { unsigned char sha1[20]; }; -extern int track_object_refs; - extern const char *typename(unsigned int type); extern int type_from_string(const char *str); extern unsigned int get_max_object_index(void); extern struct object *get_indexed_object(unsigned int); -extern struct object_refs *lookup_object_refs(struct object *); /** Internal only **/ struct object *lookup_object(const unsigned char *sha1); @@ -61,11 +58,6 @@ struct object *parse_object_buffer(const unsigned char *sha1, enum object_type t /** Returns the object, with potentially excess memory allocated. **/ struct object *lookup_unknown_object(const unsigned char *sha1); -struct object_refs *alloc_object_refs(unsigned count); -void set_object_refs(struct object *obj, struct object_refs *refs); - -void mark_reachable(struct object *obj, unsigned int mask); - struct object_list *object_list_insert(struct object *item, struct object_list **list_p); diff --git a/tag.c b/tag.c index 38bf9134f9..d19f56d60c 100644 --- a/tag.c +++ b/tag.c @@ -84,12 +84,6 @@ int parse_tag_buffer(struct tag *item, void *data, unsigned long size) item->tagged = NULL; } - if (item->tagged && track_object_refs) { - struct object_refs *refs = alloc_object_refs(1); - refs->ref[0] = item->tagged; - set_object_refs(&item->object, refs); - } - return 0; } diff --git a/tree.c b/tree.c index 8c0819fa72..113e1bb0a4 100644 --- a/tree.c +++ b/tree.c @@ -202,52 +202,6 @@ struct tree *lookup_tree(const unsigned char *sha1) return (struct tree *) obj; } -/* - * NOTE! Tree refs to external git repositories - * (ie gitlinks) do not count as real references. - * - * You don't have to have those repositories - * available at all, much less have the objects - * accessible from the current repository. - */ -static void track_tree_refs(struct tree *item) -{ - int n_refs = 0, i; - struct object_refs *refs; - struct tree_desc desc; - struct name_entry entry; - - /* Count how many entries there are.. */ - init_tree_desc(&desc, item->buffer, item->size); - while (tree_entry(&desc, &entry)) { - if (S_ISGITLINK(entry.mode)) - continue; - n_refs++; - } - - /* Allocate object refs and walk it again.. */ - i = 0; - refs = alloc_object_refs(n_refs); - init_tree_desc(&desc, item->buffer, item->size); - while (tree_entry(&desc, &entry)) { - struct object *obj; - - if (S_ISGITLINK(entry.mode)) - continue; - if (S_ISDIR(entry.mode)) - obj = &lookup_tree(entry.sha1)->object; - else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) - obj = &lookup_blob(entry.sha1)->object; - else { - warning("in tree %s: entry %s has bad mode %.6o\n", - sha1_to_hex(item->object.sha1), entry.path, entry.mode); - obj = lookup_unknown_object(entry.sha1); - } - refs->ref[i++] = obj; - } - set_object_refs(&item->object, refs); -} - int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size) { if (item->object.parsed) @@ -256,8 +210,6 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size) item->buffer = buffer; item->size = size; - if (track_object_refs) - track_tree_refs(item); return 0; } diff --git a/upload-pack.c b/upload-pack.c index d1d2c2a1f7..ba9ba59976 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -392,7 +392,6 @@ static int get_common_commits(void) char hex[41], last_hex[41]; int len; - track_object_refs = 0; save_commit_buffer = 0; for(;;) { diff --git a/walker.c b/walker.c index adc3e80ce1..c10eca8826 100644 --- a/walker.c +++ b/walker.c @@ -256,7 +256,6 @@ int walker_fetch(struct walker *walker, int targets, char **target, int i; save_commit_buffer = 0; - track_object_refs = 0; for (i = 0; i < targets; i++) { if (!write_ref || !write_ref[i]) From 45163382437c3862d3beb88134b7a975a3a26443 Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 25 Feb 2008 22:46:07 +0100 Subject: [PATCH 04/10] builtin-fsck: reports missing parent commits parse_commit ignores parent commits with certain errors (eg. a non commit object is already loaded under the sha1 of the parent). To make fsck reports such errors, it has to compare the nummer of parent commits returned by parse commit with the number of parent commits in the object or in the graft/shallow file. Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- builtin-fsck.c | 24 ++++++++++++++++++++++++ commit.c | 2 +- commit.h | 1 + 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/builtin-fsck.c b/builtin-fsck.c index 7321ab236b..727310afc2 100644 --- a/builtin-fsck.c +++ b/builtin-fsck.c @@ -397,6 +397,8 @@ static int fsck_commit(struct commit *commit) { char *buffer = commit->buffer; unsigned char tree_sha1[20], sha1[20]; + struct commit_graft *graft; + int parents = 0; if (verbose) fprintf(stderr, "Checking commit %s\n", @@ -411,6 +413,28 @@ static int fsck_commit(struct commit *commit) if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n') return objerror(&commit->object, "invalid 'parent' line format - bad sha1"); buffer += 48; + parents++; + } + graft = lookup_commit_graft(commit->object.sha1); + if (graft) { + struct commit_list *p = commit->parents; + parents = 0; + while (p) { + p = p->next; + parents++; + } + if (graft->nr_parent == -1 && !parents) + ; /* shallow commit */ + else if (graft->nr_parent != parents) + return objerror(&commit->object, "graft objects missing"); + } else { + struct commit_list *p = commit->parents; + while (p && parents) { + p = p->next; + parents--; + } + if (p || parents) + return objerror(&commit->object, "parent objects missing"); } if (memcmp(buffer, "author ", 7)) return objerror(&commit->object, "invalid format - expected 'author' line"); diff --git a/commit.c b/commit.c index 6684c4e731..94d5b3d261 100644 --- a/commit.c +++ b/commit.c @@ -193,7 +193,7 @@ static void prepare_commit_graft(void) commit_graft_prepared = 1; } -static struct commit_graft *lookup_commit_graft(const unsigned char *sha1) +struct commit_graft *lookup_commit_graft(const unsigned char *sha1) { int pos; prepare_commit_graft(); diff --git a/commit.h b/commit.h index 10e2b5d4cf..3ad3dd9af1 100644 --- a/commit.h +++ b/commit.h @@ -101,6 +101,7 @@ struct commit_graft { struct commit_graft *read_graft_line(char *buf, int len); int register_commit_graft(struct commit_graft *, int); int read_graft_file(const char *graft_file); +struct commit_graft *lookup_commit_graft(const unsigned char *sha1); extern struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2, int cleanup); From ba002f3b28ab9febea432d4c415dbe581836d9a0 Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 25 Feb 2008 22:46:08 +0100 Subject: [PATCH 05/10] builtin-fsck: move common object checking code to fsck.c Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- builtin-fsck.c | 274 +++++++------------------------------------------ fsck.c | 215 ++++++++++++++++++++++++++++++++++++++ fsck.h | 7 ++ 3 files changed, 259 insertions(+), 237 deletions(-) diff --git a/builtin-fsck.c b/builtin-fsck.c index 727310afc2..d545650c8c 100644 --- a/builtin-fsck.c +++ b/builtin-fsck.c @@ -55,13 +55,13 @@ static int objerror(struct object *obj, const char *err, ...) return -1; } -static int objwarning(struct object *obj, const char *err, ...) +static int fsck_error_func(struct object *obj, int type, const char *err, ...) { va_list params; va_start(params, err); - objreport(obj, "warning", err, params); + objreport(obj, (type == FSCK_WARN) ? "warning" : "error", err, params); va_end(params); - return -1; + return (type == FSCK_WARN) ? 0 : 1; } static int mark_object(struct object *obj, int type, void *data) @@ -246,229 +246,6 @@ static void check_connectivity(void) } } -/* - * The entries in a tree are ordered in the _path_ order, - * which means that a directory entry is ordered by adding - * a slash to the end of it. - * - * So a directory called "a" is ordered _after_ a file - * called "a.c", because "a/" sorts after "a.c". - */ -#define TREE_UNORDERED (-1) -#define TREE_HAS_DUPS (-2) - -static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, const char *name2) -{ - int len1 = strlen(name1); - int len2 = strlen(name2); - int len = len1 < len2 ? len1 : len2; - unsigned char c1, c2; - int cmp; - - cmp = memcmp(name1, name2, len); - if (cmp < 0) - return 0; - if (cmp > 0) - return TREE_UNORDERED; - - /* - * Ok, the first characters are the same. - * Now we need to order the next one, but turn - * a '\0' into a '/' for a directory entry. - */ - c1 = name1[len]; - c2 = name2[len]; - if (!c1 && !c2) - /* - * git-write-tree used to write out a nonsense tree that has - * entries with the same name, one blob and one tree. Make - * sure we do not have duplicate entries. - */ - return TREE_HAS_DUPS; - if (!c1 && S_ISDIR(mode1)) - c1 = '/'; - if (!c2 && S_ISDIR(mode2)) - c2 = '/'; - return c1 < c2 ? 0 : TREE_UNORDERED; -} - -static int fsck_tree(struct tree *item) -{ - int retval; - int has_full_path = 0; - int has_empty_name = 0; - int has_zero_pad = 0; - int has_bad_modes = 0; - int has_dup_entries = 0; - int not_properly_sorted = 0; - struct tree_desc desc; - unsigned o_mode; - const char *o_name; - const unsigned char *o_sha1; - - if (verbose) - fprintf(stderr, "Checking tree %s\n", - sha1_to_hex(item->object.sha1)); - - init_tree_desc(&desc, item->buffer, item->size); - - o_mode = 0; - o_name = NULL; - o_sha1 = NULL; - while (desc.size) { - unsigned mode; - const char *name; - const unsigned char *sha1; - - sha1 = tree_entry_extract(&desc, &name, &mode); - - if (strchr(name, '/')) - has_full_path = 1; - if (!*name) - has_empty_name = 1; - has_zero_pad |= *(char *)desc.buffer == '0'; - update_tree_entry(&desc); - - switch (mode) { - /* - * Standard modes.. - */ - case S_IFREG | 0755: - case S_IFREG | 0644: - case S_IFLNK: - case S_IFDIR: - case S_IFGITLINK: - break; - /* - * This is nonstandard, but we had a few of these - * early on when we honored the full set of mode - * bits.. - */ - case S_IFREG | 0664: - if (!check_strict) - break; - default: - has_bad_modes = 1; - } - - if (o_name) { - switch (verify_ordered(o_mode, o_name, mode, name)) { - case TREE_UNORDERED: - not_properly_sorted = 1; - break; - case TREE_HAS_DUPS: - has_dup_entries = 1; - break; - default: - break; - } - } - - o_mode = mode; - o_name = name; - o_sha1 = sha1; - } - free(item->buffer); - item->buffer = NULL; - - retval = 0; - if (has_full_path) { - objwarning(&item->object, "contains full pathnames"); - } - if (has_empty_name) { - objwarning(&item->object, "contains empty pathname"); - } - if (has_zero_pad) { - objwarning(&item->object, "contains zero-padded file modes"); - } - if (has_bad_modes) { - objwarning(&item->object, "contains bad file modes"); - } - if (has_dup_entries) { - retval = objerror(&item->object, "contains duplicate file entries"); - } - if (not_properly_sorted) { - retval = objerror(&item->object, "not properly sorted"); - } - return retval; -} - -static int fsck_commit(struct commit *commit) -{ - char *buffer = commit->buffer; - unsigned char tree_sha1[20], sha1[20]; - struct commit_graft *graft; - int parents = 0; - - if (verbose) - fprintf(stderr, "Checking commit %s\n", - sha1_to_hex(commit->object.sha1)); - - if (memcmp(buffer, "tree ", 5)) - return objerror(&commit->object, "invalid format - expected 'tree' line"); - if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n') - return objerror(&commit->object, "invalid 'tree' line format - bad sha1"); - buffer += 46; - while (!memcmp(buffer, "parent ", 7)) { - if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n') - return objerror(&commit->object, "invalid 'parent' line format - bad sha1"); - buffer += 48; - parents++; - } - graft = lookup_commit_graft(commit->object.sha1); - if (graft) { - struct commit_list *p = commit->parents; - parents = 0; - while (p) { - p = p->next; - parents++; - } - if (graft->nr_parent == -1 && !parents) - ; /* shallow commit */ - else if (graft->nr_parent != parents) - return objerror(&commit->object, "graft objects missing"); - } else { - struct commit_list *p = commit->parents; - while (p && parents) { - p = p->next; - parents--; - } - if (p || parents) - return objerror(&commit->object, "parent objects missing"); - } - if (memcmp(buffer, "author ", 7)) - return objerror(&commit->object, "invalid format - expected 'author' line"); - free(commit->buffer); - commit->buffer = NULL; - if (!commit->tree) - return objerror(&commit->object, "could not load commit's tree %s", tree_sha1); - if (!commit->parents && show_root) - printf("root %s\n", sha1_to_hex(commit->object.sha1)); - if (!commit->date) - printf("bad commit date in %s\n", - sha1_to_hex(commit->object.sha1)); - return 0; -} - -static int fsck_tag(struct tag *tag) -{ - struct object *tagged = tag->tagged; - - if (verbose) - fprintf(stderr, "Checking tag %s\n", - sha1_to_hex(tag->object.sha1)); - - if (!tagged) { - return objerror(&tag->object, "could not load tagged object"); - } - if (!show_tags) - return 0; - - printf("tagged %s %s", typename(tagged->type), sha1_to_hex(tagged->sha1)); - printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1)); - return 0; -} - static int fsck_sha1(const unsigned char *sha1) { struct object *obj = parse_object(sha1); @@ -480,20 +257,43 @@ static int fsck_sha1(const unsigned char *sha1) if (obj->flags & SEEN) return 0; obj->flags |= SEEN; + + if (verbose) + fprintf(stderr, "Checking %s %s\n", + typename(obj->type), sha1_to_hex(obj->sha1)); + if (fsck_walk(obj, mark_used, 0)) objerror(obj, "broken links"); - if (obj->type == OBJ_BLOB) - return 0; - if (obj->type == OBJ_TREE) - return fsck_tree((struct tree *) obj); - if (obj->type == OBJ_COMMIT) - return fsck_commit((struct commit *) obj); - if (obj->type == OBJ_TAG) - return fsck_tag((struct tag *) obj); + if (fsck_object(obj, check_strict, fsck_error_func)) + return -1; - /* By now, parse_object() would've returned NULL instead. */ - return objerror(obj, "unknown type '%d' (internal fsck error)", - obj->type); + if (obj->type == OBJ_TREE) { + struct tree *item = (struct tree *) obj; + + free(item->buffer); + item->buffer = NULL; + } + + if (obj->type == OBJ_COMMIT) { + struct commit *commit = (struct commit *) obj; + + free(commit->buffer); + commit->buffer = NULL; + + if (!commit->parents && show_root) + printf("root %s\n", sha1_to_hex(commit->object.sha1)); + } + + if (obj->type == OBJ_TAG) { + struct tag *tag = (struct tag *) obj; + + if (show_tags && tag->tagged) { + printf("tagged %s %s", typename(tag->tagged->type), sha1_to_hex(tag->tagged->sha1)); + printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1)); + } + } + + return 0; } /* diff --git a/fsck.c b/fsck.c index 98fb41af68..0680862151 100644 --- a/fsck.c +++ b/fsck.c @@ -89,3 +89,218 @@ int fsck_walk(struct object *obj, fsck_walk_func walk, void *data) return -1; } } + +/* + * The entries in a tree are ordered in the _path_ order, + * which means that a directory entry is ordered by adding + * a slash to the end of it. + * + * So a directory called "a" is ordered _after_ a file + * called "a.c", because "a/" sorts after "a.c". + */ +#define TREE_UNORDERED (-1) +#define TREE_HAS_DUPS (-2) + +static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, const char *name2) +{ + int len1 = strlen(name1); + int len2 = strlen(name2); + int len = len1 < len2 ? len1 : len2; + unsigned char c1, c2; + int cmp; + + cmp = memcmp(name1, name2, len); + if (cmp < 0) + return 0; + if (cmp > 0) + return TREE_UNORDERED; + + /* + * Ok, the first characters are the same. + * Now we need to order the next one, but turn + * a '\0' into a '/' for a directory entry. + */ + c1 = name1[len]; + c2 = name2[len]; + if (!c1 && !c2) + /* + * git-write-tree used to write out a nonsense tree that has + * entries with the same name, one blob and one tree. Make + * sure we do not have duplicate entries. + */ + return TREE_HAS_DUPS; + if (!c1 && S_ISDIR(mode1)) + c1 = '/'; + if (!c2 && S_ISDIR(mode2)) + c2 = '/'; + return c1 < c2 ? 0 : TREE_UNORDERED; +} + +static int fsck_tree(struct tree *item, int strict, fsck_error error_func) +{ + int retval; + int has_full_path = 0; + int has_empty_name = 0; + int has_zero_pad = 0; + int has_bad_modes = 0; + int has_dup_entries = 0; + int not_properly_sorted = 0; + struct tree_desc desc; + unsigned o_mode; + const char *o_name; + const unsigned char *o_sha1; + + init_tree_desc(&desc, item->buffer, item->size); + + o_mode = 0; + o_name = NULL; + o_sha1 = NULL; + if (!desc.size) + return error_func(&item->object, FSCK_ERROR, "empty tree"); + + while (desc.size) { + unsigned mode; + const char *name; + const unsigned char *sha1; + + sha1 = tree_entry_extract(&desc, &name, &mode); + + if (strchr(name, '/')) + has_full_path = 1; + if (!*name) + has_empty_name = 1; + has_zero_pad |= *(char *)desc.buffer == '0'; + update_tree_entry(&desc); + + switch (mode) { + /* + * Standard modes.. + */ + case S_IFREG | 0755: + case S_IFREG | 0644: + case S_IFLNK: + case S_IFDIR: + case S_IFGITLINK: + break; + /* + * This is nonstandard, but we had a few of these + * early on when we honored the full set of mode + * bits.. + */ + case S_IFREG | 0664: + if (!strict) + break; + default: + has_bad_modes = 1; + } + + if (o_name) { + switch (verify_ordered(o_mode, o_name, mode, name)) { + case TREE_UNORDERED: + not_properly_sorted = 1; + break; + case TREE_HAS_DUPS: + has_dup_entries = 1; + break; + default: + break; + } + } + + o_mode = mode; + o_name = name; + o_sha1 = sha1; + } + + retval = 0; + if (has_full_path) + retval += error_func(&item->object, FSCK_WARN, "contains full pathnames"); + if (has_empty_name) + retval += error_func(&item->object, FSCK_WARN, "contains empty pathname"); + if (has_zero_pad) + retval += error_func(&item->object, FSCK_WARN, "contains zero-padded file modes"); + if (has_bad_modes) + retval += error_func(&item->object, FSCK_WARN, "contains bad file modes"); + if (has_dup_entries) + retval += error_func(&item->object, FSCK_ERROR, "contains duplicate file entries"); + if (not_properly_sorted) + retval += error_func(&item->object, FSCK_ERROR, "not properly sorted"); + return retval; +} + +static int fsck_commit(struct commit *commit, fsck_error error_func) +{ + char *buffer = commit->buffer; + unsigned char tree_sha1[20], sha1[20]; + struct commit_graft *graft; + int parents = 0; + + if (!commit->date) + return error_func(&commit->object, FSCK_ERROR, "invalid author/committer line"); + + if (memcmp(buffer, "tree ", 5)) + return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'tree' line"); + if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n') + return error_func(&commit->object, FSCK_ERROR, "invalid 'tree' line format - bad sha1"); + buffer += 46; + while (!memcmp(buffer, "parent ", 7)) { + if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n') + return error_func(&commit->object, FSCK_ERROR, "invalid 'parent' line format - bad sha1"); + buffer += 48; + parents++; + } + graft = lookup_commit_graft(commit->object.sha1); + if (graft) { + struct commit_list *p = commit->parents; + parents = 0; + while (p) { + p = p->next; + parents++; + } + if (graft->nr_parent == -1 && !parents) + ; /* shallow commit */ + else if (graft->nr_parent != parents) + return error_func(&commit->object, FSCK_ERROR, "graft objects missing"); + } else { + struct commit_list *p = commit->parents; + while (p && parents) { + p = p->next; + parents--; + } + if (p || parents) + return error_func(&commit->object, FSCK_ERROR, "parent objects missing"); + } + if (memcmp(buffer, "author ", 7)) + return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'author' line"); + if (!commit->tree) + return error_func(&commit->object, FSCK_ERROR, "could not load commit's tree %s", sha1_to_hex(tree_sha1)); + + return 0; +} + +static int fsck_tag(struct tag *tag, fsck_error error_func) +{ + struct object *tagged = tag->tagged; + + if (!tagged) + return error_func(&tag->object, FSCK_ERROR, "could not load tagged object"); + return 0; +} + +int fsck_object(struct object *obj, int strict, fsck_error error_func) +{ + if (!obj) + return error_func(obj, FSCK_ERROR, "no valid object to fsck"); + + if (obj->type == OBJ_BLOB) + return 0; + if (obj->type == OBJ_TREE) + return fsck_tree((struct tree *) obj, strict, error_func); + if (obj->type == OBJ_COMMIT) + return fsck_commit((struct commit *) obj, error_func); + if (obj->type == OBJ_TAG) + return fsck_tag((struct tag *) obj, error_func); + + return error_func(obj, FSCK_ERROR, "unknown type '%d' (internal fsck error)", + obj->type); +} diff --git a/fsck.h b/fsck.h index ba5514a818..9bd37c9f1c 100644 --- a/fsck.h +++ b/fsck.h @@ -1,6 +1,9 @@ #ifndef GIT_FSCK_H #define GIT_FSCK_H +#define FSCK_ERROR 1 +#define FSCK_WARN 2 + /* * callback function for fsck_walk * type is the expected type of the object or OBJ_ANY @@ -11,6 +14,9 @@ */ typedef int (*fsck_walk_func)(struct object *obj, int type, void *data); +/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */ +typedef int (*fsck_error)(struct object *obj, int type, const char *err, ...); + /* descend in all linked child objects * the return value is: * -1 error in processing the object @@ -19,5 +25,6 @@ typedef int (*fsck_walk_func)(struct object *obj, int type, void *data); * 0 everything OK */ int fsck_walk(struct object *obj, fsck_walk_func walk, void *data); +int fsck_object(struct object *obj, int strict, fsck_error error_func); #endif From d6ffc8d784d41977397a49ce5333cfe7db1e9c2c Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 25 Feb 2008 22:46:09 +0100 Subject: [PATCH 06/10] add common fsck error printing function Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- fsck.c | 29 +++++++++++++++++++++++++++++ fsck.h | 2 ++ 2 files changed, 31 insertions(+) diff --git a/fsck.c b/fsck.c index 0680862151..6883d1bd68 100644 --- a/fsck.c +++ b/fsck.c @@ -304,3 +304,32 @@ int fsck_object(struct object *obj, int strict, fsck_error error_func) return error_func(obj, FSCK_ERROR, "unknown type '%d' (internal fsck error)", obj->type); } + +int fsck_error_function(struct object *obj, int type, const char *fmt, ...) +{ + va_list ap; + int len; + struct strbuf sb; + + strbuf_init(&sb, 0); + strbuf_addf(&sb, "object %s:", obj->sha1?sha1_to_hex(obj->sha1):"(null)"); + + va_start(ap, fmt); + len = vsnprintf(sb.buf + sb.len, strbuf_avail(&sb), fmt, ap); + va_end(ap); + + if (len < 0) + len = 0; + if (len >= strbuf_avail(&sb)) { + strbuf_grow(&sb, len + 2); + va_start(ap, fmt); + len = vsnprintf(sb.buf + sb.len, strbuf_avail(&sb), fmt, ap); + va_end(ap); + if (len >= strbuf_avail(&sb)) + die("this should not happen, your snprintf is broken"); + } + + error(sb.buf); + strbuf_release(&sb); + return 1; +} diff --git a/fsck.h b/fsck.h index 9bd37c9f1c..990ee02335 100644 --- a/fsck.h +++ b/fsck.h @@ -17,6 +17,8 @@ typedef int (*fsck_walk_func)(struct object *obj, int type, void *data); /* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */ typedef int (*fsck_error)(struct object *obj, int type, const char *err, ...); +int fsck_error_function(struct object *obj, int type, const char *fmt, ...); + /* descend in all linked child objects * the return value is: * -1 error in processing the object From 2add1e6db44c81e19cd4263317b53f3c1339e61b Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 25 Feb 2008 22:46:10 +0100 Subject: [PATCH 07/10] unpack-object: cache for non written objects Preventing objects with broken links entering the repository means, that write of some objects must be delayed. This patch adds a cache to keep the object data in memory. The delta resolving code must also search in the cache. Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- builtin-unpack-objects.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c index 1e51865c52..50e07faa12 100644 --- a/builtin-unpack-objects.c +++ b/builtin-unpack-objects.c @@ -8,6 +8,7 @@ #include "tag.h" #include "tree.h" #include "progress.h" +#include "decorate.h" static int dry_run, quiet, recover, has_errors; static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file"; @@ -18,6 +19,18 @@ static unsigned int offset, len; static off_t consumed_bytes; static SHA_CTX ctx; +struct obj_buffer { + char *buffer; + unsigned long size; +}; + +static struct decoration obj_decorate; + +static struct obj_buffer *lookup_object_buffer(struct object *base) +{ + return lookup_decoration(&obj_decorate, base); +} + /* * Make sure at least "min" bytes are available in the buffer, and * return the pointer to the buffer. @@ -189,6 +202,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, void *delta_data, *base; unsigned long base_size; unsigned char base_sha1[20]; + struct object *obj; if (type == OBJ_REF_DELTA) { hashcpy(base_sha1, fill(20)); @@ -252,6 +266,15 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, } } + obj = lookup_object(base_sha1); + if (obj) { + struct obj_buffer *obj_buf = lookup_object_buffer(obj); + if (obj_buf) { + resolve_delta(nr, obj->type, obj_buf->buffer, obj_buf->size, delta_data, delta_size); + return; + } + } + base = read_sha1_file(base_sha1, &type, &base_size); if (!base) { error("failed to read delta-pack base object %s", From d5ef408b9afb5b4417f4e7e1593a96302d666650 Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 25 Feb 2008 22:46:11 +0100 Subject: [PATCH 08/10] unpack-objects: prevent writing of inconsistent objects This patch introduces a strict mode, which ensures that: - no malformed object will be written - no object with broken links will be written The patch ensures this by delaying the write of all non blob object. These object are written, after all objects they link to are written. An error can only result in unreferenced objects. Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- Documentation/git-unpack-objects.txt | 3 + builtin-unpack-objects.c | 110 +++++++++++++++++++++++++-- 2 files changed, 106 insertions(+), 7 deletions(-) diff --git a/Documentation/git-unpack-objects.txt b/Documentation/git-unpack-objects.txt index b79be3fd4c..3697896a06 100644 --- a/Documentation/git-unpack-objects.txt +++ b/Documentation/git-unpack-objects.txt @@ -40,6 +40,9 @@ OPTIONS and make the best effort to recover as many objects as possible. +--strict:: + Don't write objects with broken content or links. + Author ------ diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c index 50e07faa12..9d2a854950 100644 --- a/builtin-unpack-objects.c +++ b/builtin-unpack-objects.c @@ -7,11 +7,13 @@ #include "commit.h" #include "tag.h" #include "tree.h" +#include "tree-walk.h" #include "progress.h" #include "decorate.h" +#include "fsck.h" -static int dry_run, quiet, recover, has_errors; -static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file"; +static int dry_run, quiet, recover, has_errors, strict; +static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] [--strict] < pack-file"; /* We always read in 4kB chunks. */ static unsigned char buffer[4096]; @@ -31,6 +33,16 @@ static struct obj_buffer *lookup_object_buffer(struct object *base) return lookup_decoration(&obj_decorate, base); } +static void add_object_buffer(struct object *object, char *buffer, unsigned long size) +{ + struct obj_buffer *obj; + obj = xcalloc(1, sizeof(struct obj_buffer)); + obj->buffer = buffer; + obj->size = size; + if (add_decoration(&obj_decorate, object, obj)) + die("object %s tried to add buffer twice!", sha1_to_hex(object->sha1)); +} + /* * Make sure at least "min" bytes are available in the buffer, and * return the pointer to the buffer. @@ -134,9 +146,58 @@ static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1, struct obj_info { off_t offset; unsigned char sha1[20]; + struct object *obj; }; +#define FLAG_OPEN (1u<<20) +#define FLAG_WRITTEN (1u<<21) + static struct obj_info *obj_list; +unsigned nr_objects; + +static void write_cached_object(struct object *obj) +{ + unsigned char sha1[20]; + struct obj_buffer *obj_buf = lookup_object_buffer(obj); + if (write_sha1_file(obj_buf->buffer, obj_buf->size, typename(obj->type), sha1) < 0) + die("failed to write object %s", sha1_to_hex(obj->sha1)); + obj->flags |= FLAG_WRITTEN; +} + +static int check_object(struct object *obj, int type, void *data) +{ + if (!obj) + return 0; + + if (obj->flags & FLAG_WRITTEN) + return 1; + + if (type != OBJ_ANY && obj->type != type) + die("object type mismatch"); + + if (!(obj->flags & FLAG_OPEN)) { + unsigned long size; + int type = sha1_object_info(obj->sha1, &size); + if (type != obj->type || type <= 0) + die("object of unexpected type"); + obj->flags |= FLAG_WRITTEN; + return 1; + } + + if (fsck_object(obj, 1, fsck_error_function)) + die("Error in object"); + if (!fsck_walk(obj, check_object, 0)) + die("Error on reachable objects of %s", sha1_to_hex(obj->sha1)); + write_cached_object(obj); + return 1; +} + +static void write_rest(void) +{ + unsigned i; + for (i = 0; i < nr_objects; i++) + check_object(obj_list[i].obj, OBJ_ANY, 0); +} static void added_object(unsigned nr, enum object_type type, void *data, unsigned long size); @@ -144,9 +205,36 @@ static void added_object(unsigned nr, enum object_type type, static void write_object(unsigned nr, enum object_type type, void *buf, unsigned long size) { - if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) - die("failed to write object"); added_object(nr, type, buf, size); + if (!strict) { + if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) + die("failed to write object"); + free(buf); + obj_list[nr].obj = 0; + } else if (type == OBJ_BLOB) { + struct blob *blob; + if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) + die("failed to write object"); + free(buf); + + blob = lookup_blob(obj_list[nr].sha1); + if (blob) + blob->object.flags |= FLAG_WRITTEN; + else + die("invalid blob object"); + obj_list[nr].obj = 0; + } else { + struct object *obj; + int eaten; + hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1); + obj = parse_object_buffer(obj_list[nr].sha1, type, size, buf, &eaten); + if (!obj) + die("invalid %s", typename(type)); + /* buf is stored via add_object_buffer and in obj, if its a tree or commit */ + add_object_buffer(obj, buf, size); + obj->flags |= FLAG_OPEN; + obj_list[nr].obj = obj; + } } static void resolve_delta(unsigned nr, enum object_type type, @@ -163,7 +251,6 @@ static void resolve_delta(unsigned nr, enum object_type type, die("failed to apply delta"); free(delta); write_object(nr, type, result, result_size); - free(result); } static void added_object(unsigned nr, enum object_type type, @@ -193,7 +280,8 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size, if (!dry_run && buf) write_object(nr, type, buf, size); - free(buf); + else + free(buf); } static void unpack_delta_entry(enum object_type type, unsigned long delta_size, @@ -336,7 +424,8 @@ static void unpack_all(void) int i; struct progress *progress = NULL; struct pack_header *hdr = fill(sizeof(struct pack_header)); - unsigned nr_objects = ntohl(hdr->hdr_entries); + + nr_objects = ntohl(hdr->hdr_entries); if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE) die("bad pack file"); @@ -347,6 +436,7 @@ static void unpack_all(void) if (!quiet) progress = start_progress("Unpacking objects", nr_objects); obj_list = xmalloc(nr_objects * sizeof(*obj_list)); + memset(obj_list, 0, nr_objects * sizeof(*obj_list)); for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); @@ -382,6 +472,10 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) recover = 1; continue; } + if (!strcmp(arg, "--strict")) { + strict = 1; + continue; + } if (!prefixcmp(arg, "--pack_header=")) { struct pack_header *hdr; char *c; @@ -407,6 +501,8 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) unpack_all(); SHA1_Update(&ctx, buffer, offset); SHA1_Final(sha1, &ctx); + if (strict) + write_rest(); if (hashcmp(fill(20), sha1)) die("final sha1 did not match"); use(20); From 0153be05ae332b8df9bb21d8d249881323e30725 Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 25 Feb 2008 22:46:12 +0100 Subject: [PATCH 09/10] index-pack: introduce checking mode Adds strict option, which bails out if the pack would introduces broken object or links in the repository. Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- Documentation/git-index-pack.txt | 3 ++ index-pack.c | 88 +++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt index 72b5d00116..a7825b6144 100644 --- a/Documentation/git-index-pack.txt +++ b/Documentation/git-index-pack.txt @@ -75,6 +75,9 @@ OPTIONS to force the version for the generated pack index, and to force 64-bit index entries on objects located above the given offset. +--strict:: + Die, if the pack contains broken objects or links. + Note ---- diff --git a/index-pack.c b/index-pack.c index 9fd6982a97..9c0c27813f 100644 --- a/index-pack.c +++ b/index-pack.c @@ -7,9 +7,10 @@ #include "tag.h" #include "tree.h" #include "progress.h" +#include "fsck.h" static const char index_pack_usage[] = -"git-index-pack [-v] [-o ] [{ ---keep | --keep= }] { | --stdin [--fix-thin] [] }"; +"git-index-pack [-v] [-o ] [{ ---keep | --keep= }] [--strict] { | --stdin [--fix-thin] [] }"; struct object_entry { @@ -31,6 +32,9 @@ union delta_base { */ #define UNION_BASE_SZ 20 +#define FLAG_LINK (1u<<20) +#define FLAG_CHECKED (1u<<21) + struct delta_entry { union delta_base base; @@ -44,6 +48,7 @@ static int nr_deltas; static int nr_resolved_deltas; static int from_stdin; +static int strict; static int verbose; static struct progress *progress; @@ -56,6 +61,48 @@ static SHA_CTX input_ctx; static uint32_t input_crc32; static int input_fd, output_fd, pack_fd; +static int mark_link(struct object *obj, int type, void *data) +{ + if (!obj) + return -1; + + if (type != OBJ_ANY && obj->type != type) + die("object type mismatch at %s", sha1_to_hex(obj->sha1)); + + obj->flags |= FLAG_LINK; + return 0; +} + +/* The content of each linked object must have been checked + or it must be already present in the object database */ +static void check_object(struct object *obj) +{ + if (!obj) + return; + + if (!(obj->flags & FLAG_LINK)) + return; + + if (!(obj->flags & FLAG_CHECKED)) { + unsigned long size; + int type = sha1_object_info(obj->sha1, &size); + if (type != obj->type || type <= 0) + die("object of unexpected type"); + obj->flags |= FLAG_CHECKED; + return; + } +} + +static void check_objects(void) +{ + unsigned i, max; + + max = get_max_object_index(); + for (i = 0; i < max; i++) + check_object(get_indexed_object(i)); +} + + /* Discard current buffer used content. */ static void flush(void) { @@ -341,6 +388,41 @@ static void sha1_object(const void *data, unsigned long size, die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1)); free(has_data); } + if (strict) { + if (type == OBJ_BLOB) { + struct blob *blob = lookup_blob(sha1); + if (blob) + blob->object.flags |= FLAG_CHECKED; + else + die("invalid blob object %s", sha1_to_hex(sha1)); + } else { + struct object *obj; + int eaten; + void *buf = (void *) data; + + /* + * we do not need to free the memory here, as the + * buf is deleted by the caller. + */ + obj = parse_object_buffer(sha1, type, size, buf, &eaten); + if (!obj) + die("invalid %s", typename(type)); + if (fsck_object(obj, 1, fsck_error_function)) + die("Error in object"); + if (fsck_walk(obj, mark_link, 0)) + die("Not all child objects of %s are reachable", sha1_to_hex(obj->sha1)); + + if (obj->type == OBJ_TREE) { + struct tree *item = (struct tree *) obj; + item->buffer = NULL; + } + if (obj->type == OBJ_COMMIT) { + struct commit *commit = (struct commit *) obj; + commit->buffer = NULL; + } + obj->flags |= FLAG_CHECKED; + } + } } static void resolve_delta(struct object_entry *delta_obj, void *base_data, @@ -714,6 +796,8 @@ int main(int argc, char **argv) from_stdin = 1; } else if (!strcmp(arg, "--fix-thin")) { fix_thin_pack = 1; + } else if (!strcmp(arg, "--strict")) { + strict = 1; } else if (!strcmp(arg, "--keep")) { keep_msg = ""; } else if (!prefixcmp(arg, "--keep=")) { @@ -812,6 +896,8 @@ int main(int argc, char **argv) nr_deltas - nr_resolved_deltas); } free(deltas); + if (strict) + check_objects(); idx_objects = xmalloc((nr_objects) * sizeof(struct pack_idx_entry *)); for (i = 0; i < nr_objects; i++) From 28f72a0f232dfc71b3be726e7e71d0a6d5f9ebba Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 25 Feb 2008 22:46:13 +0100 Subject: [PATCH 10/10] receive-pack: use strict mode for unpacking objects Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- Documentation/config.txt | 6 ++++++ receive-pack.c | 36 +++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 6d8cca46ab..72d3a345ec 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -860,6 +860,12 @@ imap:: The configuration variables in the 'imap' section are described in linkgit:git-imap-send[1]. +receive.fsckObjects:: + If it is set to true, git-receive-pack will check all received + objects. It will abort in the case of a malformed object or a + broken link. The result of an abort are only dangling objects. + The default value is true. + receive.unpackLimit:: If the number of objects received in a push is below this limit then the objects will be unpacked into loose object diff --git a/receive-pack.c b/receive-pack.c index 3267495832..f5440ff4d4 100644 --- a/receive-pack.c +++ b/receive-pack.c @@ -10,6 +10,7 @@ static const char receive_pack_usage[] = "git-receive-pack "; static int deny_non_fast_forwards = 0; +static int receive_fsck_objects = 1; static int receive_unpack_limit = -1; static int transfer_unpack_limit = -1; static int unpack_limit = 100; @@ -35,6 +36,11 @@ static int receive_pack_config(const char *var, const char *value) return 0; } + if (strcmp(var, "receive.fsckobjects") == 0) { + receive_fsck_objects = git_config_bool(var, value); + return 0; + } + return git_default_config(var, value); } @@ -367,11 +373,13 @@ static const char *unpack(void) ntohl(hdr.hdr_version), ntohl(hdr.hdr_entries)); if (ntohl(hdr.hdr_entries) < unpack_limit) { - int code; - const char *unpacker[3]; - unpacker[0] = "unpack-objects"; - unpacker[1] = hdr_arg; - unpacker[2] = NULL; + int code, i = 0; + const char *unpacker[4]; + unpacker[i++] = "unpack-objects"; + if (receive_fsck_objects) + unpacker[i++] = "--strict"; + unpacker[i++] = hdr_arg; + unpacker[i++] = NULL; code = run_command_v_opt(unpacker, RUN_GIT_CMD); switch (code) { case 0: @@ -392,8 +400,8 @@ static const char *unpack(void) return "unpacker exited with error code"; } } else { - const char *keeper[6]; - int s, status; + const char *keeper[7]; + int s, status, i = 0; char keep_arg[256]; struct child_process ip; @@ -401,12 +409,14 @@ static const char *unpack(void) if (gethostname(keep_arg + s, sizeof(keep_arg) - s)) strcpy(keep_arg + s, "localhost"); - keeper[0] = "index-pack"; - keeper[1] = "--stdin"; - keeper[2] = "--fix-thin"; - keeper[3] = hdr_arg; - keeper[4] = keep_arg; - keeper[5] = NULL; + keeper[i++] = "index-pack"; + keeper[i++] = "--stdin"; + if (receive_fsck_objects) + keeper[i++] = "--strict"; + keeper[i++] = "--fix-thin"; + keeper[i++] = hdr_arg; + keeper[i++] = keep_arg; + keeper[i++] = NULL; memset(&ip, 0, sizeof(ip)); ip.argv = keeper; ip.out = -1;