From 09d920831edba17fe983269a9d0d85e592dd4bb3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 21 Sep 2005 02:46:32 -0700 Subject: [PATCH 01/11] Make object creation in http fetch a bit safer. Unlike write_sha1_file() that tries to create the object file in a temporary location and then move it to the final location, fetch_object could have been interrupted in the middle, leaving a corrupt file. Signed-off-by: Junio C Hamano --- http-fetch.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/http-fetch.c b/http-fetch.c index 77f530c95d..57141a8a29 100644 --- a/http-fetch.c +++ b/http-fetch.c @@ -350,13 +350,18 @@ int fetch_object(struct alt_base *repo, unsigned char *sha1) char *hex = sha1_to_hex(sha1); char *filename = sha1_file_name(sha1); unsigned char real_sha1[20]; + char tmpfile[PATH_MAX]; + int ret; char *url; char *posn; - local = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0666); + snprintf(tmpfile, sizeof(tmpfile), "%s/obj_XXXXXX", + get_object_directory()); + local = mkstemp(tmpfile); if (local < 0) - return error("Couldn't open local object %s\n", filename); + return error("Couldn't create temporary file %s for %s: %s\n", + tmpfile, filename, strerror(errno)); memset(&stream, 0, sizeof(stream)); @@ -386,18 +391,32 @@ int fetch_object(struct alt_base *repo, unsigned char *sha1) return -1; } + fchmod(local, 0444); close(local); inflateEnd(&stream); SHA1_Final(real_sha1, &c); if (zret != Z_STREAM_END) { - unlink(filename); + unlink(tmpfile); return error("File %s (%s) corrupt\n", hex, url); } if (memcmp(sha1, real_sha1, 20)) { - unlink(filename); + unlink(tmpfile); return error("File %s has bad hash\n", hex); } - + ret = link(tmpfile, filename); + if (ret < 0) { + /* Same Coda hack as in write_sha1_file(sha1_file.c) */ + ret = errno; + if (ret == EXDEV && !rename(tmpfile, filename)) + goto out; + } + unlink(tmpfile); + if (ret) { + if (ret != EEXIST) + return error("unable to write sha1 filename %s: %s", + filename, strerror(ret)); + } + out: pull_say("got %s\n", hex); return 0; } From b163512d4eb36ee946908b682c7863658c5a8db4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 21 Sep 2005 12:29:59 -0700 Subject: [PATCH 02/11] Fix documentation dependency. Randal L. Schwartz noticed that 'make install' does not rebuild what is installed. Make the 'install' rule depend on 'man'. I noticed also 'touch' of the source files were used to express include dependencies, which is a no-no. Rewrite it to do dependencies properly, and add missing include dependencies while we are at it. Signed-off-by: Junio C Hamano --- Documentation/Makefile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/Makefile b/Documentation/Makefile index 37b7fcb97d..01fad8ea5d 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -44,14 +44,16 @@ man: man1 man7 man1: $(DOC_MAN1) man7: $(DOC_MAN7) -install: +install: man $(INSTALL) -d -m755 $(DESTDIR)/$(man1) $(DESTDIR)/$(man7) $(INSTALL) $(DOC_MAN1) $(DESTDIR)/$(man1) $(INSTALL) $(DOC_MAN7) $(DESTDIR)/$(man7) # 'include' dependencies -git-diff-%.txt: diff-format.txt diff-options.txt - touch $@ +$(patsubst %.txt,%.1,$(wildcard git-diff-*.txt)): \ + diff-format.txt diff-options.txt +$(patsubst %,%.1,git-fetch git-pull git-push): pull-fetch-param.txt +git.7: ../README clean: rm -f *.xml *.html *.1 *.7 howto-index.txt howto/*.html From bc7ccfd2d7342bf81d344c4215a17ebbb64d9212 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 19 Sep 2005 23:52:33 -0700 Subject: [PATCH 03/11] Do not give alarming error message from rsync in fetch and clone. When we check the optional objects/info/alternates file at the remote repository, we forgot to really squelch error message from rsync. Not having that file is not a crime. Signed-off-by: Junio C Hamano (cherry picked from 89d844d084f14bc9506f63cd3c9aa44b21b49067 commit) --- git-clone.sh | 3 ++- git-fetch.sh | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/git-clone.sh b/git-clone.sh index bfb8fd6285..44135f489d 100755 --- a/git-clone.sh +++ b/git-clone.sh @@ -152,7 +152,8 @@ yes,yes) # Look at objects/info/alternates for rsync -- http will # support it natively and git native ones will do it on the # remote end. Not having that file is not a crime. - rsync -q "$repo/objects/info/alternates" "$D/.git/TMP_ALT" || + rsync -q "$repo/objects/info/alternates" \ + "$D/.git/TMP_ALT" 2>/dev/null || rm -f "$D/.git/TMP_ALT" if test -f "$D/.git/TMP_ALT" then diff --git a/git-fetch.sh b/git-fetch.sh index 72f17ab6c9..822b4cd982 100755 --- a/git-fetch.sh +++ b/git-fetch.sh @@ -193,8 +193,9 @@ do # Look at objects/info/alternates for rsync -- http will # support it natively and git native ones will do it on the remote # end. Not having that file is not a crime. - rsync -q "$remote/objects/info/alternates" "$GIT_DIR/TMP_ALT" || - rm -f "$GIT_DIR/TMP_ALT" + rsync -q "$remote/objects/info/alternates" \ + "$GIT_DIR/TMP_ALT" 2>/dev/null || + rm -f "$GIT_DIR/TMP_ALT" if test -f "$GIT_DIR/TMP_ALT" then resolve_alternates "$remote" <"$GIT_DIR/TMP_ALT" | From 80077f071614ef1775472eb54d59c071e15d5784 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 21 Sep 2005 20:33:54 +0400 Subject: [PATCH 04/11] [PATCH] fetch.c: Remove useless lookup_object_type() call in process() In all places where process() is called except the one in pull() (which is executed only once) the pointer to the object is already available, so pass it as the argument to process() instead of sha1 and avoid an unneeded call to lookup_object_type(). Signed-off-by: Sergey Vlasov Signed-off-by: Junio C Hamano --- fetch.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/fetch.c b/fetch.c index af9a013bd2..be3dc0a9f8 100644 --- a/fetch.c +++ b/fetch.c @@ -33,7 +33,7 @@ static void report_missing(const char *what, const unsigned char *missing) what, missing_hex, sha1_to_hex(current_commit_sha1)); } -static int process(unsigned char *sha1, const char *type); +static int process(struct object *obj); static int process_tree(struct tree *tree) { @@ -46,8 +46,7 @@ static int process_tree(struct tree *tree) tree->entries = NULL; while (entry) { struct tree_entry_list *next = entry->next; - if (process(entry->item.any->sha1, - entry->directory ? tree_type : blob_type)) + if (process(entry->item.any)) return -1; free(entry); entry = next; @@ -79,7 +78,7 @@ static int process_commit(struct commit *commit) pull_say("walk %s\n", sha1_to_hex(commit->object.sha1)); if (get_tree) { - if (process(commit->tree->object.sha1, tree_type)) + if (process(&commit->tree->object)) return -1; if (!get_all) get_tree = 0; @@ -87,7 +86,7 @@ static int process_commit(struct commit *commit) if (get_history) { struct commit_list *parents = commit->parents; for (; parents; parents = parents->next) { - if (process(parents->item->object.sha1, commit_type)) + if (process(&parents->item->object)) return -1; } } @@ -98,7 +97,7 @@ static int process_tag(struct tag *tag) { if (parse_tag(tag)) return -1; - return process(tag->tagged->sha1, NULL); + return process(tag->tagged); } static struct object_list *process_queue = NULL; @@ -133,12 +132,10 @@ static int process_object(struct object *obj) obj->type, sha1_to_hex(obj->sha1)); } -static int process(unsigned char *sha1, const char *type) +static int process(struct object *obj) { - struct object *obj = lookup_object_type(sha1, type); - - if (has_sha1_file(sha1)) { - parse_object(sha1); + if (has_sha1_file(obj->sha1)) { + parse_object(obj->sha1); /* We already have it, so we should scan it now. */ if (obj->flags & (SCANNED | TO_SCAN)) return 0; @@ -153,7 +150,7 @@ static int process(unsigned char *sha1, const char *type) process_queue_end = &(*process_queue_end)->next; obj->flags |= TO_FETCH; - prefetch(sha1); + prefetch(obj->sha1); return 0; } @@ -228,7 +225,7 @@ int pull(char *target) if (interpret_target(target, sha1)) return error("Could not interpret %s as something to pull", target); - if (process(sha1, NULL)) + if (process(lookup_unknown_object(sha1))) return -1; if (loop()) return -1; From a82d07e5e61a9aef38d70277a5b27588a939c5ce Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 21 Sep 2005 20:33:59 +0400 Subject: [PATCH 05/11] [PATCH] fetch.c: Make process() look at each object only once The process() function is very often called multiple times for the same object (because lots of trees refer to the same blobs), but did not have a fast check for this, therefore a lot of useless calls to has_sha1_file() and parse_object() were made before discovering that nothing needs to be done. This patch adds the SEEN flag which is used in process() to make it look at each object only once. When testing git-local-fetch on the repository of GIT, this gives a 14x improvement in CPU usage (mainly because the redundant calls to parse_object() are now avoided - parse_object() always unpacks and parses the object data, even if it was already parsed before). Signed-off-by: Sergey Vlasov Signed-off-by: Junio C Hamano --- fetch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fetch.c b/fetch.c index be3dc0a9f8..bcdacaf77f 100644 --- a/fetch.c +++ b/fetch.c @@ -58,6 +58,7 @@ static int process_tree(struct tree *tree) #define TO_FETCH 2U #define TO_SCAN 4U #define SCANNED 8U +#define SEEN 16U static struct commit_list *complete = NULL; @@ -134,6 +135,10 @@ static int process_object(struct object *obj) static int process(struct object *obj) { + if (obj->flags & SEEN) + return 0; + obj->flags |= SEEN; + if (has_sha1_file(obj->sha1)) { parse_object(obj->sha1); /* We already have it, so we should scan it now. */ From 754ac00e710386ab575843225768d4885c11c8c4 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 21 Sep 2005 20:34:04 +0400 Subject: [PATCH 06/11] [PATCH] fetch.c: Remove redundant SCANNED flag After adding the SEEN flag, the SCANNED flag became obviously redundant - each object can get into process_queue through process() only once, and therefore multiple calls to process_object() for the same object are not possible. Signed-off-by: Sergey Vlasov Signed-off-by: Junio C Hamano --- fetch.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fetch.c b/fetch.c index bcdacaf77f..a1748e0b3d 100644 --- a/fetch.c +++ b/fetch.c @@ -57,7 +57,6 @@ static int process_tree(struct tree *tree) #define COMPLETE 1U #define TO_FETCH 2U #define TO_SCAN 4U -#define SCANNED 8U #define SEEN 16U static struct commit_list *complete = NULL; @@ -106,10 +105,6 @@ static struct object_list **process_queue_end = &process_queue; static int process_object(struct object *obj) { - if (obj->flags & SCANNED) - return 0; - obj->flags |= SCANNED; - if (obj->type == commit_type) { if (process_commit((struct commit *)obj)) return -1; @@ -142,7 +137,7 @@ static int process(struct object *obj) if (has_sha1_file(obj->sha1)) { parse_object(obj->sha1); /* We already have it, so we should scan it now. */ - if (obj->flags & (SCANNED | TO_SCAN)) + if (obj->flags & TO_SCAN) return 0; object_list_insert(obj, process_queue_end); process_queue_end = &(*process_queue_end)->next; From 51d8faf8608aa8d2f6a8c4b3c1b712adb0d39325 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 21 Sep 2005 20:34:09 +0400 Subject: [PATCH 07/11] [PATCH] fetch.c: Remove redundant TO_FETCH flag The TO_FETCH flag also became redundant after adding the SEEN flag - it was set and checked in process() to prevent adding the same object to process_queue multiple times, but now SEEN guards against this. Signed-off-by: Sergey Vlasov Signed-off-by: Junio C Hamano --- fetch.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fetch.c b/fetch.c index a1748e0b3d..390de99f2a 100644 --- a/fetch.c +++ b/fetch.c @@ -55,7 +55,6 @@ static int process_tree(struct tree *tree) } #define COMPLETE 1U -#define TO_FETCH 2U #define TO_SCAN 4U #define SEEN 16U @@ -144,11 +143,10 @@ static int process(struct object *obj) obj->flags |= TO_SCAN; return 0; } - if (obj->flags & (COMPLETE | TO_FETCH)) + if (obj->flags & COMPLETE) return 0; object_list_insert(obj, process_queue_end); process_queue_end = &(*process_queue_end)->next; - obj->flags |= TO_FETCH; prefetch(obj->sha1); From 7b64d06b2e74612a0970c8563845cb9ee34724af Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 21 Sep 2005 20:34:14 +0400 Subject: [PATCH 08/11] [PATCH] fetch.c: Remove some duplicated code in process() It does not matter if we call prefetch() or set the TO_SCAN flag before or after adding the object to process_queue. However, doing it before object_list_insert() allows us to kill 3 lines of duplicated code. Signed-off-by: Sergey Vlasov Signed-off-by: Junio C Hamano --- fetch.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fetch.c b/fetch.c index 390de99f2a..3074f5f356 100644 --- a/fetch.c +++ b/fetch.c @@ -138,18 +138,15 @@ static int process(struct object *obj) /* We already have it, so we should scan it now. */ if (obj->flags & TO_SCAN) return 0; - object_list_insert(obj, process_queue_end); - process_queue_end = &(*process_queue_end)->next; obj->flags |= TO_SCAN; - return 0; + } else { + if (obj->flags & COMPLETE) + return 0; + prefetch(obj->sha1); } - if (obj->flags & COMPLETE) - return 0; + object_list_insert(obj, process_queue_end); process_queue_end = &(*process_queue_end)->next; - - prefetch(obj->sha1); - return 0; } From 2449696bcd8b458ae604c35898ecab7271b97b40 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 21 Sep 2005 20:34:19 +0400 Subject: [PATCH 09/11] [PATCH] fetch.c: Remove redundant test of TO_SCAN in process() If the SEEN flag was not set, the TO_SCAN flag cannot be set, therefore testing it is pointless. Signed-off-by: Sergey Vlasov Signed-off-by: Junio C Hamano --- fetch.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fetch.c b/fetch.c index 3074f5f356..21b373d198 100644 --- a/fetch.c +++ b/fetch.c @@ -136,8 +136,6 @@ static int process(struct object *obj) if (has_sha1_file(obj->sha1)) { parse_object(obj->sha1); /* We already have it, so we should scan it now. */ - if (obj->flags & TO_SCAN) - return 0; obj->flags |= TO_SCAN; } else { if (obj->flags & COMPLETE) From 24451c31032d4ea3e7750b5f9c61e9c9f1657449 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 21 Sep 2005 20:34:24 +0400 Subject: [PATCH 10/11] [PATCH] fetch.c: Clean up object flag definitions Remove holes left after deleting flags, and use shifts to emphasize that flags are single bits. Signed-off-by: Sergey Vlasov Signed-off-by: Junio C Hamano --- fetch.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fetch.c b/fetch.c index 21b373d198..b92ca8038e 100644 --- a/fetch.c +++ b/fetch.c @@ -54,9 +54,9 @@ static int process_tree(struct tree *tree) return 0; } -#define COMPLETE 1U -#define TO_SCAN 4U -#define SEEN 16U +#define COMPLETE (1U << 0) +#define SEEN (1U << 1) +#define TO_SCAN (1U << 2) static struct commit_list *complete = NULL; From 2c08b3638339f9b73128c41a4882e115222608a3 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 21 Sep 2005 20:34:29 +0400 Subject: [PATCH 11/11] [PATCH] fetch.c: Remove call to parse_object() from process() The call to parse_object() in process() is not actually needed - if the object type is unknown, parse_object() will be called by loop(); if the type is known, the object will be parsed by the appropriate process_*() function. After this change blobs which exist locally are no longer parsed, which gives about 2x CPU usage improvement; the downside is that there will be no warnings for existing corrupted blobs, but detecting such corruption is the job of git-fsck-objects, not the fetch programs. Newly fetched objects are still checked for corruption in http-fetch.c and ssh-fetch.c (local-fetch.c does not seem to do it, but the removed parse_object() call would not be reached for new objects anyway). Signed-off-by: Sergey Vlasov Signed-off-by: Junio C Hamano --- fetch.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fetch.c b/fetch.c index b92ca8038e..e6fd624c12 100644 --- a/fetch.c +++ b/fetch.c @@ -134,7 +134,6 @@ static int process(struct object *obj) obj->flags |= SEEN; if (has_sha1_file(obj->sha1)) { - parse_object(obj->sha1); /* We already have it, so we should scan it now. */ obj->flags |= TO_SCAN; } else {