Merge branch 'jk/fetch-quick-tag-following'

When fetching from a remote that has many tags that are irrelevant
to branches we are following, we used to waste way too many cycles
by checking too carefully whether the object pointed at by a tag
(that we are not going to fetch!) exists in our repository.

* jk/fetch-quick-tag-following:
  fetch: use "quick" has_sha1_file for tag following
This commit is contained in:
Junio C Hamano 2016-10-26 13:14:47 -07:00
commit 9fcd14491d
4 changed files with 112 additions and 4 deletions

View File

@ -241,9 +241,10 @@ static void find_non_local_tags(struct transport *transport,
* as one to ignore by setting util to NULL.
*/
if (ends_with(ref->name, "^{}")) {
if (item && !has_object_file(&ref->old_oid) &&
if (item &&
!has_object_file_with_flags(&ref->old_oid, HAS_SHA1_QUICK) &&
!will_fetch(head, ref->old_oid.hash) &&
!has_sha1_file(item->util) &&
!has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
!will_fetch(head, item->util))
item->util = NULL;
item = NULL;
@ -256,7 +257,8 @@ static void find_non_local_tags(struct transport *transport,
* to check if it is a lightweight tag that we want to
* fetch.
*/
if (item && !has_sha1_file(item->util) &&
if (item &&
!has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
!will_fetch(head, item->util))
item->util = NULL;
@ -276,7 +278,8 @@ static void find_non_local_tags(struct transport *transport,
* We may have a final lightweight tag that needs to be
* checked to see if it needs fetching.
*/
if (item && !has_sha1_file(item->util) &&
if (item &&
!has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
!will_fetch(head, item->util))
item->util = NULL;

View File

@ -1157,6 +1157,7 @@ static inline int has_sha1_file(const unsigned char *sha1)
/* Same as the above, except for struct object_id. */
extern int has_object_file(const struct object_id *oid);
extern int has_object_file_with_flags(const struct object_id *oid, int flags);
/*
* Return true iff an alternate object database has a loose object

View File

@ -3335,6 +3335,11 @@ int has_object_file(const struct object_id *oid)
return has_sha1_file(oid->hash);
}
/*
 * Like has_object_file(), but with caller-supplied flags: hands oid->hash
 * and flags straight to has_sha1_file_with_flags() and returns its result.
 */
int has_object_file_with_flags(const struct object_id *oid, int flags)
{
return has_sha1_file_with_flags(oid->hash, flags);
}
static void check_tree(const void *buf, size_t size)
{
struct tree_desc desc;

99
t/perf/p5550-fetch-tags.sh Executable file
View File

@ -0,0 +1,99 @@
#!/bin/sh
test_description='performance of tag-following with many tags
This tests a fairly pathological case, so rather than rely on a real-world
case, we will construct our own repository. The situation is roughly as
follows.
The parent repository has a large number of tags which are disconnected from
the rest of history. That makes them candidates for tag-following, but we never
actually grab them (and thus they will impact each subsequent fetch).
The child repository is a clone of parent, without the tags, and is at least
one commit behind the parent (meaning that we will fetch one object and then
examine the tags to see if they need followed). Furthermore, it has a large
number of packs.
The exact values of "large" here are somewhat arbitrary; I picked values that
start to show a noticeable performance problem on my machine, but without
taking too long to set up and run the tests.
'
. ./perf-lib.sh
# Make a long nonsense history on branch $1, consisting of $2 commits, each
# adding a uniquely named file (named after the commit's index, 1..$2) that
# points to the blob $3.
#
# The commits are written as a fast-import stream; every commit carries the
# four-byte message "foo\n" and is stamped with the current time.
create_history () {
	perl -le '
		my ($branch, $n, $blob) = @ARGV;
		for my $i (1..$n) {
			print "commit refs/heads/$branch";
			# Escape the "@": an unescaped "@example" inside a
			# double-quoted Perl string interpolates the (empty)
			# array @example and silently mangles the address.
			print "committer nobody <nobody\@example.com> now";
			print "data 4";
			print "foo";
			print "M 100644 $blob $i";
		}
	' "$@" |
	git fast-import --date-format=now
}
# Create one tag per commit in the revision range given by "$@". Each tag is
# named after the position (1, 2, 3, ...) of its commit in the rev-list
# output, and all of them are created through a single update-ref call.
create_tags () {
	git rev-list "$@" |
	perl -ne '
		chomp;
		printf "create refs/tags/%d %s\n", $., $_;
	' |
	git update-ref --stdin
}
# Create many nonsense single-object packs.
#
# First imports $1 distinct blobs (their contents are the numbers 1..$1), then
# writes one pack per object reported by "cat-file --batch-all-objects", so
# every object in the repository ends up in a pack of its own.
#
# Returns non-zero if fast-import fails or if any pack-objects invocation
# fails; packing stops at the first failure.
create_packs () {
	perl -le '
		my ($n) = @ARGV;
		for (1..$n) {
			print "blob";
			print "data <<EOF";
			print "$_";
			print "EOF";
		}
	' "$@" |
	git fast-import &&
	git cat-file --batch-all-objects --batch-check='%(objectname)' |
	{
		# A bare "while" loop only reports the status of its last
		# iteration, so remember failures explicitly.
		pack_status=0
		while read -r sha1
		do
			echo "$sha1" |
			git pack-objects .git/objects/pack/pack ||
			{
				pack_status=1
				break
			}
		done
		test "$pack_status" -eq 0
	}
}
# Set up the two repositories: "parent" gets an empty base commit, "child" is
# cloned from it, and then "parent" gains one more empty commit so that the
# child is one commit behind and has something to fetch.
test_expect_success 'create parent and child' '
git init parent &&
git -C parent commit --allow-empty -m base &&
git clone parent child &&
git -C parent commit --allow-empty -m trigger-fetch
'
# Give the parent a large number of tags: build a 3000-commit throwaway
# history on branch "cruft" (every commit references the same blob), tag each
# of its commits, then delete the branch, leaving the tags behind.
test_expect_success 'populate parent tags' '
(
cd parent &&
blob=$(echo content | git hash-object -w --stdin) &&
create_history cruft 3000 $blob &&
create_tags cruft &&
git branch -D cruft
)
'
# Fill the child with a large number of packs via create_packs 500.
# gc.auto and gc.autopacklimit are set to 0 first -- presumably so that
# automatic gc does not consolidate the packs again; confirm against
# git-config documentation.
test_expect_success 'create child packs' '
(
cd child &&
git config gc.auto 0 &&
git config gc.autopacklimit 0 &&
create_packs 500
)
'
# The measured operation: delete the child's remote-tracking ref for master
# and run a fetch in the child, which then has to consider the parent's tags.
test_perf 'fetch' '
# make sure there is something to fetch on each iteration
git -C child update-ref -d refs/remotes/origin/master &&
git -C child fetch
'
# End of the test script (test_done is not defined here; presumably it comes
# in via perf-lib.sh, sourced above).
test_done