Merge branch 'jt/fetch-negotiator-skipping'
Add a server-side knob to skip commits in exponential/Fibonacci stride in an attempt to cover a wider swath of history with a smaller number of iterations, potentially accepting a larger packfile transfer, instead of going back one commit at a time during common ancestor discovery during the "git fetch" transaction. * jt/fetch-negotiator-skipping: negotiator/skipping: skip commits during fetch
This commit is contained in:
commit
7c85ee6c58
@ -1531,6 +1531,15 @@ fetch.output::
|
||||
`full` and `compact`. Default value is `full`. See section
|
||||
OUTPUT in linkgit:git-fetch[1] for detail.
|
||||
|
||||
fetch.negotiationAlgorithm::
|
||||
Control how information about the commits in the local repository is
|
||||
sent when negotiating the contents of the packfile to be sent by the
|
||||
server. Set to "skipping" to use an algorithm that skips commits in an
|
||||
effort to converge faster, but may result in a larger-than-necessary
|
||||
packfile; any other value instructs Git to use the default algorithm
|
||||
that never skips commits (unless the server has acknowledged it or one
|
||||
of its descendants).
|
||||
|
||||
format.attach::
|
||||
Enable multipart/mixed attachments as the default for
|
||||
'format-patch'. The value can also be a double quoted string
|
||||
|
1
Makefile
1
Makefile
@ -893,6 +893,7 @@ LIB_OBJS += merge-recursive.o
|
||||
LIB_OBJS += mergesort.o
|
||||
LIB_OBJS += name-hash.o
|
||||
LIB_OBJS += negotiator/default.o
|
||||
LIB_OBJS += negotiator/skipping.o
|
||||
LIB_OBJS += notes.o
|
||||
LIB_OBJS += notes-cache.o
|
||||
LIB_OBJS += notes-merge.o
|
||||
|
@ -1,8 +1,14 @@
|
||||
#include "git-compat-util.h"
|
||||
#include "fetch-negotiator.h"
|
||||
#include "negotiator/default.h"
|
||||
#include "negotiator/skipping.h"
|
||||
|
||||
void fetch_negotiator_init(struct fetch_negotiator *negotiator)
|
||||
/*
 * Set up "negotiator" with the implementation selected by "algorithm".
 *
 * Only the exact string "skipping" selects the skipping negotiator; a NULL
 * pointer or any other string selects the default negotiator.
 */
void fetch_negotiator_init(struct fetch_negotiator *negotiator,
			   const char *algorithm)
{
	int want_skipping = algorithm && !strcmp(algorithm, "skipping");

	if (want_skipping)
		skipping_negotiator_init(negotiator);
	else
		default_negotiator_init(negotiator);
}
|
||||
|
@ -52,6 +52,7 @@ struct fetch_negotiator {
|
||||
void *data;
|
||||
};
|
||||
|
||||
void fetch_negotiator_init(struct fetch_negotiator *negotiator);
|
||||
void fetch_negotiator_init(struct fetch_negotiator *negotiator,
|
||||
const char *algorithm);
|
||||
|
||||
#endif
|
||||
|
@ -35,6 +35,7 @@ static int agent_supported;
|
||||
static int server_supports_filtering;
|
||||
static struct lock_file shallow_lock;
|
||||
static const char *alternate_shallow_file;
|
||||
static char *negotiation_algorithm;
|
||||
|
||||
/* Remember to update object flag allocation in object.h */
|
||||
#define COMPLETE (1U << 0)
|
||||
@ -913,7 +914,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
|
||||
const char *agent_feature;
|
||||
int agent_len;
|
||||
struct fetch_negotiator negotiator;
|
||||
fetch_negotiator_init(&negotiator);
|
||||
fetch_negotiator_init(&negotiator, negotiation_algorithm);
|
||||
|
||||
sort_ref_list(&ref, ref_compare_name);
|
||||
QSORT(sought, nr_sought, cmp_ref_by_name);
|
||||
@ -1324,7 +1325,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
|
||||
int in_vain = 0;
|
||||
int haves_to_send = INITIAL_FLUSH;
|
||||
struct fetch_negotiator negotiator;
|
||||
fetch_negotiator_init(&negotiator);
|
||||
fetch_negotiator_init(&negotiator, negotiation_algorithm);
|
||||
packet_reader_init(&reader, fd[0], NULL, 0,
|
||||
PACKET_READ_CHOMP_NEWLINE);
|
||||
|
||||
@ -1406,6 +1407,8 @@ static void fetch_pack_config(void)
|
||||
git_config_get_bool("repack.usedeltabaseoffset", &prefer_ofs_delta);
|
||||
git_config_get_bool("fetch.fsckobjects", &fetch_fsck_objects);
|
||||
git_config_get_bool("transfer.fsckobjects", &transfer_fsck_objects);
|
||||
git_config_get_string("fetch.negotiationalgorithm",
|
||||
&negotiation_algorithm);
|
||||
|
||||
git_config(git_default_config, NULL);
|
||||
}
|
||||
|
250
negotiator/skipping.c
Normal file
250
negotiator/skipping.c
Normal file
@ -0,0 +1,250 @@
|
||||
#include "cache.h"
|
||||
#include "skipping.h"
|
||||
#include "../commit.h"
|
||||
#include "../fetch-negotiator.h"
|
||||
#include "../prio-queue.h"
|
||||
#include "../refs.h"
|
||||
#include "../tag.h"
|
||||
|
||||
/* Remember to update object flag allocation in object.h */
|
||||
/*
|
||||
* Both us and the server know that both parties have this object.
|
||||
*/
|
||||
#define COMMON (1U << 2)
|
||||
/*
|
||||
* The server has told us that it has this object. We still need to tell the
|
||||
* server that we have this object (or one of its descendants), but since we are
|
||||
* going to do that, we do not need to tell the server about its ancestors.
|
||||
*/
|
||||
#define ADVERTISED (1U << 3)
|
||||
/*
|
||||
* This commit has entered the priority queue.
|
||||
*/
|
||||
#define SEEN (1U << 4)
|
||||
/*
|
||||
* This commit has left the priority queue.
|
||||
*/
|
||||
#define POPPED (1U << 5)
|
||||
|
||||
static int marked;
|
||||
|
||||
/*
|
||||
* An entry in the priority queue.
|
||||
*/
|
||||
struct entry {
	/* The commit this queue entry stands for. */
	struct commit *commit;

	/*
	 * Skip bookkeeping, consulted only while the commit is not COMMON.
	 *
	 * "ttl" counts down along a chain of parents; an entry whose ttl is
	 * zero when it is popped is sent as a "have". "original_ttl" is the
	 * value the current countdown started from; once a countdown reaches
	 * zero, the parent starts a new one from original_ttl * 3 / 2 + 1.
	 */
	uint16_t original_ttl;
	uint16_t ttl;
};
|
||||
|
||||
struct data {
|
||||
struct prio_queue rev_list;
|
||||
|
||||
/*
|
||||
* The number of non-COMMON commits in rev_list.
|
||||
*/
|
||||
int non_common_revs;
|
||||
};
|
||||
|
||||
static int compare(const void *a_, const void *b_, void *unused)
|
||||
{
|
||||
const struct entry *a = a_;
|
||||
const struct entry *b = b_;
|
||||
return compare_commits_by_commit_date(a->commit, b->commit, NULL);
|
||||
}
|
||||
|
||||
static struct entry *rev_list_push(struct data *data, struct commit *commit, int mark)
|
||||
{
|
||||
struct entry *entry;
|
||||
commit->object.flags |= mark | SEEN;
|
||||
|
||||
entry = xcalloc(1, sizeof(*entry));
|
||||
entry->commit = commit;
|
||||
prio_queue_put(&data->rev_list, entry);
|
||||
|
||||
if (!(mark & COMMON))
|
||||
data->non_common_revs++;
|
||||
return entry;
|
||||
}
|
||||
|
||||
static int clear_marks(const char *refname, const struct object_id *oid,
|
||||
int flag, void *cb_data)
|
||||
{
|
||||
struct object *o = deref_tag(the_repository, parse_object(the_repository, oid), refname, 0);
|
||||
|
||||
if (o && o->type == OBJ_COMMIT)
|
||||
clear_commit_marks((struct commit *)o,
|
||||
COMMON | ADVERTISED | SEEN | POPPED);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark this SEEN commit and all its SEEN ancestors as COMMON.
|
||||
*/
|
||||
static void mark_common(struct data *data, struct commit *c)
|
||||
{
|
||||
struct commit_list *p;
|
||||
|
||||
if (c->object.flags & COMMON)
|
||||
return;
|
||||
c->object.flags |= COMMON;
|
||||
if (!(c->object.flags & POPPED))
|
||||
data->non_common_revs--;
|
||||
|
||||
if (!c->object.parsed)
|
||||
return;
|
||||
for (p = c->parents; p; p = p->next) {
|
||||
if (p->item->object.flags & SEEN)
|
||||
mark_common(data, p->item);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that the priority queue has an entry for to_push, and ensure that the
|
||||
* entry has the correct flags and ttl.
|
||||
*
|
||||
* This function returns 1 if an entry was found or created, and 0 otherwise
|
||||
* (because the entry for this commit had already been popped).
|
||||
*/
|
||||
static int push_parent(struct data *data, struct entry *entry,
|
||||
struct commit *to_push)
|
||||
{
|
||||
struct entry *parent_entry;
|
||||
|
||||
if (to_push->object.flags & SEEN) {
|
||||
int i;
|
||||
if (to_push->object.flags & POPPED)
|
||||
/*
|
||||
* The entry for this commit has already been popped,
|
||||
* due to clock skew. Pretend that this parent does not
|
||||
* exist.
|
||||
*/
|
||||
return 0;
|
||||
/*
|
||||
* Find the existing entry and use it.
|
||||
*/
|
||||
for (i = 0; i < data->rev_list.nr; i++) {
|
||||
parent_entry = data->rev_list.array[i].data;
|
||||
if (parent_entry->commit == to_push)
|
||||
goto parent_found;
|
||||
}
|
||||
BUG("missing parent in priority queue");
|
||||
parent_found:
|
||||
;
|
||||
} else {
|
||||
parent_entry = rev_list_push(data, to_push, 0);
|
||||
}
|
||||
|
||||
if (entry->commit->object.flags & (COMMON | ADVERTISED)) {
|
||||
mark_common(data, to_push);
|
||||
} else {
|
||||
uint16_t new_original_ttl = entry->ttl
|
||||
? entry->original_ttl : entry->original_ttl * 3 / 2 + 1;
|
||||
uint16_t new_ttl = entry->ttl
|
||||
? entry->ttl - 1 : new_original_ttl;
|
||||
if (parent_entry->original_ttl < new_original_ttl) {
|
||||
parent_entry->original_ttl = new_original_ttl;
|
||||
parent_entry->ttl = new_ttl;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static const struct object_id *get_rev(struct data *data)
|
||||
{
|
||||
struct commit *to_send = NULL;
|
||||
|
||||
while (to_send == NULL) {
|
||||
struct entry *entry;
|
||||
struct commit *commit;
|
||||
struct commit_list *p;
|
||||
int parent_pushed = 0;
|
||||
|
||||
if (data->rev_list.nr == 0 || data->non_common_revs == 0)
|
||||
return NULL;
|
||||
|
||||
entry = prio_queue_get(&data->rev_list);
|
||||
commit = entry->commit;
|
||||
commit->object.flags |= POPPED;
|
||||
if (!(commit->object.flags & COMMON))
|
||||
data->non_common_revs--;
|
||||
|
||||
if (!(commit->object.flags & COMMON) && !entry->ttl)
|
||||
to_send = commit;
|
||||
|
||||
parse_commit(commit);
|
||||
for (p = commit->parents; p; p = p->next)
|
||||
parent_pushed |= push_parent(data, entry, p->item);
|
||||
|
||||
if (!(commit->object.flags & COMMON) && !parent_pushed)
|
||||
/*
|
||||
* This commit has no parents, or all of its parents
|
||||
* have already been popped (due to clock skew), so send
|
||||
* it anyway.
|
||||
*/
|
||||
to_send = commit;
|
||||
|
||||
free(entry);
|
||||
}
|
||||
|
||||
return &to_send->object.oid;
|
||||
}
|
||||
|
||||
static void known_common(struct fetch_negotiator *n, struct commit *c)
|
||||
{
|
||||
if (c->object.flags & SEEN)
|
||||
return;
|
||||
rev_list_push(n->data, c, ADVERTISED);
|
||||
}
|
||||
|
||||
static void add_tip(struct fetch_negotiator *n, struct commit *c)
|
||||
{
|
||||
n->known_common = NULL;
|
||||
if (c->object.flags & SEEN)
|
||||
return;
|
||||
rev_list_push(n->data, c, 0);
|
||||
}
|
||||
|
||||
static const struct object_id *next(struct fetch_negotiator *n)
|
||||
{
|
||||
n->known_common = NULL;
|
||||
n->add_tip = NULL;
|
||||
return get_rev(n->data);
|
||||
}
|
||||
|
||||
static int ack(struct fetch_negotiator *n, struct commit *c)
|
||||
{
|
||||
int known_to_be_common = !!(c->object.flags & COMMON);
|
||||
if (!(c->object.flags & SEEN))
|
||||
die("received ack for commit %s not sent as 'have'\n",
|
||||
oid_to_hex(&c->object.oid));
|
||||
mark_common(n->data, c);
|
||||
return known_to_be_common;
|
||||
}
|
||||
|
||||
static void release(struct fetch_negotiator *n)
|
||||
{
|
||||
clear_prio_queue(&((struct data *)n->data)->rev_list);
|
||||
FREE_AND_NULL(n->data);
|
||||
}
|
||||
|
||||
void skipping_negotiator_init(struct fetch_negotiator *negotiator)
|
||||
{
|
||||
struct data *data;
|
||||
negotiator->known_common = known_common;
|
||||
negotiator->add_tip = add_tip;
|
||||
negotiator->next = next;
|
||||
negotiator->ack = ack;
|
||||
negotiator->release = release;
|
||||
negotiator->data = data = xcalloc(1, sizeof(*data));
|
||||
data->rev_list.compare = compare;
|
||||
|
||||
if (marked)
|
||||
for_each_ref(clear_marks, NULL);
|
||||
marked = 1;
|
||||
}
|
8
negotiator/skipping.h
Normal file
8
negotiator/skipping.h
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef NEGOTIATOR_SKIPPING_H
|
||||
#define NEGOTIATOR_SKIPPING_H
|
||||
|
||||
struct fetch_negotiator;
|
||||
|
||||
void skipping_negotiator_init(struct fetch_negotiator *negotiator);
|
||||
|
||||
#endif
|
179
t/t5552-skipping-fetch-negotiator.sh
Executable file
179
t/t5552-skipping-fetch-negotiator.sh
Executable file
@ -0,0 +1,179 @@
|
||||
#!/bin/sh
|
||||
|
||||
test_description='test skipping fetch negotiator'
|
||||
. ./test-lib.sh
|
||||
|
||||
# Succeed only if, for every revision given as an argument, the packet
# trace in ./trace contains a "fetch> have <oid>" line for that revision
# as resolved in the "client" repository. On the first miss, print which
# revision was missing and return 1.
have_sent () {
	for rev
	do
		if ! grep "fetch> have $(git -C client rev-parse $rev)" trace
		then
			echo "No have $(git -C client rev-parse $rev) ($rev)"
			return 1
		fi
	done
}
|
||||
|
||||
# Succeed only if none of the revisions given as arguments (resolved in the
# "client" repository) appears in a "fetch> have <oid>" line of ./trace.
# Return 1 as soon as one of them is found.
have_not_sent () {
	for rev
	do
		if grep "fetch> have $(git -C client rev-parse $rev)" trace
		then
			return 1
		fi
	done
}
|
||||
|
||||
test_expect_success 'commits with no parents are sent regardless of skip distance' '
|
||||
git init server &&
|
||||
test_commit -C server to_fetch &&
|
||||
|
||||
git init client &&
|
||||
for i in $(seq 7)
|
||||
do
|
||||
test_commit -C client c$i
|
||||
done &&
|
||||
|
||||
# We send: "c7" (skip 1) "c5" (skip 2) "c2" (skip 4). After that, since
|
||||
# "c1" has no parent, it is still sent as "have" even though it would
|
||||
# normally be skipped.
|
||||
test_config -C client fetch.negotiationalgorithm skipping &&
|
||||
GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
|
||||
have_sent c7 c5 c2 c1 &&
|
||||
have_not_sent c6 c4 c3
|
||||
'
|
||||
|
||||
test_expect_success 'when two skips collide, favor the larger one' '
|
||||
rm -rf server client trace &&
|
||||
git init server &&
|
||||
test_commit -C server to_fetch &&
|
||||
|
||||
git init client &&
|
||||
for i in $(seq 11)
|
||||
do
|
||||
test_commit -C client c$i
|
||||
done &&
|
||||
git -C client checkout c5 &&
|
||||
test_commit -C client c5side &&
|
||||
|
||||
# Before reaching c5, we send "c5side" (skip 1) and "c11" (skip 1) "c9"
|
||||
# (skip 2) "c6" (skip 4). The larger skip (skip 4) takes precedence, so
|
||||
# the next "have" sent will be "c1" (from "c6" skip 4) and not "c4"
|
||||
# (from "c5side" skip 1).
|
||||
test_config -C client fetch.negotiationalgorithm skipping &&
|
||||
GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
|
||||
have_sent c5side c11 c9 c6 c1 &&
|
||||
have_not_sent c10 c8 c7 c5 c4 c3 c2
|
||||
'
|
||||
|
||||
test_expect_success 'use ref advertisement to filter out commits' '
|
||||
rm -rf server client trace &&
|
||||
git init server &&
|
||||
test_commit -C server c1 &&
|
||||
test_commit -C server c2 &&
|
||||
test_commit -C server c3 &&
|
||||
git -C server tag -d c1 c2 c3 &&
|
||||
|
||||
git clone server client &&
|
||||
test_commit -C client c4 &&
|
||||
test_commit -C client c5 &&
|
||||
git -C client checkout c4^^ &&
|
||||
test_commit -C client c2side &&
|
||||
|
||||
git -C server checkout --orphan anotherbranch &&
|
||||
test_commit -C server to_fetch &&
|
||||
|
||||
# The server advertising "c3" (as "refs/heads/master") means that we do
|
||||
# not need to send any ancestors of "c3", but we still need to send "c3"
|
||||
# itself.
|
||||
test_config -C client fetch.negotiationalgorithm skipping &&
|
||||
GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch origin to_fetch &&
|
||||
have_sent c5 c4^ c2side &&
|
||||
have_not_sent c4 c4^^ c4^^^
|
||||
'
|
||||
|
||||
test_expect_success 'handle clock skew' '
|
||||
rm -rf server client trace &&
|
||||
git init server &&
|
||||
test_commit -C server to_fetch &&
|
||||
|
||||
git init client &&
|
||||
|
||||
# 2 regular commits
|
||||
test_tick=2000000000 &&
|
||||
test_commit -C client c1 &&
|
||||
test_commit -C client c2 &&
|
||||
|
||||
# 4 old commits
|
||||
test_tick=1000000000 &&
|
||||
git -C client checkout c1 &&
|
||||
test_commit -C client old1 &&
|
||||
test_commit -C client old2 &&
|
||||
test_commit -C client old3 &&
|
||||
test_commit -C client old4 &&
|
||||
|
||||
# "c2" and "c1" are popped first, then "old4" to "old1". "old1" would
|
||||
# normally be skipped, but is treated as a commit without a parent here
|
||||
# and sent, because (due to clock skew) its only parent has already been
|
||||
# popped off the priority queue.
|
||||
test_config -C client fetch.negotiationalgorithm skipping &&
|
||||
GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
|
||||
have_sent c2 c1 old4 old2 old1 &&
|
||||
have_not_sent old3
|
||||
'
|
||||
|
||||
test_expect_success 'do not send "have" with ancestors of commits that server ACKed' '
|
||||
rm -rf server client trace &&
|
||||
git init server &&
|
||||
test_commit -C server to_fetch &&
|
||||
|
||||
git init client &&
|
||||
for i in $(seq 8)
|
||||
do
|
||||
git -C client checkout --orphan b$i &&
|
||||
test_commit -C client b$i.c0
|
||||
done &&
|
||||
for j in $(seq 19)
|
||||
do
|
||||
for i in $(seq 8)
|
||||
do
|
||||
git -C client checkout b$i &&
|
||||
test_commit -C client b$i.c$j
|
||||
done
|
||||
done &&
|
||||
|
||||
# Copy this branch over to the server and add a commit on it so that it
|
||||
# is reachable but not advertised.
|
||||
git -C server fetch --no-tags "$(pwd)/client" b1:refs/heads/b1 &&
|
||||
git -C server checkout b1 &&
|
||||
test_commit -C server commit-on-b1 &&
|
||||
|
||||
test_config -C client fetch.negotiationalgorithm skipping &&
|
||||
GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" to_fetch &&
|
||||
grep " fetch" trace &&
|
||||
|
||||
# fetch-pack sends 2 requests each containing 16 "have" lines before
|
||||
# processing the first response. In these 2 requests, 4 commits from
|
||||
# each branch are sent. Just check the first branch.
|
||||
have_sent b1.c19 b1.c17 b1.c14 b1.c9 &&
|
||||
have_not_sent b1.c18 b1.c16 b1.c15 b1.c13 b1.c12 b1.c11 b1.c10 &&
|
||||
|
||||
# While fetch-pack is processing the first response, it should read that
|
||||
# the server ACKs b1.c19 and b1.c17.
|
||||
grep "fetch< ACK $(git -C client rev-parse b1.c19) common" trace &&
|
||||
grep "fetch< ACK $(git -C client rev-parse b1.c17) common" trace &&
|
||||
|
||||
# fetch-pack should thus not send any more commits in the b1 branch, but
|
||||
# should still send the others (in this test, just check b2).
|
||||
for i in $(seq 0 8)
|
||||
do
|
||||
have_not_sent b1.c$i
|
||||
done &&
|
||||
have_sent b2.c1 b2.c0
|
||||
'
|
||||
|
||||
test_done
|
Loading…
Reference in New Issue
Block a user