Merge branch 'jt/batch-fetch-blobs-in-diff'
While running "git diff" in a lazy (partial) clone, we can know up front which missing blobs will be needed, instead of waiting for the on-demand machinery to discover and fetch them one by one. Improve performance by batching the request for these promised blobs into a single fetch.

* jt/batch-fetch-blobs-in-diff:
  diff: batch fetching of missing blobs
  sha1-file: support OBJECT_INFO_FOR_PREFETCH
This commit is contained in:
commit
32dc15dec1
34
diff.c
34
diff.c
@ -25,6 +25,7 @@
|
|||||||
#include "packfile.h"
|
#include "packfile.h"
|
||||||
#include "parse-options.h"
|
#include "parse-options.h"
|
||||||
#include "help.h"
|
#include "help.h"
|
||||||
|
#include "fetch-object.h"
|
||||||
|
|
||||||
#ifdef NO_FAST_WORKING_DIRECTORY
|
#ifdef NO_FAST_WORKING_DIRECTORY
|
||||||
#define FAST_WORKING_DIRECTORY 0
|
#define FAST_WORKING_DIRECTORY 0
|
||||||
@ -6477,8 +6478,41 @@ void diffcore_fix_diff_index(void)
|
|||||||
QSORT(q->queue, q->nr, diffnamecmp);
|
QSORT(q->queue, q->nr, diffnamecmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Queue the object named by `filespec` for a later bulk fetch.
 *
 * The OID is appended to `to_fetch` only when `filespec` is non-NULL,
 * its `oid_valid` flag is set, and the lookup in repository `r` with
 * OBJECT_INFO_FOR_PREFETCH returns nonzero.  A nonzero result is
 * presumably "object not available locally" -- confirm against
 * oid_object_info_extended().
 *
 * No de-duplication is done here: the same OID may be appended more
 * than once if several filespecs refer to it.
 */
static void add_if_missing(struct repository *r,
|
||||||
|
struct oid_array *to_fetch,
|
||||||
|
const struct diff_filespec *filespec)
|
||||||
|
{
|
||||||
|
if (filespec && filespec->oid_valid &&
|
||||||
|
oid_object_info_extended(r, &filespec->oid, NULL,
|
||||||
|
OBJECT_INFO_FOR_PREFETCH))
|
||||||
|
oid_array_append(to_fetch, &filespec->oid);
|
||||||
|
}
|
||||||
|
|
||||||
void diffcore_std(struct diff_options *options)
|
void diffcore_std(struct diff_options *options)
|
||||||
{
|
{
|
||||||
|
if (options->repo == the_repository &&
|
||||||
|
repository_format_partial_clone) {
|
||||||
|
/*
|
||||||
|
* Prefetch the diff pairs that are about to be flushed.
|
||||||
|
*/
|
||||||
|
int i;
|
||||||
|
struct diff_queue_struct *q = &diff_queued_diff;
|
||||||
|
struct oid_array to_fetch = OID_ARRAY_INIT;
|
||||||
|
|
||||||
|
for (i = 0; i < q->nr; i++) {
|
||||||
|
struct diff_filepair *p = q->queue[i];
|
||||||
|
add_if_missing(options->repo, &to_fetch, p->one);
|
||||||
|
add_if_missing(options->repo, &to_fetch, p->two);
|
||||||
|
}
|
||||||
|
if (to_fetch.nr)
|
||||||
|
/*
|
||||||
|
* NEEDSWORK: Consider deduplicating the OIDs sent.
|
||||||
|
*/
|
||||||
|
fetch_objects(repository_format_partial_clone,
|
||||||
|
to_fetch.oid, to_fetch.nr);
|
||||||
|
oid_array_clear(&to_fetch);
|
||||||
|
}
|
||||||
|
|
||||||
/* NOTE please keep the following in sync with diff_tree_combined() */
|
/* NOTE please keep the following in sync with diff_tree_combined() */
|
||||||
if (options->skip_stat_unmatch)
|
if (options->skip_stat_unmatch)
|
||||||
diffcore_skip_stat_unmatch(options);
|
diffcore_skip_stat_unmatch(options);
|
||||||
|
@ -280,6 +280,12 @@ struct object_info {
|
|||||||
#define OBJECT_INFO_QUICK 8
|
#define OBJECT_INFO_QUICK 8
|
||||||
/* Do not check loose object */
|
/* Do not check loose object */
|
||||||
#define OBJECT_INFO_IGNORE_LOOSE 16
|
#define OBJECT_INFO_IGNORE_LOOSE 16
|
||||||
|
/*
|
||||||
|
* Do not attempt to fetch the object if missing (even if fetch_is_missing is
|
||||||
|
* nonzero). This is meant for bulk prefetching of missing blobs in a partial
|
||||||
|
* clone. Implies OBJECT_INFO_QUICK.
|
||||||
|
*/
|
||||||
|
#define OBJECT_INFO_FOR_PREFETCH (32 + OBJECT_INFO_QUICK)
|
||||||
|
|
||||||
int oid_object_info_extended(struct repository *r,
|
int oid_object_info_extended(struct repository *r,
|
||||||
const struct object_id *,
|
const struct object_id *,
|
||||||
|
@ -1378,7 +1378,8 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid,
|
|||||||
|
|
||||||
/* Check if it is a missing object */
|
/* Check if it is a missing object */
|
||||||
if (fetch_if_missing && repository_format_partial_clone &&
|
if (fetch_if_missing && repository_format_partial_clone &&
|
||||||
!already_retried && r == the_repository) {
|
!already_retried && r == the_repository &&
|
||||||
|
!(flags & OBJECT_INFO_FOR_PREFETCH)) {
|
||||||
/*
|
/*
|
||||||
* TODO Investigate having fetch_object() return
|
* TODO Investigate having fetch_object() return
|
||||||
* TODO error/success and stopping the music here.
|
* TODO error/success and stopping the music here.
|
||||||
|
103
t/t4067-diff-partial-clone.sh
Executable file
103
t/t4067-diff-partial-clone.sh
Executable file
@ -0,0 +1,103 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
test_description='behavior of diff when reading objects in a partial clone'
|
||||||
|
|
||||||
|
. ./test-lib.sh
|
||||||
|
|
||||||
|
test_expect_success 'git show batches blobs' '
|
||||||
|
test_when_finished "rm -rf server client trace" &&
|
||||||
|
|
||||||
|
test_create_repo server &&
|
||||||
|
echo a >server/a &&
|
||||||
|
echo b >server/b &&
|
||||||
|
git -C server add a b &&
|
||||||
|
git -C server commit -m x &&
|
||||||
|
|
||||||
|
test_config -C server uploadpack.allowfilter 1 &&
|
||||||
|
test_config -C server uploadpack.allowanysha1inwant 1 &&
|
||||||
|
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
|
||||||
|
|
||||||
|
# Ensure that there is exactly 1 negotiation by checking that there is
|
||||||
|
# only 1 "done" line sent. ("done" marks the end of negotiation.)
|
||||||
|
GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
|
||||||
|
grep "git> done" trace >done_lines &&
|
||||||
|
test_line_count = 1 done_lines
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'diff batches blobs' '
|
||||||
|
test_when_finished "rm -rf server client trace" &&
|
||||||
|
|
||||||
|
test_create_repo server &&
|
||||||
|
echo a >server/a &&
|
||||||
|
echo b >server/b &&
|
||||||
|
git -C server add a b &&
|
||||||
|
git -C server commit -m x &&
|
||||||
|
echo c >server/c &&
|
||||||
|
echo d >server/d &&
|
||||||
|
git -C server add c d &&
|
||||||
|
git -C server commit -m x &&
|
||||||
|
|
||||||
|
test_config -C server uploadpack.allowfilter 1 &&
|
||||||
|
test_config -C server uploadpack.allowanysha1inwant 1 &&
|
||||||
|
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
|
||||||
|
|
||||||
|
# Ensure that there is exactly 1 negotiation by checking that there is
|
||||||
|
# only 1 "done" line sent. ("done" marks the end of negotiation.)
|
||||||
|
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
|
||||||
|
grep "git> done" trace >done_lines &&
|
||||||
|
test_line_count = 1 done_lines
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'diff skips same-OID blobs' '
|
||||||
|
test_when_finished "rm -rf server client trace" &&
|
||||||
|
|
||||||
|
test_create_repo server &&
|
||||||
|
echo a >server/a &&
|
||||||
|
echo b >server/b &&
|
||||||
|
git -C server add a b &&
|
||||||
|
git -C server commit -m x &&
|
||||||
|
echo another-a >server/a &&
|
||||||
|
git -C server add a &&
|
||||||
|
git -C server commit -m x &&
|
||||||
|
|
||||||
|
test_config -C server uploadpack.allowfilter 1 &&
|
||||||
|
test_config -C server uploadpack.allowanysha1inwant 1 &&
|
||||||
|
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
|
||||||
|
|
||||||
|
echo a | git hash-object --stdin >hash-old-a &&
|
||||||
|
echo another-a | git hash-object --stdin >hash-new-a &&
|
||||||
|
echo b | git hash-object --stdin >hash-b &&
|
||||||
|
|
||||||
|
# Ensure that only a and another-a are fetched.
|
||||||
|
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
|
||||||
|
grep "want $(cat hash-old-a)" trace &&
|
||||||
|
grep "want $(cat hash-new-a)" trace &&
|
||||||
|
! grep "want $(cat hash-b)" trace
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'diff with rename detection batches blobs' '
|
||||||
|
test_when_finished "rm -rf server client trace" &&
|
||||||
|
|
||||||
|
test_create_repo server &&
|
||||||
|
echo a >server/a &&
|
||||||
|
printf "b\nb\nb\nb\nb\n" >server/b &&
|
||||||
|
git -C server add a b &&
|
||||||
|
git -C server commit -m x &&
|
||||||
|
rm server/b &&
|
||||||
|
printf "b\nb\nb\nb\nbX\n" >server/c &&
|
||||||
|
git -C server add c &&
|
||||||
|
git -C server commit -a -m x &&
|
||||||
|
|
||||||
|
test_config -C server uploadpack.allowfilter 1 &&
|
||||||
|
test_config -C server uploadpack.allowanysha1inwant 1 &&
|
||||||
|
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
|
||||||
|
|
||||||
|
# Ensure that there is exactly 1 negotiation by checking that there is
|
||||||
|
# only 1 "done" line sent. ("done" marks the end of negotiation.)
|
||||||
|
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff -M HEAD^ HEAD >out &&
|
||||||
|
grep "similarity index" out &&
|
||||||
|
grep "git> done" trace >done_lines &&
|
||||||
|
test_line_count = 1 done_lines
|
||||||
|
'
|
||||||
|
|
||||||
|
test_done
|
@ -406,20 +406,21 @@ static int check_updates(struct unpack_trees_options *o)
|
|||||||
* below.
|
* below.
|
||||||
*/
|
*/
|
||||||
struct oid_array to_fetch = OID_ARRAY_INIT;
|
struct oid_array to_fetch = OID_ARRAY_INIT;
|
||||||
int fetch_if_missing_store = fetch_if_missing;
|
|
||||||
fetch_if_missing = 0;
|
|
||||||
for (i = 0; i < index->cache_nr; i++) {
|
for (i = 0; i < index->cache_nr; i++) {
|
||||||
struct cache_entry *ce = index->cache[i];
|
struct cache_entry *ce = index->cache[i];
|
||||||
if ((ce->ce_flags & CE_UPDATE) &&
|
|
||||||
!S_ISGITLINK(ce->ce_mode)) {
|
if (!(ce->ce_flags & CE_UPDATE) ||
|
||||||
if (!has_object_file(&ce->oid))
|
S_ISGITLINK(ce->ce_mode))
|
||||||
oid_array_append(&to_fetch, &ce->oid);
|
continue;
|
||||||
}
|
if (!oid_object_info_extended(the_repository, &ce->oid,
|
||||||
|
NULL,
|
||||||
|
OBJECT_INFO_FOR_PREFETCH))
|
||||||
|
continue;
|
||||||
|
oid_array_append(&to_fetch, &ce->oid);
|
||||||
}
|
}
|
||||||
if (to_fetch.nr)
|
if (to_fetch.nr)
|
||||||
fetch_objects(repository_format_partial_clone,
|
fetch_objects(repository_format_partial_clone,
|
||||||
to_fetch.oid, to_fetch.nr);
|
to_fetch.oid, to_fetch.nr);
|
||||||
fetch_if_missing = fetch_if_missing_store;
|
|
||||||
oid_array_clear(&to_fetch);
|
oid_array_clear(&to_fetch);
|
||||||
}
|
}
|
||||||
for (i = 0; i < index->cache_nr; i++) {
|
for (i = 0; i < index->cache_nr; i++) {
|
||||||
|
Loading…
Reference in New Issue
Block a user