diff: restrict when prefetching occurs
Commit 7fbbcb21b1
("diff: batch fetching of missing blobs", 2019-04-08)
optimized "diff" by prefetching blobs in a partial clone, but there are
some cases wherein blobs do not need to be prefetched. In these cases,
any command that uses the diff machinery will unnecessarily fetch blobs.
diffcore_std() may read blobs when it calls the following functions:
(1) diffcore_skip_stat_unmatch() (controlled by the config variable
diff.autorefreshindex)
(2) diffcore_break() and diffcore_merge_broken() (for break-rewrite
detection)
(3) diffcore_rename() (for rename detection)
(4) diffcore_pickaxe() (for detecting addition/deletion of specified
string)
Instead of always prefetching blobs, teach diffcore_skip_stat_unmatch(),
diffcore_break(), and diffcore_rename() to prefetch blobs upon the first
read of a missing object. This covers (1), (2), and (3): to cover the
rest, teach diffcore_std() to prefetch if the output type is one that
includes blob data (and hence blob data will be required later anyway),
or if it knows that (4) will be run.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
c14b6f83ec
commit
95acf11a3d
51
diff.c
51
diff.c
@ -4034,10 +4034,18 @@ int diff_populate_filespec(struct repository *r,
|
|||||||
*/
|
*/
|
||||||
info.contentp = &s->data;
|
info.contentp = &s->data;
|
||||||
|
|
||||||
|
if (options && options->missing_object_cb) {
|
||||||
|
if (!oid_object_info_extended(r, &s->oid, &info,
|
||||||
|
OBJECT_INFO_LOOKUP_REPLACE |
|
||||||
|
OBJECT_INFO_SKIP_FETCH_OBJECT))
|
||||||
|
goto object_read;
|
||||||
|
options->missing_object_cb(options->missing_object_data);
|
||||||
|
}
|
||||||
if (oid_object_info_extended(r, &s->oid, &info,
|
if (oid_object_info_extended(r, &s->oid, &info,
|
||||||
OBJECT_INFO_LOOKUP_REPLACE))
|
OBJECT_INFO_LOOKUP_REPLACE))
|
||||||
die("unable to read %s", oid_to_hex(&s->oid));
|
die("unable to read %s", oid_to_hex(&s->oid));
|
||||||
|
|
||||||
|
object_read:
|
||||||
if (size_only || check_binary) {
|
if (size_only || check_binary) {
|
||||||
if (size_only)
|
if (size_only)
|
||||||
return 0;
|
return 0;
|
||||||
@ -6444,6 +6452,8 @@ static int diff_filespec_check_stat_unmatch(struct repository *r,
|
|||||||
{
|
{
|
||||||
struct diff_populate_filespec_options dpf_options = {
|
struct diff_populate_filespec_options dpf_options = {
|
||||||
.check_size_only = 1,
|
.check_size_only = 1,
|
||||||
|
.missing_object_cb = diff_queued_diff_prefetch,
|
||||||
|
.missing_object_data = r,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (p->done_skip_stat_unmatch)
|
if (p->done_skip_stat_unmatch)
|
||||||
@ -6520,7 +6530,7 @@ void diffcore_fix_diff_index(void)
|
|||||||
QSORT(q->queue, q->nr, diffnamecmp);
|
QSORT(q->queue, q->nr, diffnamecmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void add_if_missing(struct repository *r,
|
void diff_add_if_missing(struct repository *r,
|
||||||
struct oid_array *to_fetch,
|
struct oid_array *to_fetch,
|
||||||
const struct diff_filespec *filespec)
|
const struct diff_filespec *filespec)
|
||||||
{
|
{
|
||||||
@ -6531,28 +6541,47 @@ static void add_if_missing(struct repository *r,
|
|||||||
oid_array_append(to_fetch, &filespec->oid);
|
oid_array_append(to_fetch, &filespec->oid);
|
||||||
}
|
}
|
||||||
|
|
||||||
void diffcore_std(struct diff_options *options)
|
void diff_queued_diff_prefetch(void *repository)
|
||||||
{
|
{
|
||||||
if (options->repo == the_repository && has_promisor_remote()) {
|
struct repository *repo = repository;
|
||||||
/*
|
|
||||||
* Prefetch the diff pairs that are about to be flushed.
|
|
||||||
*/
|
|
||||||
int i;
|
int i;
|
||||||
struct diff_queue_struct *q = &diff_queued_diff;
|
struct diff_queue_struct *q = &diff_queued_diff;
|
||||||
struct oid_array to_fetch = OID_ARRAY_INIT;
|
struct oid_array to_fetch = OID_ARRAY_INIT;
|
||||||
|
|
||||||
for (i = 0; i < q->nr; i++) {
|
for (i = 0; i < q->nr; i++) {
|
||||||
struct diff_filepair *p = q->queue[i];
|
struct diff_filepair *p = q->queue[i];
|
||||||
add_if_missing(options->repo, &to_fetch, p->one);
|
diff_add_if_missing(repo, &to_fetch, p->one);
|
||||||
add_if_missing(options->repo, &to_fetch, p->two);
|
diff_add_if_missing(repo, &to_fetch, p->two);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NEEDSWORK: Consider deduplicating the OIDs sent.
|
* NEEDSWORK: Consider deduplicating the OIDs sent.
|
||||||
*/
|
*/
|
||||||
promisor_remote_get_direct(options->repo,
|
promisor_remote_get_direct(repo, to_fetch.oid, to_fetch.nr);
|
||||||
to_fetch.oid, to_fetch.nr);
|
|
||||||
oid_array_clear(&to_fetch);
|
oid_array_clear(&to_fetch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void diffcore_std(struct diff_options *options)
|
||||||
|
{
|
||||||
|
int output_formats_to_prefetch = DIFF_FORMAT_DIFFSTAT |
|
||||||
|
DIFF_FORMAT_NUMSTAT |
|
||||||
|
DIFF_FORMAT_PATCH |
|
||||||
|
DIFF_FORMAT_SHORTSTAT |
|
||||||
|
DIFF_FORMAT_DIRSTAT;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if the user requested a blob-data-requiring diff output and/or
|
||||||
|
* break-rewrite detection (which requires blob data). If yes, prefetch
|
||||||
|
* the diff pairs.
|
||||||
|
*
|
||||||
|
* If no prefetching occurs, diffcore_rename() will prefetch if it
|
||||||
|
* decides that it needs inexact rename detection.
|
||||||
|
*/
|
||||||
|
if (options->repo == the_repository && has_promisor_remote() &&
|
||||||
|
(options->output_format & output_formats_to_prefetch ||
|
||||||
|
options->pickaxe_opts & DIFF_PICKAXE_KINDS_MASK))
|
||||||
|
diff_queued_diff_prefetch(options->repo);
|
||||||
|
|
||||||
/* NOTE please keep the following in sync with diff_tree_combined() */
|
/* NOTE please keep the following in sync with diff_tree_combined() */
|
||||||
if (options->skip_stat_unmatch)
|
if (options->skip_stat_unmatch)
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include "cache.h"
|
#include "cache.h"
|
||||||
#include "diff.h"
|
#include "diff.h"
|
||||||
#include "diffcore.h"
|
#include "diffcore.h"
|
||||||
|
#include "promisor-remote.h"
|
||||||
|
|
||||||
static int should_break(struct repository *r,
|
static int should_break(struct repository *r,
|
||||||
struct diff_filespec *src,
|
struct diff_filespec *src,
|
||||||
@ -49,6 +50,8 @@ static int should_break(struct repository *r,
|
|||||||
unsigned long delta_size, max_size;
|
unsigned long delta_size, max_size;
|
||||||
unsigned long src_copied, literal_added, src_removed;
|
unsigned long src_copied, literal_added, src_removed;
|
||||||
|
|
||||||
|
struct diff_populate_filespec_options options = { 0 };
|
||||||
|
|
||||||
*merge_score_p = 0; /* assume no deletion --- "do not break"
|
*merge_score_p = 0; /* assume no deletion --- "do not break"
|
||||||
* is the default.
|
* is the default.
|
||||||
*/
|
*/
|
||||||
@ -62,8 +65,13 @@ static int should_break(struct repository *r,
|
|||||||
oideq(&src->oid, &dst->oid))
|
oideq(&src->oid, &dst->oid))
|
||||||
return 0; /* they are the same */
|
return 0; /* they are the same */
|
||||||
|
|
||||||
if (diff_populate_filespec(r, src, NULL) ||
|
if (r == the_repository && has_promisor_remote()) {
|
||||||
diff_populate_filespec(r, dst, NULL))
|
options.missing_object_cb = diff_queued_diff_prefetch;
|
||||||
|
options.missing_object_data = r;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (diff_populate_filespec(r, src, &options) ||
|
||||||
|
diff_populate_filespec(r, dst, &options))
|
||||||
return 0; /* error but caught downstream */
|
return 0; /* error but caught downstream */
|
||||||
|
|
||||||
max_size = ((src->size > dst->size) ? src->size : dst->size);
|
max_size = ((src->size > dst->size) ? src->size : dst->size);
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
|
*
|
||||||
* Copyright (C) 2005 Junio C Hamano
|
* Copyright (C) 2005 Junio C Hamano
|
||||||
*/
|
*/
|
||||||
#include "cache.h"
|
#include "cache.h"
|
||||||
@ -7,6 +8,7 @@
|
|||||||
#include "object-store.h"
|
#include "object-store.h"
|
||||||
#include "hashmap.h"
|
#include "hashmap.h"
|
||||||
#include "progress.h"
|
#include "progress.h"
|
||||||
|
#include "promisor-remote.h"
|
||||||
|
|
||||||
/* Table of rename/copy destinations */
|
/* Table of rename/copy destinations */
|
||||||
|
|
||||||
@ -128,10 +130,46 @@ struct diff_score {
|
|||||||
short name_score;
|
short name_score;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct prefetch_options {
|
||||||
|
struct repository *repo;
|
||||||
|
int skip_unmodified;
|
||||||
|
};
|
||||||
|
static void prefetch(void *prefetch_options)
|
||||||
|
{
|
||||||
|
struct prefetch_options *options = prefetch_options;
|
||||||
|
int i;
|
||||||
|
struct oid_array to_fetch = OID_ARRAY_INIT;
|
||||||
|
|
||||||
|
for (i = 0; i < rename_dst_nr; i++) {
|
||||||
|
if (rename_dst[i].pair)
|
||||||
|
/*
|
||||||
|
* The loop in diffcore_rename() will not need these
|
||||||
|
* blobs, so skip prefetching.
|
||||||
|
*/
|
||||||
|
continue; /* already found exact match */
|
||||||
|
diff_add_if_missing(options->repo, &to_fetch,
|
||||||
|
rename_dst[i].two);
|
||||||
|
}
|
||||||
|
for (i = 0; i < rename_src_nr; i++) {
|
||||||
|
if (options->skip_unmodified &&
|
||||||
|
diff_unmodified_pair(rename_src[i].p))
|
||||||
|
/*
|
||||||
|
* The loop in diffcore_rename() will not need these
|
||||||
|
* blobs, so skip prefetching.
|
||||||
|
*/
|
||||||
|
continue;
|
||||||
|
diff_add_if_missing(options->repo, &to_fetch,
|
||||||
|
rename_src[i].p->one);
|
||||||
|
}
|
||||||
|
promisor_remote_get_direct(options->repo, to_fetch.oid, to_fetch.nr);
|
||||||
|
oid_array_clear(&to_fetch);
|
||||||
|
}
|
||||||
|
|
||||||
static int estimate_similarity(struct repository *r,
|
static int estimate_similarity(struct repository *r,
|
||||||
struct diff_filespec *src,
|
struct diff_filespec *src,
|
||||||
struct diff_filespec *dst,
|
struct diff_filespec *dst,
|
||||||
int minimum_score)
|
int minimum_score,
|
||||||
|
int skip_unmodified)
|
||||||
{
|
{
|
||||||
/* src points at a file that existed in the original tree (or
|
/* src points at a file that existed in the original tree (or
|
||||||
* optionally a file in the destination tree) and dst points
|
* optionally a file in the destination tree) and dst points
|
||||||
@ -151,6 +189,12 @@ static int estimate_similarity(struct repository *r,
|
|||||||
struct diff_populate_filespec_options dpf_options = {
|
struct diff_populate_filespec_options dpf_options = {
|
||||||
.check_size_only = 1
|
.check_size_only = 1
|
||||||
};
|
};
|
||||||
|
struct prefetch_options prefetch_options = {r, skip_unmodified};
|
||||||
|
|
||||||
|
if (r == the_repository && has_promisor_remote()) {
|
||||||
|
dpf_options.missing_object_cb = prefetch;
|
||||||
|
dpf_options.missing_object_data = &prefetch_options;
|
||||||
|
}
|
||||||
|
|
||||||
/* We deal only with regular files. Symlink renames are handled
|
/* We deal only with regular files. Symlink renames are handled
|
||||||
* only when they are exact matches --- in other words, no edits
|
* only when they are exact matches --- in other words, no edits
|
||||||
@ -190,9 +234,11 @@ static int estimate_similarity(struct repository *r,
|
|||||||
if (max_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
|
if (max_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (!src->cnt_data && diff_populate_filespec(r, src, NULL))
|
dpf_options.check_size_only = 0;
|
||||||
|
|
||||||
|
if (!src->cnt_data && diff_populate_filespec(r, src, &dpf_options))
|
||||||
return 0;
|
return 0;
|
||||||
if (!dst->cnt_data && diff_populate_filespec(r, dst, NULL))
|
if (!dst->cnt_data && diff_populate_filespec(r, dst, &dpf_options))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (diffcore_count_changes(r, src, dst,
|
if (diffcore_count_changes(r, src, dst,
|
||||||
@ -569,7 +615,8 @@ void diffcore_rename(struct diff_options *options)
|
|||||||
|
|
||||||
this_src.score = estimate_similarity(options->repo,
|
this_src.score = estimate_similarity(options->repo,
|
||||||
one, two,
|
one, two,
|
||||||
minimum_score);
|
minimum_score,
|
||||||
|
skip_unmodified);
|
||||||
this_src.name_score = basename_same(one, two);
|
this_src.name_score = basename_same(one, two);
|
||||||
this_src.dst = i;
|
this_src.dst = i;
|
||||||
this_src.src = j;
|
this_src.src = j;
|
||||||
|
21
diffcore.h
21
diffcore.h
@ -65,9 +65,22 @@ void free_filespec(struct diff_filespec *);
|
|||||||
void fill_filespec(struct diff_filespec *, const struct object_id *,
|
void fill_filespec(struct diff_filespec *, const struct object_id *,
|
||||||
int, unsigned short);
|
int, unsigned short);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prefetch the entries in diff_queued_diff. The parameter is a pointer to a
|
||||||
|
* struct repository.
|
||||||
|
*/
|
||||||
|
void diff_queued_diff_prefetch(void *repository);
|
||||||
|
|
||||||
struct diff_populate_filespec_options {
|
struct diff_populate_filespec_options {
|
||||||
unsigned check_size_only : 1;
|
unsigned check_size_only : 1;
|
||||||
unsigned check_binary : 1;
|
unsigned check_binary : 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If an object is missing, diff_populate_filespec() will invoke this
|
||||||
|
* callback before attempting to read that object again.
|
||||||
|
*/
|
||||||
|
void (*missing_object_cb)(void *);
|
||||||
|
void *missing_object_data;
|
||||||
};
|
};
|
||||||
int diff_populate_filespec(struct repository *, struct diff_filespec *,
|
int diff_populate_filespec(struct repository *, struct diff_filespec *,
|
||||||
const struct diff_populate_filespec_options *);
|
const struct diff_populate_filespec_options *);
|
||||||
@ -185,4 +198,12 @@ int diffcore_count_changes(struct repository *r,
|
|||||||
unsigned long *src_copied,
|
unsigned long *src_copied,
|
||||||
unsigned long *literal_added);
|
unsigned long *literal_added);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If filespec contains an OID and if that object is missing from the given
|
||||||
|
* repository, add that OID to to_fetch.
|
||||||
|
*/
|
||||||
|
void diff_add_if_missing(struct repository *r,
|
||||||
|
struct oid_array *to_fetch,
|
||||||
|
const struct diff_filespec *filespec);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -131,4 +131,52 @@ test_expect_success 'diff with rename detection batches blobs' '
|
|||||||
test_line_count = 1 done_lines
|
test_line_count = 1 done_lines
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'diff does not fetch anything if inexact rename detection is not needed' '
|
||||||
|
test_when_finished "rm -rf server client trace" &&
|
||||||
|
|
||||||
|
test_create_repo server &&
|
||||||
|
echo a >server/a &&
|
||||||
|
printf "b\nb\nb\nb\nb\n" >server/b &&
|
||||||
|
git -C server add a b &&
|
||||||
|
git -C server commit -m x &&
|
||||||
|
mv server/b server/c &&
|
||||||
|
git -C server add c &&
|
||||||
|
git -C server commit -a -m x &&
|
||||||
|
|
||||||
|
test_config -C server uploadpack.allowfilter 1 &&
|
||||||
|
test_config -C server uploadpack.allowanysha1inwant 1 &&
|
||||||
|
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
|
||||||
|
|
||||||
|
# Ensure no fetches.
|
||||||
|
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --raw -M HEAD^ HEAD &&
|
||||||
|
! test_path_exists trace
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'diff --break-rewrites fetches only if necessary, and batches blobs if it does' '
|
||||||
|
test_when_finished "rm -rf server client trace" &&
|
||||||
|
|
||||||
|
test_create_repo server &&
|
||||||
|
echo a >server/a &&
|
||||||
|
printf "b\nb\nb\nb\nb\n" >server/b &&
|
||||||
|
git -C server add a b &&
|
||||||
|
git -C server commit -m x &&
|
||||||
|
printf "c\nc\nc\nc\nc\n" >server/b &&
|
||||||
|
git -C server commit -a -m x &&
|
||||||
|
|
||||||
|
test_config -C server uploadpack.allowfilter 1 &&
|
||||||
|
test_config -C server uploadpack.allowanysha1inwant 1 &&
|
||||||
|
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
|
||||||
|
|
||||||
|
# Ensure no fetches.
|
||||||
|
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --raw -M HEAD^ HEAD &&
|
||||||
|
! test_path_exists trace &&
|
||||||
|
|
||||||
|
# But with --break-rewrites, ensure that there is exactly 1 negotiation
|
||||||
|
# by checking that there is only 1 "done" line sent. ("done" marks the
|
||||||
|
# end of negotiation.)
|
||||||
|
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --break-rewrites --raw -M HEAD^ HEAD &&
|
||||||
|
grep "git> done" trace >done_lines &&
|
||||||
|
test_line_count = 1 done_lines
|
||||||
|
'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
Loading…
Reference in New Issue
Block a user