Merge branch 'jt/avoid-prefetch-when-able-in-diff'

"git diff" in a partial clone learned to avoid lazy loading blob
objects in more cases when they are not needed.

* jt/avoid-prefetch-when-able-in-diff:
  diff: restrict when prefetching occurs
  diff: refactor object read
  diff: make diff_populate_filespec_options struct
  promisor-remote: accept 0 as oid_nr in function
This commit is contained in:
Junio C Hamano 2020-04-28 15:50:04 -07:00
commit 8f5dc5a4af
10 changed files with 267 additions and 71 deletions

View File

@ -1368,9 +1368,8 @@ static void fix_unresolved_deltas(struct hashfile *f)
continue;
oid_array_append(&to_fetch, &d->oid);
}
if (to_fetch.nr)
promisor_remote_get_direct(the_repository,
to_fetch.oid, to_fetch.nr);
promisor_remote_get_direct(the_repository,
to_fetch.oid, to_fetch.nr);
oid_array_clear(&to_fetch);
}

157
diff.c
View File

@ -573,7 +573,7 @@ static int fill_mmfile(struct repository *r, mmfile_t *mf,
mf->size = 0;
return 0;
}
else if (diff_populate_filespec(r, one, 0))
else if (diff_populate_filespec(r, one, NULL))
return -1;
mf->ptr = one->data;
@ -585,9 +585,13 @@ static int fill_mmfile(struct repository *r, mmfile_t *mf,
static unsigned long diff_filespec_size(struct repository *r,
struct diff_filespec *one)
{
struct diff_populate_filespec_options dpf_options = {
.check_size_only = 1,
};
if (!DIFF_FILE_VALID(one))
return 0;
diff_populate_filespec(r, one, CHECK_SIZE_ONLY);
diff_populate_filespec(r, one, &dpf_options);
return one->size;
}
@ -3020,6 +3024,9 @@ static void show_dirstat(struct diff_options *options)
struct diff_filepair *p = q->queue[i];
const char *name;
unsigned long copied, added, damage;
struct diff_populate_filespec_options dpf_options = {
.check_size_only = 1,
};
name = p->two->path ? p->two->path : p->one->path;
@ -3047,19 +3054,19 @@ static void show_dirstat(struct diff_options *options)
}
if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
diff_populate_filespec(options->repo, p->one, 0);
diff_populate_filespec(options->repo, p->two, 0);
diff_populate_filespec(options->repo, p->one, NULL);
diff_populate_filespec(options->repo, p->two, NULL);
diffcore_count_changes(options->repo,
p->one, p->two, NULL, NULL,
&copied, &added);
diff_free_filespec_data(p->one);
diff_free_filespec_data(p->two);
} else if (DIFF_FILE_VALID(p->one)) {
diff_populate_filespec(options->repo, p->one, CHECK_SIZE_ONLY);
diff_populate_filespec(options->repo, p->one, &dpf_options);
copied = added = 0;
diff_free_filespec_data(p->one);
} else if (DIFF_FILE_VALID(p->two)) {
diff_populate_filespec(options->repo, p->two, CHECK_SIZE_ONLY);
diff_populate_filespec(options->repo, p->two, &dpf_options);
copied = 0;
added = p->two->size;
diff_free_filespec_data(p->two);
@ -3339,13 +3346,17 @@ static void emit_binary_diff(struct diff_options *o,
int diff_filespec_is_binary(struct repository *r,
struct diff_filespec *one)
{
struct diff_populate_filespec_options dpf_options = {
.check_binary = 1,
};
if (one->is_binary == -1) {
diff_filespec_load_driver(one, r->index);
if (one->driver->binary != -1)
one->is_binary = one->driver->binary;
else {
if (!one->data && DIFF_FILE_VALID(one))
diff_populate_filespec(r, one, CHECK_BINARY);
diff_populate_filespec(r, one, &dpf_options);
if (one->is_binary == -1 && one->data)
one->is_binary = buffer_is_binary(one->data,
one->size);
@ -3677,8 +3688,8 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
}
else if (complete_rewrite) {
diff_populate_filespec(o->repo, one, 0);
diff_populate_filespec(o->repo, two, 0);
diff_populate_filespec(o->repo, one, NULL);
diff_populate_filespec(o->repo, two, NULL);
data->deleted = count_lines(one->data, one->size);
data->added = count_lines(two->data, two->size);
}
@ -3914,9 +3925,10 @@ static int diff_populate_gitlink(struct diff_filespec *s, int size_only)
*/
int diff_populate_filespec(struct repository *r,
struct diff_filespec *s,
unsigned int flags)
const struct diff_populate_filespec_options *options)
{
int size_only = flags & CHECK_SIZE_ONLY;
int size_only = options ? options->check_size_only : 0;
int check_binary = options ? options->check_binary : 0;
int err = 0;
int conv_flags = global_conv_flags_eol;
/*
@ -3986,7 +3998,7 @@ int diff_populate_filespec(struct repository *r,
* opening the file and inspecting the contents, this
* is probably fine.
*/
if ((flags & CHECK_BINARY) &&
if (check_binary &&
s->size > big_file_threshold && s->is_binary == -1) {
s->is_binary = 1;
return 0;
@ -4011,12 +4023,30 @@ int diff_populate_filespec(struct repository *r,
}
}
else {
enum object_type type;
if (size_only || (flags & CHECK_BINARY)) {
type = oid_object_info(r, &s->oid, &s->size);
if (type < 0)
die("unable to read %s",
oid_to_hex(&s->oid));
struct object_info info = {
.sizep = &s->size
};
if (!(size_only || check_binary))
/*
* Set contentp, since there is no chance that merely
* the size is sufficient.
*/
info.contentp = &s->data;
if (options && options->missing_object_cb) {
if (!oid_object_info_extended(r, &s->oid, &info,
OBJECT_INFO_LOOKUP_REPLACE |
OBJECT_INFO_SKIP_FETCH_OBJECT))
goto object_read;
options->missing_object_cb(options->missing_object_data);
}
if (oid_object_info_extended(r, &s->oid, &info,
OBJECT_INFO_LOOKUP_REPLACE))
die("unable to read %s", oid_to_hex(&s->oid));
object_read:
if (size_only || check_binary) {
if (size_only)
return 0;
if (s->size > big_file_threshold && s->is_binary == -1) {
@ -4024,9 +4054,12 @@ int diff_populate_filespec(struct repository *r,
return 0;
}
}
s->data = repo_read_object_file(r, &s->oid, &type, &s->size);
if (!s->data)
die("unable to read %s", oid_to_hex(&s->oid));
if (!info.contentp) {
info.contentp = &s->data;
if (oid_object_info_extended(r, &s->oid, &info,
OBJECT_INFO_LOOKUP_REPLACE))
die("unable to read %s", oid_to_hex(&s->oid));
}
s->should_free = 1;
}
return 0;
@ -4144,7 +4177,7 @@ static struct diff_tempfile *prepare_temp_file(struct repository *r,
return temp;
}
else {
if (diff_populate_filespec(r, one, 0))
if (diff_populate_filespec(r, one, NULL))
die("cannot read data blob for %s", one->path);
prep_temp_blob(r->index, name, temp,
one->data, one->size,
@ -6410,9 +6443,9 @@ static int diff_filespec_is_identical(struct repository *r,
{
if (S_ISGITLINK(one->mode))
return 0;
if (diff_populate_filespec(r, one, 0))
if (diff_populate_filespec(r, one, NULL))
return 0;
if (diff_populate_filespec(r, two, 0))
if (diff_populate_filespec(r, two, NULL))
return 0;
return !memcmp(one->data, two->data, one->size);
}
@ -6420,6 +6453,12 @@ static int diff_filespec_is_identical(struct repository *r,
static int diff_filespec_check_stat_unmatch(struct repository *r,
struct diff_filepair *p)
{
struct diff_populate_filespec_options dpf_options = {
.check_size_only = 1,
.missing_object_cb = diff_queued_diff_prefetch,
.missing_object_data = r,
};
if (p->done_skip_stat_unmatch)
return p->skip_stat_unmatch_result;
@ -6442,8 +6481,8 @@ static int diff_filespec_check_stat_unmatch(struct repository *r,
!DIFF_FILE_VALID(p->two) ||
(p->one->oid_valid && p->two->oid_valid) ||
(p->one->mode != p->two->mode) ||
diff_populate_filespec(r, p->one, CHECK_SIZE_ONLY) ||
diff_populate_filespec(r, p->two, CHECK_SIZE_ONLY) ||
diff_populate_filespec(r, p->one, &dpf_options) ||
diff_populate_filespec(r, p->two, &dpf_options) ||
(p->one->size != p->two->size) ||
!diff_filespec_is_identical(r, p->one, p->two)) /* (2) */
p->skip_stat_unmatch_result = 1;
@ -6494,9 +6533,9 @@ void diffcore_fix_diff_index(void)
QSORT(q->queue, q->nr, diffnamecmp);
}
static void add_if_missing(struct repository *r,
struct oid_array *to_fetch,
const struct diff_filespec *filespec)
void diff_add_if_missing(struct repository *r,
struct oid_array *to_fetch,
const struct diff_filespec *filespec)
{
if (filespec && filespec->oid_valid &&
!S_ISGITLINK(filespec->mode) &&
@ -6505,29 +6544,47 @@ static void add_if_missing(struct repository *r,
oid_array_append(to_fetch, &filespec->oid);
}
void diff_queued_diff_prefetch(void *repository)
{
struct repository *repo = repository;
int i;
struct diff_queue_struct *q = &diff_queued_diff;
struct oid_array to_fetch = OID_ARRAY_INIT;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
diff_add_if_missing(repo, &to_fetch, p->one);
diff_add_if_missing(repo, &to_fetch, p->two);
}
/*
* NEEDSWORK: Consider deduplicating the OIDs sent.
*/
promisor_remote_get_direct(repo, to_fetch.oid, to_fetch.nr);
oid_array_clear(&to_fetch);
}
void diffcore_std(struct diff_options *options)
{
if (options->repo == the_repository && has_promisor_remote()) {
/*
* Prefetch the diff pairs that are about to be flushed.
*/
int i;
struct diff_queue_struct *q = &diff_queued_diff;
struct oid_array to_fetch = OID_ARRAY_INIT;
int output_formats_to_prefetch = DIFF_FORMAT_DIFFSTAT |
DIFF_FORMAT_NUMSTAT |
DIFF_FORMAT_PATCH |
DIFF_FORMAT_SHORTSTAT |
DIFF_FORMAT_DIRSTAT;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
add_if_missing(options->repo, &to_fetch, p->one);
add_if_missing(options->repo, &to_fetch, p->two);
}
if (to_fetch.nr)
/*
* NEEDSWORK: Consider deduplicating the OIDs sent.
*/
promisor_remote_get_direct(options->repo,
to_fetch.oid, to_fetch.nr);
oid_array_clear(&to_fetch);
}
/*
* Check if the user requested a blob-data-requiring diff output and/or
* break-rewrite detection (which requires blob data). If yes, prefetch
* the diff pairs.
*
* If no prefetching occurs, diffcore_rename() will prefetch if it
* decides that it needs inexact rename detection.
*/
if (options->repo == the_repository && has_promisor_remote() &&
(options->output_format & output_formats_to_prefetch ||
options->pickaxe_opts & DIFF_PICKAXE_KINDS_MASK))
diff_queued_diff_prefetch(options->repo);
/* NOTE please keep the following in sync with diff_tree_combined() */
if (options->skip_stat_unmatch)
@ -6774,7 +6831,7 @@ size_t fill_textconv(struct repository *r,
*outbuf = "";
return 0;
}
if (diff_populate_filespec(r, df, 0))
if (diff_populate_filespec(r, df, NULL))
die("unable to read files to diff");
*outbuf = df->data;
return df->size;

View File

@ -4,6 +4,7 @@
#include "cache.h"
#include "diff.h"
#include "diffcore.h"
#include "promisor-remote.h"
static int should_break(struct repository *r,
struct diff_filespec *src,
@ -49,6 +50,8 @@ static int should_break(struct repository *r,
unsigned long delta_size, max_size;
unsigned long src_copied, literal_added, src_removed;
struct diff_populate_filespec_options options = { 0 };
*merge_score_p = 0; /* assume no deletion --- "do not break"
* is the default.
*/
@ -62,8 +65,13 @@ static int should_break(struct repository *r,
oideq(&src->oid, &dst->oid))
return 0; /* they are the same */
if (diff_populate_filespec(r, src, 0) ||
diff_populate_filespec(r, dst, 0))
if (r == the_repository && has_promisor_remote()) {
options.missing_object_cb = diff_queued_diff_prefetch;
options.missing_object_data = r;
}
if (diff_populate_filespec(r, src, &options) ||
diff_populate_filespec(r, dst, &options))
return 0; /* error but caught downstream */
max_size = ((src->size > dst->size) ? src->size : dst->size);

View File

@ -1,4 +1,5 @@
/*
*
* Copyright (C) 2005 Junio C Hamano
*/
#include "cache.h"
@ -7,6 +8,7 @@
#include "object-store.h"
#include "hashmap.h"
#include "progress.h"
#include "promisor-remote.h"
/* Table of rename/copy destinations */
@ -128,10 +130,46 @@ struct diff_score {
short name_score;
};
/*
 * Callback data for prefetch(): it is handed over through the untyped
 * missing_object_data pointer and cast back inside prefetch().
 *
 * Keep the member order as is; estimate_similarity() fills this struct with
 * a positional initializer.
 */
struct prefetch_options {
/* Repository passed to diff_add_if_missing() and promisor_remote_get_direct(). */
struct repository *repo;
/*
 * When non-zero, rename sources whose pair satisfies diff_unmodified_pair()
 * are left out of the prefetch.
 */
int skip_unmodified;
};
static void prefetch(void *prefetch_options)
{
struct prefetch_options *options = prefetch_options;
int i;
struct oid_array to_fetch = OID_ARRAY_INIT;
for (i = 0; i < rename_dst_nr; i++) {
if (rename_dst[i].pair)
/*
* The loop in diffcore_rename() will not need these
* blobs, so skip prefetching.
*/
continue; /* already found exact match */
diff_add_if_missing(options->repo, &to_fetch,
rename_dst[i].two);
}
for (i = 0; i < rename_src_nr; i++) {
if (options->skip_unmodified &&
diff_unmodified_pair(rename_src[i].p))
/*
* The loop in diffcore_rename() will not need these
* blobs, so skip prefetching.
*/
continue;
diff_add_if_missing(options->repo, &to_fetch,
rename_src[i].p->one);
}
promisor_remote_get_direct(options->repo, to_fetch.oid, to_fetch.nr);
oid_array_clear(&to_fetch);
}
static int estimate_similarity(struct repository *r,
struct diff_filespec *src,
struct diff_filespec *dst,
int minimum_score)
int minimum_score,
int skip_unmodified)
{
/* src points at a file that existed in the original tree (or
* optionally a file in the destination tree) and dst points
@ -148,6 +186,15 @@ static int estimate_similarity(struct repository *r,
*/
unsigned long max_size, delta_size, base_size, src_copied, literal_added;
int score;
struct diff_populate_filespec_options dpf_options = {
.check_size_only = 1
};
struct prefetch_options prefetch_options = {r, skip_unmodified};
if (r == the_repository && has_promisor_remote()) {
dpf_options.missing_object_cb = prefetch;
dpf_options.missing_object_data = &prefetch_options;
}
/* We deal only with regular files. Symlink renames are handled
* only when they are exact matches --- in other words, no edits
@ -166,10 +213,10 @@ static int estimate_similarity(struct repository *r,
* say whether the size is valid or not!)
*/
if (!src->cnt_data &&
diff_populate_filespec(r, src, CHECK_SIZE_ONLY))
diff_populate_filespec(r, src, &dpf_options))
return 0;
if (!dst->cnt_data &&
diff_populate_filespec(r, dst, CHECK_SIZE_ONLY))
diff_populate_filespec(r, dst, &dpf_options))
return 0;
max_size = ((src->size > dst->size) ? src->size : dst->size);
@ -187,9 +234,11 @@ static int estimate_similarity(struct repository *r,
if (max_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
return 0;
if (!src->cnt_data && diff_populate_filespec(r, src, 0))
dpf_options.check_size_only = 0;
if (!src->cnt_data && diff_populate_filespec(r, src, &dpf_options))
return 0;
if (!dst->cnt_data && diff_populate_filespec(r, dst, 0))
if (!dst->cnt_data && diff_populate_filespec(r, dst, &dpf_options))
return 0;
if (diffcore_count_changes(r, src, dst,
@ -261,7 +310,7 @@ static unsigned int hash_filespec(struct repository *r,
struct diff_filespec *filespec)
{
if (!filespec->oid_valid) {
if (diff_populate_filespec(r, filespec, 0))
if (diff_populate_filespec(r, filespec, NULL))
return 0;
hash_object_file(r->hash_algo, filespec->data, filespec->size,
"blob", &filespec->oid);
@ -566,7 +615,8 @@ void diffcore_rename(struct diff_options *options)
this_src.score = estimate_similarity(options->repo,
one, two,
minimum_score);
minimum_score,
skip_unmodified);
this_src.name_score = basename_same(one, two);
this_src.dst = i;
this_src.src = j;

View File

@ -65,9 +65,25 @@ void free_filespec(struct diff_filespec *);
void fill_filespec(struct diff_filespec *, const struct object_id *,
int, unsigned short);
#define CHECK_SIZE_ONLY 1
#define CHECK_BINARY 2
int diff_populate_filespec(struct repository *, struct diff_filespec *, unsigned int);
/*
* Prefetch the entries in diff_queued_diff. The parameter is a pointer to a
* struct repository.
*/
void diff_queued_diff_prefetch(void *repository);
/*
 * Optional settings for diff_populate_filespec(). Callers that need none of
 * them may pass NULL instead of a pointer to this struct, which behaves the
 * same as leaving every member zero.
 */
struct diff_populate_filespec_options {
/*
 * Only the size of the filespec is wanted; diff_populate_filespec() may
 * return as soon as the size is known.
 */
unsigned check_size_only : 1;
/*
 * The caller only wants to decide whether the content is binary. When the
 * size exceeds big_file_threshold and is_binary is still undetermined, the
 * filespec is marked binary without loading its content.
 */
unsigned check_binary : 1;
/*
 * If an object is missing, diff_populate_filespec() will invoke this
 * callback before attempting to read that object again.
 */
void (*missing_object_cb)(void *);
/* Opaque pointer handed to missing_object_cb as its only argument. */
void *missing_object_data;
};
int diff_populate_filespec(struct repository *, struct diff_filespec *,
const struct diff_populate_filespec_options *);
void diff_free_filespec_data(struct diff_filespec *);
void diff_free_filespec_blob(struct diff_filespec *);
int diff_filespec_is_binary(struct repository *, struct diff_filespec *);
@ -182,4 +198,12 @@ int diffcore_count_changes(struct repository *r,
unsigned long *src_copied,
unsigned long *literal_added);
/*
* If filespec contains an OID and if that object is missing from the given
* repository, add that OID to to_fetch.
*/
void diff_add_if_missing(struct repository *r,
struct oid_array *to_fetch,
const struct diff_filespec *filespec);
#endif

View File

@ -519,7 +519,7 @@ static void fill_line_ends(struct repository *r,
unsigned long *ends = NULL;
char *data = NULL;
if (diff_populate_filespec(r, spec, 0))
if (diff_populate_filespec(r, spec, NULL))
die("Cannot read blob %s", oid_to_hex(&spec->oid));
ALLOC_ARRAY(ends, size);
@ -1045,12 +1045,12 @@ static int process_diff_filepair(struct rev_info *rev,
return 0;
assert(pair->two->oid_valid);
diff_populate_filespec(rev->diffopt.repo, pair->two, 0);
diff_populate_filespec(rev->diffopt.repo, pair->two, NULL);
file_target.ptr = pair->two->data;
file_target.size = pair->two->size;
if (pair->one->oid_valid) {
diff_populate_filespec(rev->diffopt.repo, pair->one, 0);
diff_populate_filespec(rev->diffopt.repo, pair->one, NULL);
file_parent.ptr = pair->one->data;
file_parent.size = pair->one->size;
} else {

View File

@ -241,6 +241,9 @@ int promisor_remote_get_direct(struct repository *repo,
int to_free = 0;
int res = -1;
if (oid_nr == 0)
return 0;
promisor_remote_init();
for (r = promisors; r; r = r->next) {

View File

@ -20,6 +20,14 @@ struct promisor_remote {
void promisor_remote_reinit(void);
struct promisor_remote *promisor_remote_find(const char *remote_name);
int has_promisor_remote(void);
/*
* Fetches all requested objects from all promisor remotes, trying them one at
* a time until all objects are fetched. Returns 0 upon success, and non-zero
* otherwise.
*
* If oid_nr is 0, this function returns 0 (success) immediately.
*/
int promisor_remote_get_direct(struct repository *repo,
const struct object_id *oids,
int oid_nr);

View File

@ -131,4 +131,52 @@ test_expect_success 'diff with rename detection batches blobs' '
test_line_count = 1 done_lines
'
# In a blob-less partial clone, "diff --raw -M" over a pure rename (identical
# content under a new name) must be answerable without fetching any blob. The
# test treats the absence of the packet trace file as proof that no fetch
# took place.
test_expect_success 'diff does not fetch anything if inexact rename detection is not needed' '
	test_when_finished "rm -rf server client trace" &&

	test_create_repo server &&
	echo a >server/a &&
	printf "b\nb\nb\nb\nb\n" >server/b &&
	git -C server add a b &&
	git -C server commit -m x &&
	# Rename b to c without touching its content.
	mv server/b server/c &&
	git -C server add c &&
	git -C server commit -a -m x &&

	test_config -C server uploadpack.allowfilter 1 &&
	test_config -C server uploadpack.allowanysha1inwant 1 &&
	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&

	# Ensure no fetches. Use the dedicated helper rather than negating
	# test_path_exists, so that a failure reports a useful message.
	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --raw -M HEAD^ HEAD &&
	test_path_is_missing trace
'
# In a blob-less partial clone, a plain "diff --raw -M" over an in-place
# content change must not fetch anything, while adding --break-rewrites needs
# the blob contents and must obtain them in one batched fetch.
test_expect_success 'diff --break-rewrites fetches only if necessary, and batches blobs if it does' '
	test_when_finished "rm -rf server client trace" &&

	test_create_repo server &&
	echo a >server/a &&
	printf "b\nb\nb\nb\nb\n" >server/b &&
	git -C server add a b &&
	git -C server commit -m x &&
	# Replace the whole content of b in the second commit.
	printf "c\nc\nc\nc\nc\n" >server/b &&
	git -C server commit -a -m x &&

	test_config -C server uploadpack.allowfilter 1 &&
	test_config -C server uploadpack.allowanysha1inwant 1 &&
	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&

	# Ensure no fetches. Use the dedicated helper rather than negating
	# test_path_exists, so that a failure reports a useful message.
	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --raw -M HEAD^ HEAD &&
	test_path_is_missing trace &&

	# But with --break-rewrites, ensure that there is exactly 1 negotiation
	# by checking that there is only 1 "done" line sent. ("done" marks the
	# end of negotiation.)
	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --break-rewrites --raw -M HEAD^ HEAD &&
	grep "git> done" trace >done_lines &&
	test_line_count = 1 done_lines
'
test_done

View File

@ -423,9 +423,8 @@ static int check_updates(struct unpack_trees_options *o)
continue;
oid_array_append(&to_fetch, &ce->oid);
}
if (to_fetch.nr)
promisor_remote_get_direct(the_repository,
to_fetch.oid, to_fetch.nr);
promisor_remote_get_direct(the_repository,
to_fetch.oid, to_fetch.nr);
oid_array_clear(&to_fetch);
}
for (i = 0; i < index->cache_nr; i++) {