Merge branch 'kw/patch-ids-optim'
When "git rebase" tries to compare set of changes on the updated upstream and our own branch, it computes patch-id for all of these changes and attempts to find matches. This has been optimized by lazily computing the full patch-id (which is expensive) to be compared only for changes that touch the same set of paths. * kw/patch-ids-optim: rebase: avoid computing unnecessary patch IDs patch-ids: add flag to create the diff patch id using header only data patch-ids: replace the seen indicator with a commit pointer patch-ids: stop using a hand-rolled hashmap implementation
This commit is contained in:
commit
dd610aeda6
@ -1343,7 +1343,7 @@ static void prepare_bases(struct base_tree_info *bases,
|
||||
struct object_id *patch_id;
|
||||
if (commit->util)
|
||||
continue;
|
||||
if (commit_patch_id(commit, &diffopt, sha1))
|
||||
if (commit_patch_id(commit, &diffopt, sha1, 0))
|
||||
die(_("cannot get patch id"));
|
||||
ALLOC_GROW(bases->patch_id, bases->nr_patch_id + 1, bases->alloc_patch_id);
|
||||
patch_id = bases->patch_id + bases->nr_patch_id;
|
||||
|
16
diff.c
16
diff.c
@ -4462,7 +4462,7 @@ static void patch_id_consume(void *priv, char *line, unsigned long len)
|
||||
}
|
||||
|
||||
/* returns 0 upon success, and writes result into sha1 */
|
||||
static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
|
||||
static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only)
|
||||
{
|
||||
struct diff_queue_struct *q = &diff_queued_diff;
|
||||
int i;
|
||||
@ -4497,9 +4497,6 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
|
||||
|
||||
diff_fill_sha1_info(p->one);
|
||||
diff_fill_sha1_info(p->two);
|
||||
if (fill_mmfile(&mf1, p->one) < 0 ||
|
||||
fill_mmfile(&mf2, p->two) < 0)
|
||||
return error("unable to read files to diff");
|
||||
|
||||
len1 = remove_space(p->one->path, strlen(p->one->path));
|
||||
len2 = remove_space(p->two->path, strlen(p->two->path));
|
||||
@ -4534,6 +4531,13 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
|
||||
len2, p->two->path);
|
||||
git_SHA1_Update(&ctx, buffer, len1);
|
||||
|
||||
if (diff_header_only)
|
||||
continue;
|
||||
|
||||
if (fill_mmfile(&mf1, p->one) < 0 ||
|
||||
fill_mmfile(&mf2, p->two) < 0)
|
||||
return error("unable to read files to diff");
|
||||
|
||||
if (diff_filespec_is_binary(p->one) ||
|
||||
diff_filespec_is_binary(p->two)) {
|
||||
git_SHA1_Update(&ctx, oid_to_hex(&p->one->oid),
|
||||
@ -4556,11 +4560,11 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int diff_flush_patch_id(struct diff_options *options, unsigned char *sha1)
|
||||
int diff_flush_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only)
|
||||
{
|
||||
struct diff_queue_struct *q = &diff_queued_diff;
|
||||
int i;
|
||||
int result = diff_get_patch_id(options, sha1);
|
||||
int result = diff_get_patch_id(options, sha1, diff_header_only);
|
||||
|
||||
for (i = 0; i < q->nr; i++)
|
||||
diff_free_filepair(q->queue[i]);
|
||||
|
2
diff.h
2
diff.h
@ -342,7 +342,7 @@ extern int run_diff_files(struct rev_info *revs, unsigned int option);
|
||||
extern int run_diff_index(struct rev_info *revs, int cached);
|
||||
|
||||
extern int do_diff_cache(const unsigned char *, struct diff_options *);
|
||||
extern int diff_flush_patch_id(struct diff_options *, unsigned char *);
|
||||
extern int diff_flush_patch_id(struct diff_options *, unsigned char *, int);
|
||||
|
||||
extern int diff_result_code(struct diff_options *, int);
|
||||
|
||||
|
109
patch-ids.c
109
patch-ids.c
@ -5,7 +5,7 @@
|
||||
#include "patch-ids.h"
|
||||
|
||||
int commit_patch_id(struct commit *commit, struct diff_options *options,
|
||||
unsigned char *sha1)
|
||||
unsigned char *sha1, int diff_header_only)
|
||||
{
|
||||
if (commit->parents)
|
||||
diff_tree_sha1(commit->parents->item->object.oid.hash,
|
||||
@ -13,93 +13,86 @@ int commit_patch_id(struct commit *commit, struct diff_options *options,
|
||||
else
|
||||
diff_root_tree_sha1(commit->object.oid.hash, "", options);
|
||||
diffcore_std(options);
|
||||
return diff_flush_patch_id(options, sha1);
|
||||
return diff_flush_patch_id(options, sha1, diff_header_only);
|
||||
}
|
||||
|
||||
static const unsigned char *patch_id_access(size_t index, void *table)
|
||||
/*
|
||||
* When we cannot load the full patch-id for both commits for whatever
|
||||
* reason, the function returns -1 (i.e. return error(...)). Despite
|
||||
* the "cmp" in the name of this function, the caller only cares about
|
||||
* the return value being zero (a and b are equivalent) or non-zero (a
|
||||
* and b are different), and returning non-zero would keep both in the
|
||||
* result, even if they actually were equivalent, in order to err on
|
||||
* the side of safety. The actual value being negative does not have
|
||||
* any significance; only that it is non-zero matters.
|
||||
*/
|
||||
static int patch_id_cmp(struct patch_id *a,
|
||||
struct patch_id *b,
|
||||
struct diff_options *opt)
|
||||
{
|
||||
struct patch_id **id_table = table;
|
||||
return id_table[index]->patch_id;
|
||||
if (is_null_sha1(a->patch_id) &&
|
||||
commit_patch_id(a->commit, opt, a->patch_id, 0))
|
||||
return error("Could not get patch ID for %s",
|
||||
oid_to_hex(&a->commit->object.oid));
|
||||
if (is_null_sha1(b->patch_id) &&
|
||||
commit_patch_id(b->commit, opt, b->patch_id, 0))
|
||||
return error("Could not get patch ID for %s",
|
||||
oid_to_hex(&b->commit->object.oid));
|
||||
return hashcmp(a->patch_id, b->patch_id);
|
||||
}
|
||||
|
||||
static int patch_pos(struct patch_id **table, int nr, const unsigned char *id)
|
||||
{
|
||||
return sha1_pos(id, table, nr, patch_id_access);
|
||||
}
|
||||
|
||||
#define BUCKET_SIZE 190 /* 190 * 21 = 3990, with slop close enough to 4K */
|
||||
struct patch_id_bucket {
|
||||
struct patch_id_bucket *next;
|
||||
int nr;
|
||||
struct patch_id bucket[BUCKET_SIZE];
|
||||
};
|
||||
|
||||
int init_patch_ids(struct patch_ids *ids)
|
||||
{
|
||||
memset(ids, 0, sizeof(*ids));
|
||||
diff_setup(&ids->diffopts);
|
||||
DIFF_OPT_SET(&ids->diffopts, RECURSIVE);
|
||||
diff_setup_done(&ids->diffopts);
|
||||
hashmap_init(&ids->patches, (hashmap_cmp_fn)patch_id_cmp, 256);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int free_patch_ids(struct patch_ids *ids)
|
||||
{
|
||||
struct patch_id_bucket *next, *patches;
|
||||
|
||||
free(ids->table);
|
||||
for (patches = ids->patches; patches; patches = next) {
|
||||
next = patches->next;
|
||||
free(patches);
|
||||
}
|
||||
hashmap_free(&ids->patches, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct patch_id *add_commit(struct commit *commit,
|
||||
struct patch_ids *ids,
|
||||
int no_add)
|
||||
static int init_patch_id_entry(struct patch_id *patch,
|
||||
struct commit *commit,
|
||||
struct patch_ids *ids)
|
||||
{
|
||||
struct patch_id_bucket *bucket;
|
||||
struct patch_id *ent;
|
||||
unsigned char sha1[20];
|
||||
int pos;
|
||||
unsigned char header_only_patch_id[GIT_SHA1_RAWSZ];
|
||||
|
||||
if (commit_patch_id(commit, &ids->diffopts, sha1))
|
||||
return NULL;
|
||||
pos = patch_pos(ids->table, ids->nr, sha1);
|
||||
if (0 <= pos)
|
||||
return ids->table[pos];
|
||||
if (no_add)
|
||||
return NULL;
|
||||
patch->commit = commit;
|
||||
if (commit_patch_id(commit, &ids->diffopts, header_only_patch_id, 1))
|
||||
return -1;
|
||||
|
||||
pos = -1 - pos;
|
||||
|
||||
bucket = ids->patches;
|
||||
if (!bucket || (BUCKET_SIZE <= bucket->nr)) {
|
||||
bucket = xcalloc(1, sizeof(*bucket));
|
||||
bucket->next = ids->patches;
|
||||
ids->patches = bucket;
|
||||
}
|
||||
ent = &bucket->bucket[bucket->nr++];
|
||||
hashcpy(ent->patch_id, sha1);
|
||||
|
||||
ALLOC_GROW(ids->table, ids->nr + 1, ids->alloc);
|
||||
if (pos < ids->nr)
|
||||
memmove(ids->table + pos + 1, ids->table + pos,
|
||||
sizeof(ent) * (ids->nr - pos));
|
||||
ids->nr++;
|
||||
ids->table[pos] = ent;
|
||||
return ids->table[pos];
|
||||
hashmap_entry_init(patch, sha1hash(header_only_patch_id));
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct patch_id *has_commit_patch_id(struct commit *commit,
|
||||
struct patch_ids *ids)
|
||||
{
|
||||
return add_commit(commit, ids, 1);
|
||||
struct patch_id patch;
|
||||
|
||||
memset(&patch, 0, sizeof(patch));
|
||||
if (init_patch_id_entry(&patch, commit, ids))
|
||||
return NULL;
|
||||
|
||||
return hashmap_get(&ids->patches, &patch, &ids->diffopts);
|
||||
}
|
||||
|
||||
struct patch_id *add_commit_patch_id(struct commit *commit,
|
||||
struct patch_ids *ids)
|
||||
{
|
||||
return add_commit(commit, ids, 0);
|
||||
struct patch_id *key = xcalloc(1, sizeof(*key));
|
||||
|
||||
if (init_patch_id_entry(key, commit, ids)) {
|
||||
free(key);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hashmap_add(&ids->patches, key);
|
||||
return key;
|
||||
}
|
||||
|
11
patch-ids.h
11
patch-ids.h
@ -2,19 +2,18 @@
|
||||
#define PATCH_IDS_H
|
||||
|
||||
struct patch_id {
|
||||
unsigned char patch_id[20];
|
||||
char seen;
|
||||
struct hashmap_entry ent;
|
||||
unsigned char patch_id[GIT_SHA1_RAWSZ];
|
||||
struct commit *commit;
|
||||
};
|
||||
|
||||
struct patch_ids {
|
||||
struct hashmap patches;
|
||||
struct diff_options diffopts;
|
||||
int nr, alloc;
|
||||
struct patch_id **table;
|
||||
struct patch_id_bucket *patches;
|
||||
};
|
||||
|
||||
int commit_patch_id(struct commit *commit, struct diff_options *options,
|
||||
unsigned char *sha1);
|
||||
unsigned char *sha1, int);
|
||||
int init_patch_ids(struct patch_ids *);
|
||||
int free_patch_ids(struct patch_ids *);
|
||||
struct patch_id *add_commit_patch_id(struct commit *, struct patch_ids *);
|
||||
|
16
revision.c
16
revision.c
@ -846,7 +846,7 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs)
|
||||
*/
|
||||
if (left_first != !!(flags & SYMMETRIC_LEFT))
|
||||
continue;
|
||||
commit->util = add_commit_patch_id(commit, &ids);
|
||||
add_commit_patch_id(commit, &ids);
|
||||
}
|
||||
|
||||
/* either cherry_mark or cherry_pick are true */
|
||||
@ -873,21 +873,9 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs)
|
||||
id = has_commit_patch_id(commit, &ids);
|
||||
if (!id)
|
||||
continue;
|
||||
id->seen = 1;
|
||||
commit->object.flags |= cherry_flag;
|
||||
}
|
||||
|
||||
/* Now check the original side for seen ones */
|
||||
for (p = list; p; p = p->next) {
|
||||
struct commit *commit = p->item;
|
||||
struct patch_id *ent;
|
||||
|
||||
ent = commit->util;
|
||||
if (!ent)
|
||||
continue;
|
||||
if (ent->seen)
|
||||
commit->object.flags |= cherry_flag;
|
||||
commit->util = NULL;
|
||||
id->commit->object.flags |= cherry_flag;
|
||||
}
|
||||
|
||||
free_patch_ids(&ids);
|
||||
|
36
t/perf/p3400-rebase.sh
Normal file
36
t/perf/p3400-rebase.sh
Normal file
@ -0,0 +1,36 @@
|
||||
#!/bin/sh
|
||||
|
||||
test_description='Tests rebase performance'
|
||||
. ./perf-lib.sh
|
||||
|
||||
test_perf_default_repo
|
||||
|
||||
test_expect_success 'setup' '
|
||||
git checkout -f -b base &&
|
||||
git checkout -b to-rebase &&
|
||||
git checkout -b upstream &&
|
||||
for i in $(seq 100)
|
||||
do
|
||||
# simulate huge diffs
|
||||
echo change$i >unrelated-file$i &&
|
||||
seq 1000 >>unrelated-file$i &&
|
||||
git add unrelated-file$i &&
|
||||
test_tick &&
|
||||
git commit -m commit$i unrelated-file$i &&
|
||||
echo change$i >unrelated-file$i &&
|
||||
seq 1000 | tac >>unrelated-file$i &&
|
||||
git add unrelated-file$i &&
|
||||
test_tick &&
|
||||
git commit -m commit$i-reverse unrelated-file$i ||
|
||||
break
|
||||
done &&
|
||||
git checkout to-rebase &&
|
||||
test_commit our-patch interesting-file
|
||||
'
|
||||
|
||||
test_perf 'rebase on top of a lot of unrelated changes' '
|
||||
git rebase --onto upstream HEAD^ &&
|
||||
git rebase --onto base HEAD^
|
||||
'
|
||||
|
||||
test_done
|
@ -207,4 +207,25 @@ test_expect_success '--count --left-right' '
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
# Corrupt the object store deliberately to make sure
|
||||
# the object is not even checked for its existence.
|
||||
remove_loose_object () {
|
||||
sha1="$(git rev-parse "$1")" &&
|
||||
remainder=${sha1#??} &&
|
||||
firsttwo=${sha1%$remainder} &&
|
||||
rm .git/objects/$firsttwo/$remainder
|
||||
}
|
||||
|
||||
test_expect_success '--cherry-pick avoids looking at full diffs' '
|
||||
git checkout -b shy-diff &&
|
||||
test_commit dont-look-at-me &&
|
||||
echo Hello >dont-look-at-me.t &&
|
||||
test_tick &&
|
||||
git commit -m tip dont-look-at-me.t &&
|
||||
git checkout -b mainline HEAD^ &&
|
||||
test_commit to-cherry-pick &&
|
||||
remove_loose_object shy-diff^:dont-look-at-me.t &&
|
||||
git rev-list --cherry-pick ...shy-diff
|
||||
'
|
||||
|
||||
test_done
|
||||
|
Loading…
Reference in New Issue
Block a user