Merge branch 'kw/patch-ids-optim'

When "git rebase" tries to compare set of changes on the updated
upstream and our own branch, it computes patch-id for all of these
changes and attempts to find matches. This has been optimized by
lazily computing the full patch-id (which is expensive) to be
compared only for changes that touch the same set of paths.

* kw/patch-ids-optim:
  rebase: avoid computing unnecessary patch IDs
  patch-ids: add flag to create the diff patch id using header only data
  patch-ids: replace the seen indicator with a commit pointer
  patch-ids: stop using a hand-rolled hashmap implementation
This commit is contained in:
Junio C Hamano 2016-08-12 09:47:39 -07:00
commit dd610aeda6
8 changed files with 128 additions and 87 deletions

View File

@ -1343,7 +1343,7 @@ static void prepare_bases(struct base_tree_info *bases,
struct object_id *patch_id;
if (commit->util)
continue;
if (commit_patch_id(commit, &diffopt, sha1))
if (commit_patch_id(commit, &diffopt, sha1, 0))
die(_("cannot get patch id"));
ALLOC_GROW(bases->patch_id, bases->nr_patch_id + 1, bases->alloc_patch_id);
patch_id = bases->patch_id + bases->nr_patch_id;

16
diff.c
View File

@ -4462,7 +4462,7 @@ static void patch_id_consume(void *priv, char *line, unsigned long len)
}
/* returns 0 upon success, and writes result into sha1 */
static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only)
{
struct diff_queue_struct *q = &diff_queued_diff;
int i;
@ -4497,9 +4497,6 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
diff_fill_sha1_info(p->one);
diff_fill_sha1_info(p->two);
if (fill_mmfile(&mf1, p->one) < 0 ||
fill_mmfile(&mf2, p->two) < 0)
return error("unable to read files to diff");
len1 = remove_space(p->one->path, strlen(p->one->path));
len2 = remove_space(p->two->path, strlen(p->two->path));
@ -4534,6 +4531,13 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
len2, p->two->path);
git_SHA1_Update(&ctx, buffer, len1);
if (diff_header_only)
continue;
if (fill_mmfile(&mf1, p->one) < 0 ||
fill_mmfile(&mf2, p->two) < 0)
return error("unable to read files to diff");
if (diff_filespec_is_binary(p->one) ||
diff_filespec_is_binary(p->two)) {
git_SHA1_Update(&ctx, oid_to_hex(&p->one->oid),
@ -4556,11 +4560,11 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
return 0;
}
int diff_flush_patch_id(struct diff_options *options, unsigned char *sha1)
int diff_flush_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only)
{
struct diff_queue_struct *q = &diff_queued_diff;
int i;
int result = diff_get_patch_id(options, sha1);
int result = diff_get_patch_id(options, sha1, diff_header_only);
for (i = 0; i < q->nr; i++)
diff_free_filepair(q->queue[i]);

2
diff.h
View File

@ -342,7 +342,7 @@ extern int run_diff_files(struct rev_info *revs, unsigned int option);
extern int run_diff_index(struct rev_info *revs, int cached);
extern int do_diff_cache(const unsigned char *, struct diff_options *);
extern int diff_flush_patch_id(struct diff_options *, unsigned char *);
extern int diff_flush_patch_id(struct diff_options *, unsigned char *, int);
extern int diff_result_code(struct diff_options *, int);

View File

@ -5,7 +5,7 @@
#include "patch-ids.h"
int commit_patch_id(struct commit *commit, struct diff_options *options,
unsigned char *sha1)
unsigned char *sha1, int diff_header_only)
{
if (commit->parents)
diff_tree_sha1(commit->parents->item->object.oid.hash,
@ -13,93 +13,86 @@ int commit_patch_id(struct commit *commit, struct diff_options *options,
else
diff_root_tree_sha1(commit->object.oid.hash, "", options);
diffcore_std(options);
return diff_flush_patch_id(options, sha1);
return diff_flush_patch_id(options, sha1, diff_header_only);
}
static const unsigned char *patch_id_access(size_t index, void *table)
/*
* When we cannot load the full patch-id for both commits for whatever
* reason, the function returns -1 (i.e. return error(...)). Despite
* the "cmp" in the name of this function, the caller only cares about
* the return value being zero (a and b are equivalent) or non-zero (a
* and b are different), and returning non-zero would keep both in the
* result, even if they actually were equivalent, in order to err on
* the side of safety. The actual value being negative does not have
* any significance; only that it is non-zero matters.
*/
static int patch_id_cmp(struct patch_id *a,
struct patch_id *b,
struct diff_options *opt)
{
struct patch_id **id_table = table;
return id_table[index]->patch_id;
if (is_null_sha1(a->patch_id) &&
commit_patch_id(a->commit, opt, a->patch_id, 0))
return error("Could not get patch ID for %s",
oid_to_hex(&a->commit->object.oid));
if (is_null_sha1(b->patch_id) &&
commit_patch_id(b->commit, opt, b->patch_id, 0))
return error("Could not get patch ID for %s",
oid_to_hex(&b->commit->object.oid));
return hashcmp(a->patch_id, b->patch_id);
}
static int patch_pos(struct patch_id **table, int nr, const unsigned char *id)
{
return sha1_pos(id, table, nr, patch_id_access);
}
#define BUCKET_SIZE 190 /* 190 * 21 = 3990, with slop close enough to 4K */
struct patch_id_bucket {
struct patch_id_bucket *next;
int nr;
struct patch_id bucket[BUCKET_SIZE];
};
int init_patch_ids(struct patch_ids *ids)
{
memset(ids, 0, sizeof(*ids));
diff_setup(&ids->diffopts);
DIFF_OPT_SET(&ids->diffopts, RECURSIVE);
diff_setup_done(&ids->diffopts);
hashmap_init(&ids->patches, (hashmap_cmp_fn)patch_id_cmp, 256);
return 0;
}
int free_patch_ids(struct patch_ids *ids)
{
struct patch_id_bucket *next, *patches;
free(ids->table);
for (patches = ids->patches; patches; patches = next) {
next = patches->next;
free(patches);
}
hashmap_free(&ids->patches, 1);
return 0;
}
static struct patch_id *add_commit(struct commit *commit,
struct patch_ids *ids,
int no_add)
static int init_patch_id_entry(struct patch_id *patch,
struct commit *commit,
struct patch_ids *ids)
{
struct patch_id_bucket *bucket;
struct patch_id *ent;
unsigned char sha1[20];
int pos;
unsigned char header_only_patch_id[GIT_SHA1_RAWSZ];
if (commit_patch_id(commit, &ids->diffopts, sha1))
return NULL;
pos = patch_pos(ids->table, ids->nr, sha1);
if (0 <= pos)
return ids->table[pos];
if (no_add)
return NULL;
patch->commit = commit;
if (commit_patch_id(commit, &ids->diffopts, header_only_patch_id, 1))
return -1;
pos = -1 - pos;
bucket = ids->patches;
if (!bucket || (BUCKET_SIZE <= bucket->nr)) {
bucket = xcalloc(1, sizeof(*bucket));
bucket->next = ids->patches;
ids->patches = bucket;
}
ent = &bucket->bucket[bucket->nr++];
hashcpy(ent->patch_id, sha1);
ALLOC_GROW(ids->table, ids->nr + 1, ids->alloc);
if (pos < ids->nr)
memmove(ids->table + pos + 1, ids->table + pos,
sizeof(ent) * (ids->nr - pos));
ids->nr++;
ids->table[pos] = ent;
return ids->table[pos];
hashmap_entry_init(patch, sha1hash(header_only_patch_id));
return 0;
}
struct patch_id *has_commit_patch_id(struct commit *commit,
struct patch_ids *ids)
{
return add_commit(commit, ids, 1);
struct patch_id patch;
memset(&patch, 0, sizeof(patch));
if (init_patch_id_entry(&patch, commit, ids))
return NULL;
return hashmap_get(&ids->patches, &patch, &ids->diffopts);
}
struct patch_id *add_commit_patch_id(struct commit *commit,
struct patch_ids *ids)
{
return add_commit(commit, ids, 0);
struct patch_id *key = xcalloc(1, sizeof(*key));
if (init_patch_id_entry(key, commit, ids)) {
free(key);
return NULL;
}
hashmap_add(&ids->patches, key);
return key;
}

View File

@ -2,19 +2,18 @@
#define PATCH_IDS_H
struct patch_id {
unsigned char patch_id[20];
char seen;
struct hashmap_entry ent;
unsigned char patch_id[GIT_SHA1_RAWSZ];
struct commit *commit;
};
struct patch_ids {
struct hashmap patches;
struct diff_options diffopts;
int nr, alloc;
struct patch_id **table;
struct patch_id_bucket *patches;
};
int commit_patch_id(struct commit *commit, struct diff_options *options,
unsigned char *sha1);
unsigned char *sha1, int);
int init_patch_ids(struct patch_ids *);
int free_patch_ids(struct patch_ids *);
struct patch_id *add_commit_patch_id(struct commit *, struct patch_ids *);

View File

@ -846,7 +846,7 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs)
*/
if (left_first != !!(flags & SYMMETRIC_LEFT))
continue;
commit->util = add_commit_patch_id(commit, &ids);
add_commit_patch_id(commit, &ids);
}
/* either cherry_mark or cherry_pick are true */
@ -873,21 +873,9 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs)
id = has_commit_patch_id(commit, &ids);
if (!id)
continue;
id->seen = 1;
commit->object.flags |= cherry_flag;
}
/* Now check the original side for seen ones */
for (p = list; p; p = p->next) {
struct commit *commit = p->item;
struct patch_id *ent;
ent = commit->util;
if (!ent)
continue;
if (ent->seen)
commit->object.flags |= cherry_flag;
commit->util = NULL;
id->commit->object.flags |= cherry_flag;
}
free_patch_ids(&ids);

36
t/perf/p3400-rebase.sh Normal file
View File

@ -0,0 +1,36 @@
#!/bin/sh
test_description='Tests rebase performance'
. ./perf-lib.sh
test_perf_default_repo
test_expect_success 'setup' '
git checkout -f -b base &&
git checkout -b to-rebase &&
git checkout -b upstream &&
for i in $(seq 100)
do
# simulate huge diffs
echo change$i >unrelated-file$i &&
seq 1000 >>unrelated-file$i &&
git add unrelated-file$i &&
test_tick &&
git commit -m commit$i unrelated-file$i &&
echo change$i >unrelated-file$i &&
seq 1000 | tac >>unrelated-file$i &&
git add unrelated-file$i &&
test_tick &&
git commit -m commit$i-reverse unrelated-file$i ||
break
done &&
git checkout to-rebase &&
test_commit our-patch interesting-file
'
test_perf 'rebase on top of a lot of unrelated changes' '
git rebase --onto upstream HEAD^ &&
git rebase --onto base HEAD^
'
test_done

View File

@ -207,4 +207,25 @@ test_expect_success '--count --left-right' '
test_cmp expect actual
'
# Corrupt the object store deliberately to make sure
# the object is not even checked for its existence.
remove_loose_object () {
sha1="$(git rev-parse "$1")" &&
remainder=${sha1#??} &&
firsttwo=${sha1%$remainder} &&
rm .git/objects/$firsttwo/$remainder
}
test_expect_success '--cherry-pick avoids looking at full diffs' '
git checkout -b shy-diff &&
test_commit dont-look-at-me &&
echo Hello >dont-look-at-me.t &&
test_tick &&
git commit -m tip dont-look-at-me.t &&
git checkout -b mainline HEAD^ &&
test_commit to-cherry-pick &&
remove_loose_object shy-diff^:dont-look-at-me.t &&
git rev-list --cherry-pick ...shy-diff
'
test_done