Merge branch 'tb/pack-bitmap'

Various improvements to the codepath that writes out pack bitmaps.

* tb/pack-bitmap: (24 commits)
  pack-bitmap-write: better reuse bitmaps
  pack-bitmap-write: relax unique revwalk condition
  pack-bitmap-write: use existing bitmaps
  pack-bitmap: factor out 'add_commit_to_bitmap()'
  pack-bitmap: factor out 'bitmap_for_commit()'
  pack-bitmap-write: ignore BITMAP_FLAG_REUSE
  pack-bitmap-write: build fewer intermediate bitmaps
  pack-bitmap.c: check reads more aggressively when loading
  pack-bitmap-write: rename children to reverse_edges
  t5310: add branch-based checks
  commit: implement commit_list_contains()
  bitmap: implement bitmap_is_subset()
  pack-bitmap-write: fill bitmap with commit history
  pack-bitmap-write: pass ownership of intermediate bitmaps
  pack-bitmap-write: reimplement bitmap writing
  ewah: add bitmap_dup() function
  ewah: implement bitmap_or()
  ewah: make bitmap growth less aggressive
  ewah: factor out bitmap growth
  rev-list: die when --test-bitmap detects a mismatch
  ...
This commit is contained in:
Junio C Hamano 2021-01-06 23:33:43 -08:00
commit c256631065
10 changed files with 583 additions and 301 deletions

View File

@ -1104,7 +1104,6 @@ static void write_pack_file(void)
stop_progress(&progress_state);
bitmap_writer_show_progress(progress);
bitmap_writer_reuse_bitmaps(&to_pack);
bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
bitmap_writer_build(&to_pack);
bitmap_writer_finish(written_list, nr_written,

View File

@ -544,6 +544,17 @@ struct commit_list *commit_list_insert(struct commit *item, struct commit_list *
return new_list;
}
/*
 * Report whether `item` is present in `list`.
 *
 * Entries are compared by pointer identity. Returns 1 when a node whose
 * `item` equals the given pointer is found and 0 otherwise; a NULL list
 * therefore yields 0.
 */
int commit_list_contains(struct commit *item, struct commit_list *list)
{
	struct commit_list *node;

	for (node = list; node; node = node->next)
		if (node->item == item)
			return 1;
	return 0;
}
unsigned commit_list_count(const struct commit_list *l)
{
unsigned c = 0;

View File

@ -167,6 +167,8 @@ int find_commit_subject(const char *commit_buffer, const char **subject);
struct commit_list *commit_list_insert(struct commit *item,
struct commit_list **list);
int commit_list_contains(struct commit *item,
struct commit_list *list);
struct commit_list **commit_list_append(struct commit *commit,
struct commit_list **next);
unsigned commit_list_count(const struct commit_list *l);

View File

@ -35,18 +35,26 @@ struct bitmap *bitmap_new(void)
return bitmap_word_alloc(32);
}
/*
 * Create a copy of `src`: a bitmap obtained from
 * bitmap_word_alloc(src->word_alloc) whose first src->word_alloc words
 * are copied from `src`. Returns the new bitmap.
 */
struct bitmap *bitmap_dup(const struct bitmap *src)
{
	struct bitmap *copy;

	copy = bitmap_word_alloc(src->word_alloc);
	COPY_ARRAY(copy->words, src->words, src->word_alloc);
	return copy;
}
/*
 * Ensure `self` can hold at least `word_alloc` words (growth is delegated
 * to ALLOC_GROW), then zero every word lying between the previous
 * allocation size and the new one so that fresh words start out empty.
 */
static void bitmap_grow(struct bitmap *self, size_t word_alloc)
{
	size_t prev_alloc = self->word_alloc;
	size_t added;

	ALLOC_GROW(self->words, word_alloc, self->word_alloc);

	/* zero when ALLOC_GROW did not need to enlarge the array */
	added = self->word_alloc - prev_alloc;
	memset(self->words + prev_alloc, 0x0, added * sizeof(eword_t));
}
void bitmap_set(struct bitmap *self, size_t pos)
{
size_t block = EWAH_BLOCK(pos);
if (block >= self->word_alloc) {
size_t old_size = self->word_alloc;
self->word_alloc = block ? block * 2 : 1;
REALLOC_ARRAY(self->words, self->word_alloc);
memset(self->words + old_size, 0x0,
(self->word_alloc - old_size) * sizeof(eword_t));
}
bitmap_grow(self, block + 1);
self->words[block] |= EWAH_MASK(pos);
}
@ -121,6 +129,15 @@ void bitmap_and_not(struct bitmap *self, struct bitmap *other)
self->words[i] &= ~other->words[i];
}
/*
 * OR all words of `other` into `self`.
 *
 * `self` is first grown to at least other->word_alloc words so that every
 * word of `other` has a destination; `other` is not modified.
 */
void bitmap_or(struct bitmap *self, const struct bitmap *other)
{
	size_t nr_words = other->word_alloc;
	size_t w;

	bitmap_grow(self, nr_words);
	for (w = 0; w < nr_words; w++)
		self->words[w] |= other->words[w];
}
void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other)
{
size_t original_size = self->word_alloc;
@ -178,6 +195,27 @@ int bitmap_equals(struct bitmap *self, struct bitmap *other)
return 1;
}
/*
 * Compare the bits of `self` against those of `other`.
 *
 * NOTE: despite the name, the return value is 1 when `self` has at least
 * one bit set that is NOT set in `other` (i.e. `self` is not a subset),
 * and 0 when every bit set in `self` is also set in `other`. Callers in
 * pack-bitmap-write.c store the result in variables named `c_not_p` /
 * `p_not_c`, matching this convention.
 *
 * The two bitmaps may have different allocation sizes; words of `self`
 * beyond the end of `other` count as "not in other" if they are non-zero.
 */
int bitmap_is_subset(struct bitmap *self, struct bitmap *other)
{
	size_t shared = self->word_alloc < other->word_alloc ?
			self->word_alloc : other->word_alloc;
	size_t w;

	/* Words that only `self` has: any set bit cannot be in `other`. */
	for (w = shared; w < self->word_alloc; w++)
		if (self->words[w])
			return 1;

	/* Words both bitmaps have: look for bits in self but not other. */
	for (w = 0; w < shared; w++)
		if (self->words[w] & ~other->words[w])
			return 1;

	return 0;
}
void bitmap_reset(struct bitmap *bitmap)
{
memset(bitmap->words, 0x0, bitmap->word_alloc * sizeof(eword_t));

View File

@ -19,6 +19,7 @@
#include "git-compat-util.h"
#include "ewok.h"
#include "ewok_rlw.h"
#include "cache.h"
static inline size_t min_size(size_t a, size_t b)
{
@ -33,20 +34,13 @@ static inline size_t max_size(size_t a, size_t b)
static inline void buffer_grow(struct ewah_bitmap *self, size_t new_size)
{
size_t rlw_offset = (uint8_t *)self->rlw - (uint8_t *)self->buffer;
if (self->alloc_size >= new_size)
return;
self->alloc_size = new_size;
REALLOC_ARRAY(self->buffer, self->alloc_size);
ALLOC_GROW(self->buffer, new_size, self->alloc_size);
self->rlw = self->buffer + (rlw_offset / sizeof(eword_t));
}
static inline void buffer_push(struct ewah_bitmap *self, eword_t value)
{
if (self->buffer_size + 1 >= self->alloc_size)
buffer_grow(self, self->buffer_size * 3 / 2);
buffer_grow(self, self->buffer_size + 1);
self->buffer[self->buffer_size++] = value;
}
@ -137,8 +131,7 @@ void ewah_add_dirty_words(
rlw_set_literal_words(self->rlw, literals + can_add);
if (self->buffer_size + can_add >= self->alloc_size)
buffer_grow(self, (self->buffer_size + can_add) * 3 / 2);
buffer_grow(self, self->buffer_size + can_add);
if (negate) {
size_t i;

View File

@ -173,13 +173,14 @@ struct bitmap {
struct bitmap *bitmap_new(void);
struct bitmap *bitmap_word_alloc(size_t word_alloc);
struct bitmap *bitmap_dup(const struct bitmap *src);
void bitmap_set(struct bitmap *self, size_t pos);
void bitmap_unset(struct bitmap *self, size_t pos);
int bitmap_get(struct bitmap *self, size_t pos);
void bitmap_reset(struct bitmap *self);
void bitmap_free(struct bitmap *self);
int bitmap_equals(struct bitmap *self, struct bitmap *other);
int bitmap_is_subset(struct bitmap *self, struct bitmap *super);
int bitmap_is_subset(struct bitmap *self, struct bitmap *other);
struct ewah_bitmap * bitmap_to_ewah(struct bitmap *bitmap);
struct bitmap *ewah_to_bitmap(struct ewah_bitmap *ewah);

View File

@ -12,6 +12,7 @@
#include "sha1-lookup.h"
#include "pack-objects.h"
#include "commit-reach.h"
#include "prio-queue.h"
struct bitmapped_commit {
struct commit *commit;
@ -29,7 +30,6 @@ struct bitmap_writer {
struct ewah_bitmap *tags;
kh_oid_map_t *bitmaps;
kh_oid_map_t *reused;
struct packing_data *to_pack;
struct bitmapped_commit *selected;
@ -110,10 +110,8 @@ void bitmap_writer_build_type_index(struct packing_data *to_pack,
/**
* Compute the actual bitmaps
*/
static struct object **seen_objects;
static unsigned int seen_objects_nr, seen_objects_alloc;
static inline void push_bitmapped_commit(struct commit *commit, struct ewah_bitmap *reused)
static inline void push_bitmapped_commit(struct commit *commit)
{
if (writer.selected_nr >= writer.selected_alloc) {
writer.selected_alloc = (writer.selected_alloc + 32) * 2;
@ -121,27 +119,12 @@ static inline void push_bitmapped_commit(struct commit *commit, struct ewah_bitm
}
writer.selected[writer.selected_nr].commit = commit;
writer.selected[writer.selected_nr].bitmap = reused;
writer.selected[writer.selected_nr].bitmap = NULL;
writer.selected[writer.selected_nr].flags = 0;
writer.selected_nr++;
}
static inline void mark_as_seen(struct object *object)
{
ALLOC_GROW(seen_objects, seen_objects_nr + 1, seen_objects_alloc);
seen_objects[seen_objects_nr++] = object;
}
static inline void reset_all_seen(void)
{
unsigned int i;
for (i = 0; i < seen_objects_nr; ++i) {
seen_objects[i]->flags &= ~(SEEN | ADDED | SHOWN);
}
seen_objects_nr = 0;
}
static uint32_t find_object_pos(const struct object_id *oid)
{
struct object_entry *entry = packlist_find(writer.to_pack, oid);
@ -154,60 +137,6 @@ static uint32_t find_object_pos(const struct object_id *oid)
return oe_in_pack_pos(writer.to_pack, entry);
}
static void show_object(struct object *object, const char *name, void *data)
{
struct bitmap *base = data;
bitmap_set(base, find_object_pos(&object->oid));
mark_as_seen(object);
}
static void show_commit(struct commit *commit, void *data)
{
mark_as_seen((struct object *)commit);
}
static int
add_to_include_set(struct bitmap *base, struct commit *commit)
{
khiter_t hash_pos;
uint32_t bitmap_pos = find_object_pos(&commit->object.oid);
if (bitmap_get(base, bitmap_pos))
return 0;
hash_pos = kh_get_oid_map(writer.bitmaps, commit->object.oid);
if (hash_pos < kh_end(writer.bitmaps)) {
struct bitmapped_commit *bc = kh_value(writer.bitmaps, hash_pos);
bitmap_or_ewah(base, bc->bitmap);
return 0;
}
bitmap_set(base, bitmap_pos);
return 1;
}
static int
should_include(struct commit *commit, void *_data)
{
struct bitmap *base = _data;
if (!add_to_include_set(base, commit)) {
struct commit_list *parent = commit->parents;
mark_as_seen((struct object *)commit);
while (parent) {
parent->item->object.flags |= SEEN;
mark_as_seen((struct object *)parent->item);
parent = parent->next;
}
return 0;
}
return 1;
}
static void compute_xor_offsets(void)
{
static const int MAX_XOR_OFFSET_SEARCH = 10;
@ -248,79 +177,326 @@ static void compute_xor_offsets(void)
}
}
/*
 * Per-commit state used while building bitmaps; stored in the `bb_data`
 * commit slab and looked up with bb_data_at().
 */
struct bb_commit {
/*
 * Commits that bitmap_builder_init() attached to this commit while
 * pushing commit masks to first parents; bitmap_writer_build() pops
 * them and passes this commit's bitmap on to each of them.
 */
struct commit_list *reverse_edges;
/*
 * One bit per selected commit (bit i corresponds to selected[i]).
 * Only live during bitmap_builder_init(), which frees it once the
 * commit has been visited.
 */
struct bitmap *commit_mask;
/* Object bitmap being accumulated for this commit (see fill_bitmap_commit()). */
struct bitmap *bitmap;
/* selected: commit is in the writer's selected array */
unsigned selected:1,
/* maximal: set for selected commits, and set/cleared on parents from the mask comparison */
maximal:1;
unsigned idx; /* within selected array */
};
define_commit_slab(bb_data, struct bb_commit);
/*
 * Working state for one bitmap build: the per-commit slab plus the ordered
 * list of commits whose bitmaps must be computed.
 */
struct bitmap_builder {
/* commit slab mapping each commit to its struct bb_commit */
struct bb_data data;
/*
 * Filled by bitmap_builder_init(): maximal commits in the order the
 * revision walk produced them, followed by commits that already have a
 * bitmap in the old index. bitmap_writer_build() walks it back to front.
 */
struct commit **commits;
/* number of used / allocated slots in `commits` (managed with ALLOC_GROW) */
size_t commits_nr, commits_alloc;
};
/*
 * Initialise `bb` for the commits selected in `writer`.
 *
 * Each selected commit is given its own bit in a "commit mask". A
 * first-parent, topologically ordered revision walk then pushes every
 * commit's mask down to its first parent, recording in `reverse_edges`
 * which commits a parent's bitmap must later be handed to, and collecting
 * the "maximal" commits in bb->commits.
 *
 * bb:         builder to initialise; any previous contents are overwritten.
 * writer:     source of the selected commits and of the repository.
 * old_bitmap: existing bitmap index, or NULL. Commits that already have a
 *             bitmap there are not walked past; they are appended to
 *             bb->commits after all maximal commits.
 *
 * Dies if the revision walk cannot be set up.
 */
static void bitmap_builder_init(struct bitmap_builder *bb,
				struct bitmap_writer *writer,
				struct bitmap_index *old_bitmap)
{
	struct rev_info revs;
	struct commit *commit;
	struct commit_list *reusable = NULL;
	struct commit_list *r;
	unsigned int i, num_maximal = 0;

	memset(bb, 0, sizeof(*bb));
	init_bb_data(&bb->data);

	reset_revision_walk();
	repo_init_revisions(writer->to_pack->repo, &revs, NULL);
	revs.topo_order = 1;
	revs.first_parent_only = 1;

	/* Seed the walk: selected commit i owns bit i of the commit mask. */
	for (i = 0; i < writer->selected_nr; i++) {
		struct commit *c = writer->selected[i].commit;
		struct bb_commit *ent = bb_data_at(&bb->data, c);

		ent->selected = 1;
		ent->maximal = 1;
		ent->idx = i;

		ent->commit_mask = bitmap_new();
		bitmap_set(ent->commit_mask, i);

		add_pending_object(&revs, &c->object, "");
	}

	if (prepare_revision_walk(&revs))
		die("revision walk setup failed");

	while ((commit = get_revision(&revs))) {
		struct commit_list *p = commit->parents;
		struct bb_commit *c_ent;

		parse_commit_or_die(commit);

		c_ent = bb_data_at(&bb->data, commit);

		/*
		 * If there is no commit_mask, there is no reason to iterate
		 * over this commit; it is not selected (if it were, it would
		 * not have a blank commit mask) and all its children have
		 * existing bitmaps (see the comment starting with "This commit
		 * has an existing bitmap" below), so it does not contribute
		 * anything to the final bitmap file or its descendants.
		 */
		if (!c_ent->commit_mask)
			continue;

		if (old_bitmap && bitmap_for_commit(old_bitmap, commit)) {
			/*
			 * This commit has an existing bitmap, so we can
			 * get its bits immediately without an object
			 * walk. That is, it is reusable as-is and there is no
			 * need to continue walking beyond it.
			 *
			 * Mark it as such and add it to bb->commits separately
			 * to avoid allocating a position in the commit mask.
			 */
			commit_list_insert(commit, &reusable);
			goto next;
		}

		if (c_ent->maximal) {
			num_maximal++;
			ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc);
			bb->commits[bb->commits_nr++] = commit;
		}

		if (p) {
			struct bb_commit *p_ent = bb_data_at(&bb->data, p->item);
			int c_not_p, p_not_c;

			if (!p_ent->commit_mask) {
				p_ent->commit_mask = bitmap_new();
				c_not_p = 1;
				p_not_c = 0;
			} else {
				/*
				 * bitmap_is_subset() returns 1 when its first
				 * argument has a bit the second lacks.
				 */
				c_not_p = bitmap_is_subset(c_ent->commit_mask, p_ent->commit_mask);
				p_not_c = bitmap_is_subset(p_ent->commit_mask, c_ent->commit_mask);
			}

			/*
			 * Nothing new to push to the parent. Jump to the
			 * cleanup rather than `continue`, so that this
			 * commit's mask is still released.
			 */
			if (!c_not_p)
				goto next;

			bitmap_or(p_ent->commit_mask, c_ent->commit_mask);

			if (p_not_c)
				p_ent->maximal = 1;
			else {
				p_ent->maximal = 0;
				free_commit_list(p_ent->reverse_edges);
				p_ent->reverse_edges = NULL;
			}

			if (c_ent->maximal) {
				commit_list_insert(commit, &p_ent->reverse_edges);
			} else {
				struct commit_list *cc = c_ent->reverse_edges;

				for (; cc; cc = cc->next) {
					if (!commit_list_contains(cc->item, p_ent->reverse_edges))
						commit_list_insert(cc->item, &p_ent->reverse_edges);
				}
			}
		}

next:
		/* The mask is only needed until the commit has been visited. */
		bitmap_free(c_ent->commit_mask);
		c_ent->commit_mask = NULL;
	}

	/* Reusable commits go after every maximal commit in bb->commits. */
	for (r = reusable; r; r = r->next) {
		ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc);
		bb->commits[bb->commits_nr++] = r->item;
	}

	trace2_data_intmax("pack-bitmap-write", the_repository,
			   "num_selected_commits", writer->selected_nr);
	trace2_data_intmax("pack-bitmap-write", the_repository,
			   "num_maximal_commits", num_maximal);

	free_commit_list(reusable);
}
/*
 * Release what bitmap_builder_init() allocated in `bb`: the commit slab
 * and the `commits` array. The array pointer is reset together with its
 * counters so the builder is left in a consistent empty state (a stale
 * pointer with commits_alloc == 0 would be handed to realloc by a later
 * ALLOC_GROW).
 */
static void bitmap_builder_clear(struct bitmap_builder *bb)
{
	clear_bb_data(&bb->data);
	free(bb->commits);
	bb->commits = NULL;
	bb->commits_nr = bb->commits_alloc = 0;
}
static void fill_bitmap_tree(struct bitmap *bitmap,
struct tree *tree)
{
uint32_t pos;
struct tree_desc desc;
struct name_entry entry;
/*
* If our bit is already set, then there is nothing to do. Both this
* tree and all of its children will be set.
*/
pos = find_object_pos(&tree->object.oid);
if (bitmap_get(bitmap, pos))
return;
bitmap_set(bitmap, pos);
if (parse_tree(tree) < 0)
die("unable to load tree object %s",
oid_to_hex(&tree->object.oid));
init_tree_desc(&desc, tree->buffer, tree->size);
while (tree_entry(&desc, &entry)) {
switch (object_type(entry.mode)) {
case OBJ_TREE:
fill_bitmap_tree(bitmap,
lookup_tree(the_repository, &entry.oid));
break;
case OBJ_BLOB:
bitmap_set(bitmap, find_object_pos(&entry.oid));
break;
default:
/* Gitlink, etc; not reachable */
break;
}
}
free_tree_buffer(tree);
}
/*
 * Fill ent->bitmap with the objects reachable from `commit`.
 *
 * ent:        entry whose bitmap is filled; the bitmap is created here if
 *             it does not exist yet, otherwise bits are added to it.
 * commit:     starting point of the walk.
 * queue:      work queue for commits; drained by the time we return.
 * tree_queue: work queue for root trees; drained by the time we return.
 * old_bitmap: existing bitmap index, or NULL.
 * mapping:    position mapping passed to rebuild_bitmap(), or NULL. Old
 *             bitmaps are only consulted when both old_bitmap and mapping
 *             are non-NULL.
 *
 * Parents whose bit is already set in ent->bitmap are not queued again.
 */
static void fill_bitmap_commit(struct bb_commit *ent,
			       struct commit *commit,
			       struct prio_queue *queue,
			       struct prio_queue *tree_queue,
			       struct bitmap_index *old_bitmap,
			       const uint32_t *mapping)
{
	if (!ent->bitmap)
		ent->bitmap = bitmap_new();

	prio_queue_put(queue, commit);

	while (queue->nr) {
		struct commit_list *p;
		struct commit *c = prio_queue_get(queue);

		if (old_bitmap && mapping) {
			struct ewah_bitmap *old = bitmap_for_commit(old_bitmap, c);
			/*
			 * If this commit has an old bitmap, then translate that
			 * bitmap and add its bits to this one. No need to walk
			 * parents or the tree for this commit.
			 */
			if (old && !rebuild_bitmap(mapping, old, ent->bitmap))
				continue;
		}

		/*
		 * Mark ourselves and queue our tree. The commit
		 * walk ensures we cover all parents.
		 */
		bitmap_set(ent->bitmap, find_object_pos(&c->object.oid));
		prio_queue_put(tree_queue, get_commit_tree(c));

		for (p = c->parents; p; p = p->next) {
			/*
			 * Keep the position in the unsigned type that
			 * find_object_pos() returns; a signed int would
			 * sign-extend large positions when converted to the
			 * size_t taken by bitmap_get()/bitmap_set().
			 */
			uint32_t pos = find_object_pos(&p->item->object.oid);

			if (!bitmap_get(ent->bitmap, pos)) {
				bitmap_set(ent->bitmap, pos);
				prio_queue_put(queue, p->item);
			}
		}
	}

	/* Trees are walked only after all commits have been marked. */
	while (tree_queue->nr)
		fill_bitmap_tree(ent->bitmap, prio_queue_get(tree_queue));
}
/*
 * Record the finished bitmap of a selected commit.
 *
 * The bitmap in `ent` is converted with bitmap_to_ewah() and stored in the
 * writer's selected[] slot given by ent->idx; that slot is then registered
 * in writer.bitmaps under the commit's object id. Dies when kh_put reports
 * 0 for the insertion, which this code treats as a duplicate entry.
 */
static void store_selected(struct bb_commit *ent, struct commit *commit)
{
	int inserted;
	khiter_t slot;
	struct bitmapped_commit *target = &writer.selected[ent->idx];

	target->bitmap = bitmap_to_ewah(ent->bitmap);

	slot = kh_put_oid_map(writer.bitmaps, commit->object.oid, &inserted);
	if (!inserted)
		die("Duplicate entry when writing index: %s",
		    oid_to_hex(&commit->object.oid));
	kh_value(writer.bitmaps, slot) = target;
}
void bitmap_writer_build(struct packing_data *to_pack)
{
static const double REUSE_BITMAP_THRESHOLD = 0.2;
int i, reuse_after, need_reset;
struct bitmap *base = bitmap_new();
struct rev_info revs;
struct bitmap_builder bb;
size_t i;
int nr_stored = 0; /* for progress */
struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
struct prio_queue tree_queue = { NULL };
struct bitmap_index *old_bitmap;
uint32_t *mapping;
writer.bitmaps = kh_init_oid_map();
writer.to_pack = to_pack;
if (writer.show_progress)
writer.progress = start_progress("Building bitmaps", writer.selected_nr);
trace2_region_enter("pack-bitmap-write", "building_bitmaps_total",
the_repository);
repo_init_revisions(to_pack->repo, &revs, NULL);
revs.tag_objects = 1;
revs.tree_objects = 1;
revs.blob_objects = 1;
revs.no_walk = 0;
old_bitmap = prepare_bitmap_git(to_pack->repo);
if (old_bitmap)
mapping = create_bitmap_mapping(old_bitmap, to_pack);
else
mapping = NULL;
revs.include_check = should_include;
reset_revision_walk();
bitmap_builder_init(&bb, &writer, old_bitmap);
for (i = bb.commits_nr; i > 0; i--) {
struct commit *commit = bb.commits[i-1];
struct bb_commit *ent = bb_data_at(&bb.data, commit);
struct commit *child;
int reused = 0;
reuse_after = writer.selected_nr * REUSE_BITMAP_THRESHOLD;
need_reset = 0;
fill_bitmap_commit(ent, commit, &queue, &tree_queue,
old_bitmap, mapping);
for (i = writer.selected_nr - 1; i >= 0; --i) {
struct bitmapped_commit *stored;
struct object *object;
if (ent->selected) {
store_selected(ent, commit);
nr_stored++;
display_progress(writer.progress, nr_stored);
}
khiter_t hash_pos;
int hash_ret;
while ((child = pop_commit(&ent->reverse_edges))) {
struct bb_commit *child_ent =
bb_data_at(&bb.data, child);
stored = &writer.selected[i];
object = (struct object *)stored->commit;
if (stored->bitmap == NULL) {
if (i < writer.selected_nr - 1 &&
(need_reset ||
!in_merge_bases(writer.selected[i + 1].commit,
stored->commit))) {
bitmap_reset(base);
reset_all_seen();
if (child_ent->bitmap)
bitmap_or(child_ent->bitmap, ent->bitmap);
else if (reused)
child_ent->bitmap = bitmap_dup(ent->bitmap);
else {
child_ent->bitmap = ent->bitmap;
reused = 1;
}
add_pending_object(&revs, object, "");
revs.include_check_data = base;
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
traverse_commit_list(&revs, show_commit, show_object, base);
object_array_clear(&revs.pending);
stored->bitmap = bitmap_to_ewah(base);
need_reset = 0;
} else
need_reset = 1;
if (i >= reuse_after)
stored->flags |= BITMAP_FLAG_REUSE;
hash_pos = kh_put_oid_map(writer.bitmaps, object->oid, &hash_ret);
if (hash_ret == 0)
die("Duplicate entry when writing index: %s",
oid_to_hex(&object->oid));
kh_value(writer.bitmaps, hash_pos) = stored;
display_progress(writer.progress, writer.selected_nr - i);
}
if (!reused)
bitmap_free(ent->bitmap);
ent->bitmap = NULL;
}
clear_prio_queue(&queue);
clear_prio_queue(&tree_queue);
bitmap_builder_clear(&bb);
free(mapping);
trace2_region_leave("pack-bitmap-write", "building_bitmaps_total",
the_repository);
bitmap_free(base);
stop_progress(&writer.progress);
compute_xor_offsets();
@ -360,35 +536,6 @@ static int date_compare(const void *_a, const void *_b)
return (long)b->date - (long)a->date;
}
void bitmap_writer_reuse_bitmaps(struct packing_data *to_pack)
{
struct bitmap_index *bitmap_git;
if (!(bitmap_git = prepare_bitmap_git(to_pack->repo)))
return;
writer.reused = kh_init_oid_map();
rebuild_existing_bitmaps(bitmap_git, to_pack, writer.reused,
writer.show_progress);
/*
* NEEDSWORK: rebuild_existing_bitmaps() makes writer.reused reference
* some bitmaps in bitmap_git, so we can't free the latter.
*/
}
static struct ewah_bitmap *find_reused_bitmap(const struct object_id *oid)
{
khiter_t hash_pos;
if (!writer.reused)
return NULL;
hash_pos = kh_get_oid_map(writer.reused, *oid);
if (hash_pos >= kh_end(writer.reused))
return NULL;
return kh_value(writer.reused, hash_pos);
}
void bitmap_writer_select_commits(struct commit **indexed_commits,
unsigned int indexed_commits_nr,
int max_bitmaps)
@ -402,12 +549,11 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
if (indexed_commits_nr < 100) {
for (i = 0; i < indexed_commits_nr; ++i)
push_bitmapped_commit(indexed_commits[i], NULL);
push_bitmapped_commit(indexed_commits[i]);
return;
}
for (;;) {
struct ewah_bitmap *reused_bitmap = NULL;
struct commit *chosen = NULL;
next = next_commit_index(i);
@ -422,15 +568,13 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
if (next == 0) {
chosen = indexed_commits[i];
reused_bitmap = find_reused_bitmap(&chosen->object.oid);
} else {
chosen = indexed_commits[i + next];
for (j = 0; j <= next; ++j) {
struct commit *cm = indexed_commits[i + j];
reused_bitmap = find_reused_bitmap(&cm->object.oid);
if (reused_bitmap || (cm->object.flags & NEEDS_BITMAP) != 0) {
if ((cm->object.flags & NEEDS_BITMAP) != 0) {
chosen = cm;
break;
}
@ -440,7 +584,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
}
}
push_bitmapped_commit(chosen, reused_bitmap);
push_bitmapped_commit(chosen);
i += next + 1;
display_progress(writer.progress, i);

View File

@ -138,9 +138,10 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
static int load_bitmap_header(struct bitmap_index *index)
{
struct bitmap_disk_header *header = (void *)index->map;
size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
if (index->map_size < sizeof(*header) + the_hash_algo->rawsz)
return error("Corrupted bitmap index (missing header data)");
if (index->map_size < header_size + the_hash_algo->rawsz)
return error("Corrupted bitmap index (too small)");
if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
return error("Corrupted bitmap index file (wrong header)");
@ -152,19 +153,23 @@ static int load_bitmap_header(struct bitmap_index *index)
/* Parse known bitmap format options */
{
uint32_t flags = ntohs(header->options);
size_t cache_size = st_mult(index->pack->num_objects, sizeof(uint32_t));
unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
if ((flags & BITMAP_OPT_FULL_DAG) == 0)
return error("Unsupported options for bitmap index file "
"(Git requires BITMAP_OPT_FULL_DAG)");
if (flags & BITMAP_OPT_HASH_CACHE) {
unsigned char *end = index->map + index->map_size - the_hash_algo->rawsz;
index->hashes = ((uint32_t *)end) - index->pack->num_objects;
if (cache_size > index_end - index->map - header_size)
return error("corrupted bitmap index file (too short to fit hash cache)");
index->hashes = (void *)(index_end - cache_size);
index_end -= cache_size;
}
}
index->entry_count = ntohl(header->entry_count);
index->map_pos += sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
index->map_pos += header_size;
return 0;
}
@ -224,11 +229,16 @@ static int load_bitmap_entries_v1(struct bitmap_index *index)
uint32_t commit_idx_pos;
struct object_id oid;
if (index->map_size - index->map_pos < 6)
return error("corrupt ewah bitmap: truncated header for entry %d", i);
commit_idx_pos = read_be32(index->map, &index->map_pos);
xor_offset = read_u8(index->map, &index->map_pos);
flags = read_u8(index->map, &index->map_pos);
nth_packed_object_id(&oid, index->pack, commit_idx_pos);
if (nth_packed_object_id(&oid, index->pack, commit_idx_pos) < 0)
return error("corrupt ewah bitmap: commit index %u out of range",
(unsigned)commit_idx_pos);
bitmap = read_bitmap_1(index);
if (!bitmap)
@ -370,6 +380,16 @@ struct include_data {
struct bitmap *seen;
};
/*
 * Look up the bitmap stored for `commit` in `bitmap_git`.
 *
 * Returns the result of lookup_stored_bitmap() for the entry keyed by the
 * commit's object id, or NULL when the index has no entry for it.
 */
struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
				      struct commit *commit)
{
	khiter_t slot = kh_get_oid_map(bitmap_git->bitmaps,
				       commit->object.oid);

	if (slot < kh_end(bitmap_git->bitmaps))
		return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, slot));
	return NULL;
}
static inline int bitmap_position_extended(struct bitmap_index *bitmap_git,
const struct object_id *oid)
{
@ -455,10 +475,10 @@ static void show_commit(struct commit *commit, void *data)
static int add_to_include_set(struct bitmap_index *bitmap_git,
struct include_data *data,
const struct object_id *oid,
struct commit *commit,
int bitmap_pos)
{
khiter_t hash_pos;
struct ewah_bitmap *partial;
if (data->seen && bitmap_get(data->seen, bitmap_pos))
return 0;
@ -466,10 +486,9 @@ static int add_to_include_set(struct bitmap_index *bitmap_git,
if (bitmap_get(data->base, bitmap_pos))
return 0;
hash_pos = kh_get_oid_map(bitmap_git->bitmaps, *oid);
if (hash_pos < kh_end(bitmap_git->bitmaps)) {
struct stored_bitmap *st = kh_value(bitmap_git->bitmaps, hash_pos);
bitmap_or_ewah(data->base, lookup_stored_bitmap(st));
partial = bitmap_for_commit(bitmap_git, commit);
if (partial) {
bitmap_or_ewah(data->base, partial);
return 0;
}
@ -488,8 +507,7 @@ static int should_include(struct commit *commit, void *_data)
(struct object *)commit,
NULL);
if (!add_to_include_set(data->bitmap_git, data, &commit->object.oid,
bitmap_pos)) {
if (!add_to_include_set(data->bitmap_git, data, commit, bitmap_pos)) {
struct commit_list *parent = commit->parents;
while (parent) {
@ -503,6 +521,23 @@ static int should_include(struct commit *commit, void *_data)
return 1;
}
/*
 * Merge the stored bitmap of `commit`, if any, into `*base`.
 *
 * When `*base` is NULL it is created from the stored bitmap with
 * ewah_to_bitmap(); otherwise the stored bitmap is ORed into it.
 *
 * Returns 1 if the commit had a stored bitmap, and 0 (leaving `*base`
 * untouched) if it did not.
 */
static int add_commit_to_bitmap(struct bitmap_index *bitmap_git,
				struct bitmap **base,
				struct commit *commit)
{
	struct ewah_bitmap *stored = bitmap_for_commit(bitmap_git, commit);

	if (stored) {
		if (*base)
			bitmap_or_ewah(*base, stored);
		else
			*base = ewah_to_bitmap(stored);
		return 1;
	}
	return 0;
}
static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots,
@ -526,21 +561,10 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
struct object *object = roots->item;
roots = roots->next;
if (object->type == OBJ_COMMIT) {
khiter_t pos = kh_get_oid_map(bitmap_git->bitmaps, object->oid);
if (pos < kh_end(bitmap_git->bitmaps)) {
struct stored_bitmap *st = kh_value(bitmap_git->bitmaps, pos);
struct ewah_bitmap *or_with = lookup_stored_bitmap(st);
if (base == NULL)
base = ewah_to_bitmap(or_with);
else
bitmap_or_ewah(base, or_with);
object->flags |= SEEN;
continue;
}
if (object->type == OBJ_COMMIT &&
add_commit_to_bitmap(bitmap_git, &base, (struct commit *)object)) {
object->flags |= SEEN;
continue;
}
object_list_insert(object, &not_mapped);
@ -1272,10 +1296,10 @@ void test_bitmap_walk(struct rev_info *revs)
{
struct object *root;
struct bitmap *result = NULL;
khiter_t pos;
size_t result_popcnt;
struct bitmap_test_data tdata;
struct bitmap_index *bitmap_git;
struct ewah_bitmap *bm;
if (!(bitmap_git = prepare_bitmap_git(revs->repo)))
die("failed to load bitmap indexes");
@ -1287,12 +1311,9 @@ void test_bitmap_walk(struct rev_info *revs)
bitmap_git->version, bitmap_git->entry_count);
root = revs->pending.objects[0].item;
pos = kh_get_oid_map(bitmap_git->bitmaps, root->oid);
if (pos < kh_end(bitmap_git->bitmaps)) {
struct stored_bitmap *st = kh_value(bitmap_git->bitmaps, pos);
struct ewah_bitmap *bm = lookup_stored_bitmap(st);
bm = bitmap_for_commit(bitmap_git, (struct commit *)root);
if (bm) {
fprintf(stderr, "Found bitmap for %s. %d bits / %08x checksum\n",
oid_to_hex(&root->oid), (int)bm->bit_size, ewah_checksum(bm));
@ -1323,14 +1344,14 @@ void test_bitmap_walk(struct rev_info *revs)
if (bitmap_equals(result, tdata.base))
fprintf(stderr, "OK!\n");
else
fprintf(stderr, "Mismatch!\n");
die("mismatch in bitmap results");
free_bitmap_index(bitmap_git);
}
static int rebuild_bitmap(uint32_t *reposition,
struct ewah_bitmap *source,
struct bitmap *dest)
int rebuild_bitmap(const uint32_t *reposition,
struct ewah_bitmap *source,
struct bitmap *dest)
{
uint32_t pos = 0;
struct ewah_iterator it;
@ -1359,19 +1380,11 @@ static int rebuild_bitmap(uint32_t *reposition,
return 0;
}
int rebuild_existing_bitmaps(struct bitmap_index *bitmap_git,
struct packing_data *mapping,
kh_oid_map_t *reused_bitmaps,
int show_progress)
uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
struct packing_data *mapping)
{
uint32_t i, num_objects;
uint32_t *reposition;
struct bitmap *rebuild;
struct stored_bitmap *stored;
struct progress *progress = NULL;
khiter_t hash_pos;
int hash_ret;
num_objects = bitmap_git->pack->num_objects;
reposition = xcalloc(num_objects, sizeof(uint32_t));
@ -1389,33 +1402,7 @@ int rebuild_existing_bitmaps(struct bitmap_index *bitmap_git,
reposition[i] = oe_in_pack_pos(mapping, oe) + 1;
}
rebuild = bitmap_new();
i = 0;
if (show_progress)
progress = start_progress("Reusing bitmaps", 0);
kh_foreach_value(bitmap_git->bitmaps, stored, {
if (stored->flags & BITMAP_FLAG_REUSE) {
if (!rebuild_bitmap(reposition,
lookup_stored_bitmap(stored),
rebuild)) {
hash_pos = kh_put_oid_map(reused_bitmaps,
stored->oid,
&hash_ret);
kh_value(reused_bitmaps, hash_pos) =
bitmap_to_ewah(rebuild);
}
bitmap_reset(rebuild);
display_progress(progress, ++i);
}
});
stop_progress(&progress);
free(reposition);
bitmap_free(rebuild);
return 0;
return reposition;
}
void free_bitmap_index(struct bitmap_index *b)

View File

@ -73,7 +73,13 @@ void bitmap_writer_set_checksum(unsigned char *sha1);
void bitmap_writer_build_type_index(struct packing_data *to_pack,
struct pack_idx_entry **index,
uint32_t index_nr);
void bitmap_writer_reuse_bitmaps(struct packing_data *to_pack);
uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
struct packing_data *mapping);
int rebuild_bitmap(const uint32_t *reposition,
struct ewah_bitmap *source,
struct bitmap *dest);
struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
struct commit *commit);
void bitmap_writer_select_commits(struct commit **indexed_commits,
unsigned int indexed_commits_nr, int max_bitmaps);
void bitmap_writer_build(struct packing_data *to_pack);

View File

@ -20,84 +20,172 @@ has_any () {
grep -Ff "$1" "$2"
}
# To ensure the logic for "maximal commits" is exercised, make
# the repository a bit more complicated.
#
# other second
# * *
# (99 commits) (99 commits)
# * *
# |\ /|
# | * octo-other octo-second * |
# |/|\_________ ____________/|\|
# | \ \/ __________/ |
# | | ________/\ / |
# * |/ * merge-right *
# | _|__________/ \____________ |
# |/ | \|
# (l1) * * merge-left * (r1)
# | / \________________________ |
# |/ \|
# (l2) * * (r2)
# \___________________________ |
# \|
# * (base)
#
# We only push bits down the first-parent history, which
# makes some of these commits unimportant!
#
# The important part for the maximal commit algorithm is how
# the bitmasks are extended. Assuming starting bit positions
# for second (bit 0) and other (bit 1), the bitmasks at the
# end should be:
#
# second: 1 (maximal, selected)
# other: 01 (maximal, selected)
# (base): 11 (maximal)
#
# This complicated history was important for a previous
# version of the walk that guarantees never walking a
# commit multiple times. That goal might be important
# again, so preserve this complicated case. For now, this
# test will guarantee that the bitmaps are computed
# correctly, even with the repeat calculations.
test_expect_success 'setup repo with moderate-sized history' '
test_commit_bulk --id=file 100 &&
test_commit_bulk --id=file 10 &&
git branch -M second &&
git checkout -b other HEAD~5 &&
test_commit_bulk --id=side 10 &&
git checkout master &&
bitmaptip=$(git rev-parse master) &&
# add complicated history setup, including merges and
# ambiguous merge-bases
git checkout -b merge-left other~2 &&
git merge second~2 -m "merge-left" &&
git checkout -b merge-right second~1 &&
git merge other~1 -m "merge-right" &&
git checkout -b octo-second second &&
git merge merge-left merge-right -m "octopus-second" &&
git checkout -b octo-other other &&
git merge merge-left merge-right -m "octopus-other" &&
git checkout other &&
git merge octo-other -m "pull octopus" &&
git checkout second &&
git merge octo-second -m "pull octopus" &&
# Remove these branches so they are not selected
# as bitmap tips
git branch -D merge-left &&
git branch -D merge-right &&
git branch -D octo-other &&
git branch -D octo-second &&
# add padding to make these merges less interesting
# and avoid having them selected for bitmaps
test_commit_bulk --id=file 100 &&
git checkout other &&
test_commit_bulk --id=side 100 &&
git checkout second &&
bitmaptip=$(git rev-parse second) &&
blob=$(echo tagged-blob | git hash-object -w --stdin) &&
git tag tagged-blob $blob &&
git config repack.writebitmaps true
'
test_expect_success 'full repack creates bitmaps' '
git repack -ad &&
GIT_TRACE2_EVENT_NESTING=4 GIT_TRACE2_EVENT="$(pwd)/trace" \
git repack -ad &&
ls .git/objects/pack/ | grep bitmap >output &&
test_line_count = 1 output
test_line_count = 1 output &&
grep "\"key\":\"num_selected_commits\",\"value\":\"106\"" trace &&
grep "\"key\":\"num_maximal_commits\",\"value\":\"107\"" trace
'
test_expect_success 'rev-list --test-bitmap verifies bitmaps' '
git rev-list --test-bitmap HEAD
'
rev_list_tests() {
state=$1
test_expect_success "counting commits via bitmap ($state)" '
git rev-list --count HEAD >expect &&
git rev-list --use-bitmap-index --count HEAD >actual &&
rev_list_tests_head () {
test_expect_success "counting commits via bitmap ($state, $branch)" '
git rev-list --count $branch >expect &&
git rev-list --use-bitmap-index --count $branch >actual &&
test_cmp expect actual
'
test_expect_success "counting partial commits via bitmap ($state)" '
git rev-list --count HEAD~5..HEAD >expect &&
git rev-list --use-bitmap-index --count HEAD~5..HEAD >actual &&
test_expect_success "counting partial commits via bitmap ($state, $branch)" '
git rev-list --count $branch~5..$branch >expect &&
git rev-list --use-bitmap-index --count $branch~5..$branch >actual &&
test_cmp expect actual
'
test_expect_success "counting commits with limit ($state)" '
git rev-list --count -n 1 HEAD >expect &&
git rev-list --use-bitmap-index --count -n 1 HEAD >actual &&
test_expect_success "counting commits with limit ($state, $branch)" '
git rev-list --count -n 1 $branch >expect &&
git rev-list --use-bitmap-index --count -n 1 $branch >actual &&
test_cmp expect actual
'
test_expect_success "counting non-linear history ($state)" '
git rev-list --count other...master >expect &&
git rev-list --use-bitmap-index --count other...master >actual &&
test_expect_success "counting non-linear history ($state, $branch)" '
git rev-list --count other...second >expect &&
git rev-list --use-bitmap-index --count other...second >actual &&
test_cmp expect actual
'
test_expect_success "counting commits with limiting ($state)" '
git rev-list --count HEAD -- 1.t >expect &&
git rev-list --use-bitmap-index --count HEAD -- 1.t >actual &&
test_expect_success "counting commits with limiting ($state, $branch)" '
git rev-list --count $branch -- 1.t >expect &&
git rev-list --use-bitmap-index --count $branch -- 1.t >actual &&
test_cmp expect actual
'
test_expect_success "counting objects via bitmap ($state)" '
git rev-list --count --objects HEAD >expect &&
git rev-list --use-bitmap-index --count --objects HEAD >actual &&
test_expect_success "counting objects via bitmap ($state, $branch)" '
git rev-list --count --objects $branch >expect &&
git rev-list --use-bitmap-index --count --objects $branch >actual &&
test_cmp expect actual
'
test_expect_success "enumerate commits ($state)" '
git rev-list --use-bitmap-index HEAD >actual &&
git rev-list HEAD >expect &&
test_expect_success "enumerate commits ($state, $branch)" '
git rev-list --use-bitmap-index $branch >actual &&
git rev-list $branch >expect &&
test_bitmap_traversal --no-confirm-bitmaps expect actual
'
test_expect_success "enumerate --objects ($state)" '
git rev-list --objects --use-bitmap-index HEAD >actual &&
git rev-list --objects HEAD >expect &&
test_expect_success "enumerate --objects ($state, $branch)" '
git rev-list --objects --use-bitmap-index $branch >actual &&
git rev-list --objects $branch >expect &&
test_bitmap_traversal expect actual
'
test_expect_success "bitmap --objects handles non-commit objects ($state)" '
git rev-list --objects --use-bitmap-index HEAD tagged-blob >actual &&
test_expect_success "bitmap --objects handles non-commit objects ($state, $branch)" '
git rev-list --objects --use-bitmap-index $branch tagged-blob >actual &&
grep $blob actual
'
}
# Run the rev_list_tests_head checks once per branch.
#
# $1 is a label stored in $state; rev_list_tests_head reads both $state
# and $branch when naming and running its tests.
rev_list_tests () {
	state=$1

	for branch in second other
	do
		rev_list_tests_head
	done
}
rev_list_tests 'full bitmap'
test_expect_success 'clone from bitmapped repository' '
@ -128,7 +216,7 @@ test_expect_success 'setup further non-bitmapped commits' '
rev_list_tests 'partial bitmap'
test_expect_success 'fetch (partial bitmap)' '
git --git-dir=clone.git fetch origin master:master &&
git --git-dir=clone.git fetch origin second:second &&
git rev-parse HEAD >expect &&
git --git-dir=clone.git rev-parse HEAD >actual &&
test_cmp expect actual
@ -230,7 +318,7 @@ test_expect_success 'full repack, reusing previous bitmaps' '
'
test_expect_success 'fetch (full bitmap)' '
git --git-dir=clone.git fetch origin master:master &&
git --git-dir=clone.git fetch origin second:second &&
git rev-parse HEAD >expect &&
git --git-dir=clone.git rev-parse HEAD >actual &&
test_cmp expect actual
@ -343,7 +431,20 @@ test_expect_success 'pack reuse respects --incremental' '
test_must_be_empty actual
'
test_expect_success 'truncated bitmap fails gracefully' '
test_expect_success 'truncated bitmap fails gracefully (ewah)' '
test_config pack.writebitmaphashcache false &&
git repack -ad &&
git rev-list --use-bitmap-index --count --all >expect &&
bitmap=$(ls .git/objects/pack/*.bitmap) &&
test_when_finished "rm -f $bitmap" &&
test_copy_bytes 256 <$bitmap >$bitmap.tmp &&
mv -f $bitmap.tmp $bitmap &&
git rev-list --use-bitmap-index --count --all >actual 2>stderr &&
test_cmp expect actual &&
test_i18ngrep corrupt.ewah.bitmap stderr
'
test_expect_success 'truncated bitmap fails gracefully (cache)' '
git repack -ad &&
git rev-list --use-bitmap-index --count --all >expect &&
bitmap=$(ls .git/objects/pack/*.bitmap) &&
@ -352,7 +453,7 @@ test_expect_success 'truncated bitmap fails gracefully' '
mv -f $bitmap.tmp $bitmap &&
git rev-list --use-bitmap-index --count --all >actual 2>stderr &&
test_cmp expect actual &&
test_i18ngrep corrupt stderr
test_i18ngrep corrupted.bitmap.index stderr
'
# have_delta <obj> <expected_base>