Merge branch 'jk/packfile-reuse-cleanup'
The way "git pack-objects" reuses objects stored in an existing pack to generate its result has been improved.

* jk/packfile-reuse-cleanup:
  pack-bitmap: don't rely on bitmap_git->reuse_objects
  pack-objects: add checks for duplicate objects
  pack-objects: improve partial packfile reuse
  builtin/pack-objects: introduce obj_is_packed()
  pack-objects: introduce pack.allowPackReuse
  csum-file: introduce hashfile_total()
  pack-bitmap: simplify bitmap_has_oid_in_uninteresting()
  pack-bitmap: uninteresting oid can be outside bitmapped packfile
  pack-bitmap: introduce bitmap_walk_contains()
  ewah/bitmap: introduce bitmap_word_alloc()
  packfile: expose get_delta_base()
  builtin/pack-objects: report reused packfile objects
This commit is contained in:
commit
a14aebeac3
@ -27,6 +27,13 @@ Note that changing the compression level will not automatically recompress
|
|||||||
all existing objects. You can force recompression by passing the -F option
|
all existing objects. You can force recompression by passing the -F option
|
||||||
to linkgit:git-repack[1].
|
to linkgit:git-repack[1].
|
||||||
|
|
||||||
|
pack.allowPackReuse::
|
||||||
|
When true, and when reachability bitmaps are enabled,
|
||||||
|
pack-objects will try to send parts of the bitmapped packfile
|
||||||
|
verbatim. This can reduce memory and CPU usage to serve fetches,
|
||||||
|
but might result in sending a slightly larger pack. Defaults to
|
||||||
|
true.
|
||||||
|
|
||||||
pack.island::
|
pack.island::
|
||||||
An extended regular expression configuring a set of delta
|
An extended regular expression configuring a set of delta
|
||||||
islands. See "DELTA ISLANDS" in linkgit:git-pack-objects[1]
|
islands. See "DELTA ISLANDS" in linkgit:git-pack-objects[1]
|
||||||
|
@ -92,10 +92,11 @@ static struct progress *progress_state;
|
|||||||
|
|
||||||
static struct packed_git *reuse_packfile;
|
static struct packed_git *reuse_packfile;
|
||||||
static uint32_t reuse_packfile_objects;
|
static uint32_t reuse_packfile_objects;
|
||||||
static off_t reuse_packfile_offset;
|
static struct bitmap *reuse_packfile_bitmap;
|
||||||
|
|
||||||
static int use_bitmap_index_default = 1;
|
static int use_bitmap_index_default = 1;
|
||||||
static int use_bitmap_index = -1;
|
static int use_bitmap_index = -1;
|
||||||
|
static int allow_pack_reuse = 1;
|
||||||
static enum {
|
static enum {
|
||||||
WRITE_BITMAP_FALSE = 0,
|
WRITE_BITMAP_FALSE = 0,
|
||||||
WRITE_BITMAP_QUIET,
|
WRITE_BITMAP_QUIET,
|
||||||
@ -784,57 +785,185 @@ static struct object_entry **compute_write_order(void)
|
|||||||
return wo;
|
return wo;
|
||||||
}
|
}
|
||||||
|
|
||||||
static off_t write_reused_pack(struct hashfile *f)
|
|
||||||
|
/*
|
||||||
|
* A reused set of objects. All objects in a chunk have the same
|
||||||
|
* relative position in the original packfile and the generated
|
||||||
|
* packfile.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static struct reused_chunk {
|
||||||
|
/* The offset of the first object of this chunk in the original
|
||||||
|
* packfile. */
|
||||||
|
off_t original;
|
||||||
|
/* The offset of the first object of this chunk in the generated
|
||||||
|
* packfile minus "original". */
|
||||||
|
off_t difference;
|
||||||
|
} *reused_chunks;
|
||||||
|
static int reused_chunks_nr;
|
||||||
|
static int reused_chunks_alloc;
|
||||||
|
|
||||||
|
static void record_reused_object(off_t where, off_t offset)
|
||||||
{
|
{
|
||||||
unsigned char buffer[8192];
|
if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
|
||||||
off_t to_write, total;
|
return;
|
||||||
int fd;
|
|
||||||
|
|
||||||
if (!is_pack_valid(reuse_packfile))
|
ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
|
||||||
die(_("packfile is invalid: %s"), reuse_packfile->pack_name);
|
reused_chunks_alloc);
|
||||||
|
reused_chunks[reused_chunks_nr].original = where;
|
||||||
|
reused_chunks[reused_chunks_nr].difference = offset;
|
||||||
|
reused_chunks_nr++;
|
||||||
|
}
|
||||||
|
|
||||||
fd = git_open(reuse_packfile->pack_name);
|
/*
|
||||||
if (fd < 0)
|
* Binary search to find the chunk that "where" is in. Note
|
||||||
die_errno(_("unable to open packfile for reuse: %s"),
|
* that we're not looking for an exact match, just the first
|
||||||
reuse_packfile->pack_name);
|
* chunk that contains it (which implicitly ends at the start
|
||||||
|
* of the next chunk).
|
||||||
if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
|
*/
|
||||||
die_errno(_("unable to seek in reused packfile"));
|
static off_t find_reused_offset(off_t where)
|
||||||
|
{
|
||||||
if (reuse_packfile_offset < 0)
|
int lo = 0, hi = reused_chunks_nr;
|
||||||
reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz;
|
while (lo < hi) {
|
||||||
|
int mi = lo + ((hi - lo) / 2);
|
||||||
total = to_write = reuse_packfile_offset - sizeof(struct pack_header);
|
if (where == reused_chunks[mi].original)
|
||||||
|
return reused_chunks[mi].difference;
|
||||||
while (to_write) {
|
if (where < reused_chunks[mi].original)
|
||||||
int read_pack = xread(fd, buffer, sizeof(buffer));
|
hi = mi;
|
||||||
|
else
|
||||||
if (read_pack <= 0)
|
lo = mi + 1;
|
||||||
die_errno(_("unable to read from reused packfile"));
|
|
||||||
|
|
||||||
if (read_pack > to_write)
|
|
||||||
read_pack = to_write;
|
|
||||||
|
|
||||||
hashwrite(f, buffer, read_pack);
|
|
||||||
to_write -= read_pack;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We don't know the actual number of objects written,
|
|
||||||
* only how many bytes written, how many bytes total, and
|
|
||||||
* how many objects total. So we can fake it by pretending all
|
|
||||||
* objects we are writing are the same size. This gives us a
|
|
||||||
* smooth progress meter, and at the end it matches the true
|
|
||||||
* answer.
|
|
||||||
*/
|
|
||||||
written = reuse_packfile_objects *
|
|
||||||
(((double)(total - to_write)) / total);
|
|
||||||
display_progress(progress_state, written);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
close(fd);
|
/*
|
||||||
written = reuse_packfile_objects;
|
* The first chunk starts at zero, so we can't have gone below
|
||||||
display_progress(progress_state, written);
|
* there.
|
||||||
return reuse_packfile_offset - sizeof(struct pack_header);
|
*/
|
||||||
|
assert(lo);
|
||||||
|
return reused_chunks[lo-1].difference;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void write_reused_pack_one(size_t pos, struct hashfile *out,
|
||||||
|
struct pack_window **w_curs)
|
||||||
|
{
|
||||||
|
off_t offset, next, cur;
|
||||||
|
enum object_type type;
|
||||||
|
unsigned long size;
|
||||||
|
|
||||||
|
offset = reuse_packfile->revindex[pos].offset;
|
||||||
|
next = reuse_packfile->revindex[pos + 1].offset;
|
||||||
|
|
||||||
|
record_reused_object(offset, offset - hashfile_total(out));
|
||||||
|
|
||||||
|
cur = offset;
|
||||||
|
type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
|
||||||
|
assert(type >= 0);
|
||||||
|
|
||||||
|
if (type == OBJ_OFS_DELTA) {
|
||||||
|
off_t base_offset;
|
||||||
|
off_t fixup;
|
||||||
|
|
||||||
|
unsigned char header[MAX_PACK_OBJECT_HEADER];
|
||||||
|
unsigned len;
|
||||||
|
|
||||||
|
base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
|
||||||
|
assert(base_offset != 0);
|
||||||
|
|
||||||
|
/* Convert to REF_DELTA if we must... */
|
||||||
|
if (!allow_ofs_delta) {
|
||||||
|
int base_pos = find_revindex_position(reuse_packfile, base_offset);
|
||||||
|
const unsigned char *base_sha1 =
|
||||||
|
nth_packed_object_sha1(reuse_packfile,
|
||||||
|
reuse_packfile->revindex[base_pos].nr);
|
||||||
|
|
||||||
|
len = encode_in_pack_object_header(header, sizeof(header),
|
||||||
|
OBJ_REF_DELTA, size);
|
||||||
|
hashwrite(out, header, len);
|
||||||
|
hashwrite(out, base_sha1, 20);
|
||||||
|
copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Otherwise see if we need to rewrite the offset... */
|
||||||
|
fixup = find_reused_offset(offset) -
|
||||||
|
find_reused_offset(base_offset);
|
||||||
|
if (fixup) {
|
||||||
|
unsigned char ofs_header[10];
|
||||||
|
unsigned i, ofs_len;
|
||||||
|
off_t ofs = offset - base_offset - fixup;
|
||||||
|
|
||||||
|
len = encode_in_pack_object_header(header, sizeof(header),
|
||||||
|
OBJ_OFS_DELTA, size);
|
||||||
|
|
||||||
|
i = sizeof(ofs_header) - 1;
|
||||||
|
ofs_header[i] = ofs & 127;
|
||||||
|
while (ofs >>= 7)
|
||||||
|
ofs_header[--i] = 128 | (--ofs & 127);
|
||||||
|
|
||||||
|
ofs_len = sizeof(ofs_header) - i;
|
||||||
|
|
||||||
|
hashwrite(out, header, len);
|
||||||
|
hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
|
||||||
|
copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ...otherwise we have no fixup, and can write it verbatim */
|
||||||
|
}
|
||||||
|
|
||||||
|
copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t write_reused_pack_verbatim(struct hashfile *out,
|
||||||
|
struct pack_window **w_curs)
|
||||||
|
{
|
||||||
|
size_t pos = 0;
|
||||||
|
|
||||||
|
while (pos < reuse_packfile_bitmap->word_alloc &&
|
||||||
|
reuse_packfile_bitmap->words[pos] == (eword_t)~0)
|
||||||
|
pos++;
|
||||||
|
|
||||||
|
if (pos) {
|
||||||
|
off_t to_write;
|
||||||
|
|
||||||
|
written = (pos * BITS_IN_EWORD);
|
||||||
|
to_write = reuse_packfile->revindex[written].offset
|
||||||
|
- sizeof(struct pack_header);
|
||||||
|
|
||||||
|
/* We're recording one chunk, not one object. */
|
||||||
|
record_reused_object(sizeof(struct pack_header), 0);
|
||||||
|
hashflush(out);
|
||||||
|
copy_pack_data(out, reuse_packfile, w_curs,
|
||||||
|
sizeof(struct pack_header), to_write);
|
||||||
|
|
||||||
|
display_progress(progress_state, written);
|
||||||
|
}
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void write_reused_pack(struct hashfile *f)
|
||||||
|
{
|
||||||
|
size_t i = 0;
|
||||||
|
uint32_t offset;
|
||||||
|
struct pack_window *w_curs = NULL;
|
||||||
|
|
||||||
|
if (allow_ofs_delta)
|
||||||
|
i = write_reused_pack_verbatim(f, &w_curs);
|
||||||
|
|
||||||
|
for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
|
||||||
|
eword_t word = reuse_packfile_bitmap->words[i];
|
||||||
|
size_t pos = (i * BITS_IN_EWORD);
|
||||||
|
|
||||||
|
for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
|
||||||
|
if ((word >> offset) == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
offset += ewah_bit_ctz64(word >> offset);
|
||||||
|
write_reused_pack_one(pos + offset, f, &w_curs);
|
||||||
|
display_progress(progress_state, ++written);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unuse_pack(&w_curs);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char no_split_warning[] = N_(
|
static const char no_split_warning[] = N_(
|
||||||
@ -867,11 +996,9 @@ static void write_pack_file(void)
|
|||||||
offset = write_pack_header(f, nr_remaining);
|
offset = write_pack_header(f, nr_remaining);
|
||||||
|
|
||||||
if (reuse_packfile) {
|
if (reuse_packfile) {
|
||||||
off_t packfile_size;
|
|
||||||
assert(pack_to_stdout);
|
assert(pack_to_stdout);
|
||||||
|
write_reused_pack(f);
|
||||||
packfile_size = write_reused_pack(f);
|
offset = hashfile_total(f);
|
||||||
offset += packfile_size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
nr_written = 0;
|
nr_written = 0;
|
||||||
@ -1000,6 +1127,10 @@ static int have_duplicate_entry(const struct object_id *oid,
|
|||||||
{
|
{
|
||||||
struct object_entry *entry;
|
struct object_entry *entry;
|
||||||
|
|
||||||
|
if (reuse_packfile_bitmap &&
|
||||||
|
bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid))
|
||||||
|
return 1;
|
||||||
|
|
||||||
entry = packlist_find(&to_pack, oid);
|
entry = packlist_find(&to_pack, oid);
|
||||||
if (!entry)
|
if (!entry)
|
||||||
return 0;
|
return 0;
|
||||||
@ -2552,6 +2683,13 @@ static void ll_find_deltas(struct object_entry **list, unsigned list_size,
|
|||||||
free(p);
|
free(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int obj_is_packed(const struct object_id *oid)
|
||||||
|
{
|
||||||
|
return packlist_find(&to_pack, oid) ||
|
||||||
|
(reuse_packfile_bitmap &&
|
||||||
|
bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
|
||||||
|
}
|
||||||
|
|
||||||
static void add_tag_chain(const struct object_id *oid)
|
static void add_tag_chain(const struct object_id *oid)
|
||||||
{
|
{
|
||||||
struct tag *tag;
|
struct tag *tag;
|
||||||
@ -2563,7 +2701,7 @@ static void add_tag_chain(const struct object_id *oid)
|
|||||||
* it was included via bitmaps, we would not have parsed it
|
* it was included via bitmaps, we would not have parsed it
|
||||||
* previously).
|
* previously).
|
||||||
*/
|
*/
|
||||||
if (packlist_find(&to_pack, oid))
|
if (obj_is_packed(oid))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
tag = lookup_tag(the_repository, oid);
|
tag = lookup_tag(the_repository, oid);
|
||||||
@ -2587,7 +2725,7 @@ static int add_ref_tag(const char *path, const struct object_id *oid, int flag,
|
|||||||
|
|
||||||
if (starts_with(path, "refs/tags/") && /* is a tag? */
|
if (starts_with(path, "refs/tags/") && /* is a tag? */
|
||||||
!peel_ref(path, &peeled) && /* peelable? */
|
!peel_ref(path, &peeled) && /* peelable? */
|
||||||
packlist_find(&to_pack, &peeled)) /* object packed? */
|
obj_is_packed(&peeled)) /* object packed? */
|
||||||
add_tag_chain(oid);
|
add_tag_chain(oid);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -2655,6 +2793,7 @@ static void prepare_pack(int window, int depth)
|
|||||||
|
|
||||||
if (nr_deltas && n > 1) {
|
if (nr_deltas && n > 1) {
|
||||||
unsigned nr_done = 0;
|
unsigned nr_done = 0;
|
||||||
|
|
||||||
if (progress)
|
if (progress)
|
||||||
progress_state = start_progress(_("Compressing objects"),
|
progress_state = start_progress(_("Compressing objects"),
|
||||||
nr_deltas);
|
nr_deltas);
|
||||||
@ -2699,6 +2838,10 @@ static int git_pack_config(const char *k, const char *v, void *cb)
|
|||||||
use_bitmap_index_default = git_config_bool(k, v);
|
use_bitmap_index_default = git_config_bool(k, v);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
if (!strcmp(k, "pack.allowpackreuse")) {
|
||||||
|
allow_pack_reuse = git_config_bool(k, v);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
if (!strcmp(k, "pack.threads")) {
|
if (!strcmp(k, "pack.threads")) {
|
||||||
delta_search_threads = git_config_int(k, v);
|
delta_search_threads = git_config_int(k, v);
|
||||||
if (delta_search_threads < 0)
|
if (delta_search_threads < 0)
|
||||||
@ -3030,8 +3173,8 @@ static void loosen_unused_packed_objects(void)
|
|||||||
*/
|
*/
|
||||||
static int pack_options_allow_reuse(void)
|
static int pack_options_allow_reuse(void)
|
||||||
{
|
{
|
||||||
return pack_to_stdout &&
|
return allow_pack_reuse &&
|
||||||
allow_ofs_delta &&
|
pack_to_stdout &&
|
||||||
!ignore_packed_keep_on_disk &&
|
!ignore_packed_keep_on_disk &&
|
||||||
!ignore_packed_keep_in_core &&
|
!ignore_packed_keep_in_core &&
|
||||||
(!local || !have_non_local_packs) &&
|
(!local || !have_non_local_packs) &&
|
||||||
@ -3048,7 +3191,7 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
|
|||||||
bitmap_git,
|
bitmap_git,
|
||||||
&reuse_packfile,
|
&reuse_packfile,
|
||||||
&reuse_packfile_objects,
|
&reuse_packfile_objects,
|
||||||
&reuse_packfile_offset)) {
|
&reuse_packfile_bitmap)) {
|
||||||
assert(reuse_packfile_objects);
|
assert(reuse_packfile_objects);
|
||||||
nr_result += reuse_packfile_objects;
|
nr_result += reuse_packfile_objects;
|
||||||
display_progress(progress_state, nr_result);
|
display_progress(progress_state, nr_result);
|
||||||
@ -3509,7 +3652,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
|
|||||||
if (progress)
|
if (progress)
|
||||||
fprintf_ln(stderr,
|
fprintf_ln(stderr,
|
||||||
_("Total %"PRIu32" (delta %"PRIu32"),"
|
_("Total %"PRIu32" (delta %"PRIu32"),"
|
||||||
" reused %"PRIu32" (delta %"PRIu32")"),
|
" reused %"PRIu32" (delta %"PRIu32"),"
|
||||||
written, written_delta, reused, reused_delta);
|
" pack-reused %"PRIu32),
|
||||||
|
written, written_delta, reused, reused_delta,
|
||||||
|
reuse_packfile_objects);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -42,6 +42,15 @@ void hashflush(struct hashfile *f);
|
|||||||
void crc32_begin(struct hashfile *);
|
void crc32_begin(struct hashfile *);
|
||||||
uint32_t crc32_end(struct hashfile *);
|
uint32_t crc32_end(struct hashfile *);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the total number of bytes fed to the hashfile so far (including ones
|
||||||
|
* that have not been written out to the descriptor yet).
|
||||||
|
*/
|
||||||
|
static inline off_t hashfile_total(struct hashfile *f)
|
||||||
|
{
|
||||||
|
return f->total + f->offset;
|
||||||
|
}
|
||||||
|
|
||||||
static inline void hashwrite_u8(struct hashfile *f, uint8_t data)
|
static inline void hashwrite_u8(struct hashfile *f, uint8_t data)
|
||||||
{
|
{
|
||||||
hashwrite(f, &data, sizeof(data));
|
hashwrite(f, &data, sizeof(data));
|
||||||
|
@ -22,21 +22,26 @@
|
|||||||
#define EWAH_MASK(x) ((eword_t)1 << (x % BITS_IN_EWORD))
|
#define EWAH_MASK(x) ((eword_t)1 << (x % BITS_IN_EWORD))
|
||||||
#define EWAH_BLOCK(x) (x / BITS_IN_EWORD)
|
#define EWAH_BLOCK(x) (x / BITS_IN_EWORD)
|
||||||
|
|
||||||
struct bitmap *bitmap_new(void)
|
struct bitmap *bitmap_word_alloc(size_t word_alloc)
|
||||||
{
|
{
|
||||||
struct bitmap *bitmap = xmalloc(sizeof(struct bitmap));
|
struct bitmap *bitmap = xmalloc(sizeof(struct bitmap));
|
||||||
bitmap->words = xcalloc(32, sizeof(eword_t));
|
bitmap->words = xcalloc(word_alloc, sizeof(eword_t));
|
||||||
bitmap->word_alloc = 32;
|
bitmap->word_alloc = word_alloc;
|
||||||
return bitmap;
|
return bitmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct bitmap *bitmap_new(void)
|
||||||
|
{
|
||||||
|
return bitmap_word_alloc(32);
|
||||||
|
}
|
||||||
|
|
||||||
void bitmap_set(struct bitmap *self, size_t pos)
|
void bitmap_set(struct bitmap *self, size_t pos)
|
||||||
{
|
{
|
||||||
size_t block = EWAH_BLOCK(pos);
|
size_t block = EWAH_BLOCK(pos);
|
||||||
|
|
||||||
if (block >= self->word_alloc) {
|
if (block >= self->word_alloc) {
|
||||||
size_t old_size = self->word_alloc;
|
size_t old_size = self->word_alloc;
|
||||||
self->word_alloc = block * 2;
|
self->word_alloc = block ? block * 2 : 1;
|
||||||
REALLOC_ARRAY(self->words, self->word_alloc);
|
REALLOC_ARRAY(self->words, self->word_alloc);
|
||||||
memset(self->words + old_size, 0x0,
|
memset(self->words + old_size, 0x0,
|
||||||
(self->word_alloc - old_size) * sizeof(eword_t));
|
(self->word_alloc - old_size) * sizeof(eword_t));
|
||||||
|
@ -172,6 +172,7 @@ struct bitmap {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct bitmap *bitmap_new(void);
|
struct bitmap *bitmap_new(void);
|
||||||
|
struct bitmap *bitmap_word_alloc(size_t word_alloc);
|
||||||
void bitmap_set(struct bitmap *self, size_t pos);
|
void bitmap_set(struct bitmap *self, size_t pos);
|
||||||
int bitmap_get(struct bitmap *self, size_t pos);
|
int bitmap_get(struct bitmap *self, size_t pos);
|
||||||
void bitmap_reset(struct bitmap *self);
|
void bitmap_reset(struct bitmap *self);
|
||||||
|
204
pack-bitmap.c
204
pack-bitmap.c
@ -326,6 +326,13 @@ failed:
|
|||||||
munmap(bitmap_git->map, bitmap_git->map_size);
|
munmap(bitmap_git->map, bitmap_git->map_size);
|
||||||
bitmap_git->map = NULL;
|
bitmap_git->map = NULL;
|
||||||
bitmap_git->map_size = 0;
|
bitmap_git->map_size = 0;
|
||||||
|
|
||||||
|
kh_destroy_oid_map(bitmap_git->bitmaps);
|
||||||
|
bitmap_git->bitmaps = NULL;
|
||||||
|
|
||||||
|
kh_destroy_oid_pos(bitmap_git->ext_index.positions);
|
||||||
|
bitmap_git->ext_index.positions = NULL;
|
||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -622,7 +629,7 @@ static void show_objects_for_type(
|
|||||||
enum object_type object_type,
|
enum object_type object_type,
|
||||||
show_reachable_fn show_reach)
|
show_reachable_fn show_reach)
|
||||||
{
|
{
|
||||||
size_t pos = 0, i = 0;
|
size_t i = 0;
|
||||||
uint32_t offset;
|
uint32_t offset;
|
||||||
|
|
||||||
struct ewah_iterator it;
|
struct ewah_iterator it;
|
||||||
@ -630,13 +637,15 @@ static void show_objects_for_type(
|
|||||||
|
|
||||||
struct bitmap *objects = bitmap_git->result;
|
struct bitmap *objects = bitmap_git->result;
|
||||||
|
|
||||||
if (bitmap_git->reuse_objects == bitmap_git->pack->num_objects)
|
|
||||||
return;
|
|
||||||
|
|
||||||
ewah_iterator_init(&it, type_filter);
|
ewah_iterator_init(&it, type_filter);
|
||||||
|
|
||||||
while (i < objects->word_alloc && ewah_iterator_next(&filter, &it)) {
|
for (i = 0; i < objects->word_alloc &&
|
||||||
|
ewah_iterator_next(&filter, &it); i++) {
|
||||||
eword_t word = objects->words[i] & filter;
|
eword_t word = objects->words[i] & filter;
|
||||||
|
size_t pos = (i * BITS_IN_EWORD);
|
||||||
|
|
||||||
|
if (!word)
|
||||||
|
continue;
|
||||||
|
|
||||||
for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
|
for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
|
||||||
struct object_id oid;
|
struct object_id oid;
|
||||||
@ -648,9 +657,6 @@ static void show_objects_for_type(
|
|||||||
|
|
||||||
offset += ewah_bit_ctz64(word >> offset);
|
offset += ewah_bit_ctz64(word >> offset);
|
||||||
|
|
||||||
if (pos + offset < bitmap_git->reuse_objects)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
entry = &bitmap_git->pack->revindex[pos + offset];
|
entry = &bitmap_git->pack->revindex[pos + offset];
|
||||||
nth_packed_object_oid(&oid, bitmap_git->pack, entry->nr);
|
nth_packed_object_oid(&oid, bitmap_git->pack, entry->nr);
|
||||||
|
|
||||||
@ -659,9 +665,6 @@ static void show_objects_for_type(
|
|||||||
|
|
||||||
show_reach(&oid, object_type, 0, hash, bitmap_git->pack, entry->offset);
|
show_reach(&oid, object_type, 0, hash, bitmap_git->pack, entry->offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
pos += BITS_IN_EWORD;
|
|
||||||
i++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -768,68 +771,141 @@ cleanup:
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
|
static void try_partial_reuse(struct bitmap_index *bitmap_git,
|
||||||
struct packed_git **packfile,
|
size_t pos,
|
||||||
uint32_t *entries,
|
struct bitmap *reuse,
|
||||||
off_t *up_to)
|
struct pack_window **w_curs)
|
||||||
{
|
{
|
||||||
/*
|
struct revindex_entry *revidx;
|
||||||
* Reuse the packfile content if we need more than
|
off_t offset;
|
||||||
* 90% of its objects
|
enum object_type type;
|
||||||
*/
|
unsigned long size;
|
||||||
static const double REUSE_PERCENT = 0.9;
|
|
||||||
|
|
||||||
|
if (pos >= bitmap_git->pack->num_objects)
|
||||||
|
return; /* not actually in the pack */
|
||||||
|
|
||||||
|
revidx = &bitmap_git->pack->revindex[pos];
|
||||||
|
offset = revidx->offset;
|
||||||
|
type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size);
|
||||||
|
if (type < 0)
|
||||||
|
return; /* broken packfile, punt */
|
||||||
|
|
||||||
|
if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) {
|
||||||
|
off_t base_offset;
|
||||||
|
int base_pos;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find the position of the base object so we can look it up
|
||||||
|
* in our bitmaps. If we can't come up with an offset, or if
|
||||||
|
* that offset is not in the revidx, the pack is corrupt.
|
||||||
|
* There's nothing we can do, so just punt on this object,
|
||||||
|
* and the normal slow path will complain about it in
|
||||||
|
* more detail.
|
||||||
|
*/
|
||||||
|
base_offset = get_delta_base(bitmap_git->pack, w_curs,
|
||||||
|
&offset, type, revidx->offset);
|
||||||
|
if (!base_offset)
|
||||||
|
return;
|
||||||
|
base_pos = find_revindex_position(bitmap_git->pack, base_offset);
|
||||||
|
if (base_pos < 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We assume delta dependencies always point backwards. This
|
||||||
|
* lets us do a single pass, and is basically always true
|
||||||
|
* due to the way OFS_DELTAs work. You would not typically
|
||||||
|
* find REF_DELTA in a bitmapped pack, since we only bitmap
|
||||||
|
* packs we write fresh, and OFS_DELTA is the default. But
|
||||||
|
* let's double check to make sure the pack wasn't written with
|
||||||
|
* odd parameters.
|
||||||
|
*/
|
||||||
|
if (base_pos >= pos)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* And finally, if we're not sending the base as part of our
|
||||||
|
* reuse chunk, then don't send this object either. The base
|
||||||
|
* would come after us, along with other objects not
|
||||||
|
* necessarily in the pack, which means we'd need to convert
|
||||||
|
* to REF_DELTA on the fly. Better to just let the normal
|
||||||
|
* object_entry code path handle it.
|
||||||
|
*/
|
||||||
|
if (!bitmap_get(reuse, base_pos))
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we got here, then the object is OK to reuse. Mark it.
|
||||||
|
*/
|
||||||
|
bitmap_set(reuse, pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
|
||||||
|
struct packed_git **packfile_out,
|
||||||
|
uint32_t *entries,
|
||||||
|
struct bitmap **reuse_out)
|
||||||
|
{
|
||||||
struct bitmap *result = bitmap_git->result;
|
struct bitmap *result = bitmap_git->result;
|
||||||
uint32_t reuse_threshold;
|
struct bitmap *reuse;
|
||||||
uint32_t i, reuse_objects = 0;
|
struct pack_window *w_curs = NULL;
|
||||||
|
size_t i = 0;
|
||||||
|
uint32_t offset;
|
||||||
|
|
||||||
assert(result);
|
assert(result);
|
||||||
|
|
||||||
for (i = 0; i < result->word_alloc; ++i) {
|
while (i < result->word_alloc && result->words[i] == (eword_t)~0)
|
||||||
if (result->words[i] != (eword_t)~0) {
|
i++;
|
||||||
reuse_objects += ewah_bit_ctz64(~result->words[i]);
|
|
||||||
break;
|
/* Don't mark objects not in the packfile */
|
||||||
|
if (i > bitmap_git->pack->num_objects / BITS_IN_EWORD)
|
||||||
|
i = bitmap_git->pack->num_objects / BITS_IN_EWORD;
|
||||||
|
|
||||||
|
reuse = bitmap_word_alloc(i);
|
||||||
|
memset(reuse->words, 0xFF, i * sizeof(eword_t));
|
||||||
|
|
||||||
|
for (; i < result->word_alloc; ++i) {
|
||||||
|
eword_t word = result->words[i];
|
||||||
|
size_t pos = (i * BITS_IN_EWORD);
|
||||||
|
|
||||||
|
for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
|
||||||
|
if ((word >> offset) == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
offset += ewah_bit_ctz64(word >> offset);
|
||||||
|
try_partial_reuse(bitmap_git, pos + offset, reuse, &w_curs);
|
||||||
}
|
}
|
||||||
|
|
||||||
reuse_objects += BITS_IN_EWORD;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef GIT_BITMAP_DEBUG
|
unuse_pack(&w_curs);
|
||||||
{
|
|
||||||
const unsigned char *sha1;
|
|
||||||
struct revindex_entry *entry;
|
|
||||||
|
|
||||||
entry = &bitmap_git->reverse_index->revindex[reuse_objects];
|
*entries = bitmap_popcount(reuse);
|
||||||
sha1 = nth_packed_object_sha1(bitmap_git->pack, entry->nr);
|
if (!*entries) {
|
||||||
|
bitmap_free(reuse);
|
||||||
fprintf(stderr, "Failed to reuse at %d (%016llx)\n",
|
|
||||||
reuse_objects, result->words[i]);
|
|
||||||
fprintf(stderr, " %s\n", hash_to_hex(sha1));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!reuse_objects)
|
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
if (reuse_objects >= bitmap_git->pack->num_objects) {
|
|
||||||
bitmap_git->reuse_objects = *entries = bitmap_git->pack->num_objects;
|
|
||||||
*up_to = -1; /* reuse the full pack */
|
|
||||||
*packfile = bitmap_git->pack;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
reuse_threshold = bitmap_popcount(bitmap_git->result) * REUSE_PERCENT;
|
/*
|
||||||
|
* Drop any reused objects from the result, since they will not
|
||||||
if (reuse_objects < reuse_threshold)
|
* need to be handled separately.
|
||||||
return -1;
|
*/
|
||||||
|
bitmap_and_not(result, reuse);
|
||||||
bitmap_git->reuse_objects = *entries = reuse_objects;
|
*packfile_out = bitmap_git->pack;
|
||||||
*up_to = bitmap_git->pack->revindex[reuse_objects].offset;
|
*reuse_out = reuse;
|
||||||
*packfile = bitmap_git->pack;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int bitmap_walk_contains(struct bitmap_index *bitmap_git,
|
||||||
|
struct bitmap *bitmap, const struct object_id *oid)
|
||||||
|
{
|
||||||
|
int idx;
|
||||||
|
|
||||||
|
if (!bitmap)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
idx = bitmap_position(bitmap_git, oid);
|
||||||
|
return idx >= 0 && bitmap_get(bitmap, idx);
|
||||||
|
}
|
||||||
|
|
||||||
void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git,
|
void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git,
|
||||||
show_reachable_fn show_reachable)
|
show_reachable_fn show_reachable)
|
||||||
{
|
{
|
||||||
@ -1118,16 +1194,6 @@ void free_bitmap_index(struct bitmap_index *b)
|
|||||||
int bitmap_has_oid_in_uninteresting(struct bitmap_index *bitmap_git,
|
int bitmap_has_oid_in_uninteresting(struct bitmap_index *bitmap_git,
|
||||||
const struct object_id *oid)
|
const struct object_id *oid)
|
||||||
{
|
{
|
||||||
int pos;
|
return bitmap_git &&
|
||||||
|
bitmap_walk_contains(bitmap_git, bitmap_git->haves, oid);
|
||||||
if (!bitmap_git)
|
|
||||||
return 0; /* no bitmap loaded */
|
|
||||||
if (!bitmap_git->haves)
|
|
||||||
return 0; /* walk had no "haves" */
|
|
||||||
|
|
||||||
pos = bitmap_position_packfile(bitmap_git, oid);
|
|
||||||
if (pos < 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return bitmap_get(bitmap_git->haves, pos);
|
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include "ewah/ewok.h"
|
#include "ewah/ewok.h"
|
||||||
#include "khash.h"
|
#include "khash.h"
|
||||||
|
#include "pack.h"
|
||||||
#include "pack-objects.h"
|
#include "pack-objects.h"
|
||||||
|
|
||||||
struct commit;
|
struct commit;
|
||||||
@ -49,10 +50,13 @@ void test_bitmap_walk(struct rev_info *revs);
|
|||||||
struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs);
|
struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs);
|
||||||
int reuse_partial_packfile_from_bitmap(struct bitmap_index *,
|
int reuse_partial_packfile_from_bitmap(struct bitmap_index *,
|
||||||
struct packed_git **packfile,
|
struct packed_git **packfile,
|
||||||
uint32_t *entries, off_t *up_to);
|
uint32_t *entries,
|
||||||
|
struct bitmap **reuse_out);
|
||||||
int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping,
|
int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping,
|
||||||
kh_oid_map_t *reused_bitmaps, int show_progress);
|
kh_oid_map_t *reused_bitmaps, int show_progress);
|
||||||
void free_bitmap_index(struct bitmap_index *);
|
void free_bitmap_index(struct bitmap_index *);
|
||||||
|
int bitmap_walk_contains(struct bitmap_index *,
|
||||||
|
struct bitmap *bitmap, const struct object_id *oid);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* After a traversal has been performed by prepare_bitmap_walk(), this can be
|
* After a traversal has been performed by prepare_bitmap_walk(), this can be
|
||||||
|
10
packfile.c
10
packfile.c
@ -1162,11 +1162,11 @@ const struct packed_git *has_packed_and_bad(struct repository *r,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static off_t get_delta_base(struct packed_git *p,
|
off_t get_delta_base(struct packed_git *p,
|
||||||
struct pack_window **w_curs,
|
struct pack_window **w_curs,
|
||||||
off_t *curpos,
|
off_t *curpos,
|
||||||
enum object_type type,
|
enum object_type type,
|
||||||
off_t delta_obj_offset)
|
off_t delta_obj_offset)
|
||||||
{
|
{
|
||||||
unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
|
unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
|
||||||
off_t base_offset;
|
off_t base_offset;
|
||||||
|
@ -151,6 +151,9 @@ void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object
|
|||||||
unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
|
unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
|
||||||
unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
|
unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
|
||||||
int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
|
int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
|
||||||
|
off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
|
||||||
|
off_t *curpos, enum object_type type,
|
||||||
|
off_t delta_obj_offset);
|
||||||
|
|
||||||
void release_pack_memory(size_t);
|
void release_pack_memory(size_t);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user