pack-bitmap: implement BLOB_LIMIT filtering

Just as the previous commit implemented BLOB_NONE, we can support
BLOB_LIMIT filters by looking at the sizes of any blobs in the result
and unsetting their bits as appropriate. This is slightly more expensive
than BLOB_NONE, but still produces a noticeable speedup (these results
are on git.git):

  Test                                         HEAD~2            HEAD
  ------------------------------------------------------------------------------------
  5310.9:  rev-list count with blob:none       1.80(1.77+0.02)   0.22(0.20+0.02) -87.8%
  5310.10: rev-list count with blob:limit=1k   1.99(1.96+0.03)   0.29(0.25+0.03) -85.4%

The implementation is similar to the BLOB_NONE one, with the exception
that we have to go object-by-object while walking the blob-type bitmap
(since we can't mask out the matches, but must look up the size
individually for each blob). The trick with using ctz64() is taken from
show_objects_for_type(), which likewise needs to find individual bits
(but wants to quickly skip over big chunks without blobs).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jeff King 2020-02-14 13:22:39 -05:00 committed by Junio C Hamano
parent 4f3bd5606a
commit 84243da129
3 changed files with 104 additions and 1 deletions

View File

@ -779,6 +779,78 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
bitmap_free(tips); bitmap_free(tips);
} }
static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
uint32_t pos)
{
struct packed_git *pack = bitmap_git->pack;
unsigned long size;
struct object_info oi = OBJECT_INFO_INIT;
oi.sizep = &size;
if (pos < pack->num_objects) {
struct revindex_entry *entry = &pack->revindex[pos];
if (packed_object_info(the_repository, pack,
entry->offset, &oi) < 0) {
struct object_id oid;
nth_packed_object_oid(&oid, pack, entry->nr);
die(_("unable to get size of %s"), oid_to_hex(&oid));
}
} else {
struct eindex *eindex = &bitmap_git->ext_index;
struct object *obj = eindex->objects[pos - pack->num_objects];
if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
}
return size;
}
static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git,
struct object_list *tip_objects,
struct bitmap *to_filter,
unsigned long limit)
{
struct eindex *eindex = &bitmap_git->ext_index;
struct bitmap *tips;
struct ewah_iterator it;
eword_t mask;
uint32_t i;
tips = find_tip_blobs(bitmap_git, tip_objects);
for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB);
i < to_filter->word_alloc && ewah_iterator_next(&mask, &it);
i++) {
eword_t word = to_filter->words[i] & mask;
unsigned offset;
for (offset = 0; offset < BITS_IN_EWORD; offset++) {
uint32_t pos;
if ((word >> offset) == 0)
break;
offset += ewah_bit_ctz64(word >> offset);
pos = i * BITS_IN_EWORD + offset;
if (!bitmap_get(tips, pos) &&
get_size_by_pos(bitmap_git, pos) >= limit)
bitmap_unset(to_filter, pos);
}
}
for (i = 0; i < eindex->count; i++) {
uint32_t pos = i + bitmap_git->pack->num_objects;
if (eindex->objects[i]->type == OBJ_BLOB &&
bitmap_get(to_filter, pos) &&
!bitmap_get(tips, pos) &&
get_size_by_pos(bitmap_git, pos) >= limit)
bitmap_unset(to_filter, pos);
}
bitmap_free(tips);
}
static int filter_bitmap(struct bitmap_index *bitmap_git, static int filter_bitmap(struct bitmap_index *bitmap_git,
struct object_list *tip_objects, struct object_list *tip_objects,
struct bitmap *to_filter, struct bitmap *to_filter,
@ -794,6 +866,14 @@ static int filter_bitmap(struct bitmap_index *bitmap_git,
return 0; return 0;
} }
if (filter->choice == LOFC_BLOB_LIMIT) {
if (bitmap_git)
filter_bitmap_blob_limit(bitmap_git, tip_objects,
to_filter,
filter->blob_limit_value);
return 0;
}
/* filter choice not handled */ /* filter choice not handled */
return -1; return -1;
} }

View File

@ -52,6 +52,11 @@ test_perf 'rev-list count with blob:none' '
--filter=blob:none >/dev/null --filter=blob:none >/dev/null
' '
test_perf 'rev-list count with blob:limit=1k' '
git rev-list --use-bitmap-index --count --objects --all \
--filter=blob:limit=1k >/dev/null
'
test_expect_success 'create partial bitmap state' ' test_expect_success 'create partial bitmap state' '
# pick a commit to represent the repo tip in the past # pick a commit to represent the repo tip in the past
cutoff=$(git rev-list HEAD~100 -1) && cutoff=$(git rev-list HEAD~100 -1) &&

View File

@ -6,8 +6,10 @@ test_description='rev-list combining bitmaps and filters'
test_expect_success 'set up bitmapped repo' ' test_expect_success 'set up bitmapped repo' '
# one commit will have bitmaps, the other will not # one commit will have bitmaps, the other will not
test_commit one && test_commit one &&
test_commit much-larger-blob-one &&
git repack -adb && git repack -adb &&
test_commit two test_commit two &&
test_commit much-larger-blob-two
' '
test_expect_success 'filters fallback to non-bitmap traversal' ' test_expect_success 'filters fallback to non-bitmap traversal' '
@ -35,4 +37,20 @@ test_expect_success 'blob:none filter with specified blob' '
test_bitmap_traversal expect actual test_bitmap_traversal expect actual
' '
test_expect_success 'blob:limit filter' '
git rev-list --objects --filter=blob:limit=5 HEAD >expect &&
git rev-list --use-bitmap-index \
--objects --filter=blob:limit=5 HEAD >actual &&
test_bitmap_traversal expect actual
'
test_expect_success 'blob:limit filter with specified blob' '
git rev-list --objects --filter=blob:limit=5 \
HEAD HEAD:much-larger-blob-two.t >expect &&
git rev-list --use-bitmap-index \
--objects --filter=blob:limit=5 \
HEAD HEAD:much-larger-blob-two.t >actual &&
test_bitmap_traversal expect actual
'
test_done test_done