0df82d99da
The object reachability bitmap machinery and the partial cloning machinery were not prepared to work well together, because some object-filtering criteria that partial clones use inherently rely on object traversal, but the bitmap machinery is an optimization to bypass that object traversal. There however are some cases where they can work together, and they were taught about them. * jk/object-filter-with-bitmap: rev-list --count: comment on the use of count_right++ pack-objects: support filters with bitmaps pack-bitmap: implement BLOB_LIMIT filtering pack-bitmap: implement BLOB_NONE filtering bitmap: add bitmap_unset() function rev-list: use bitmap filters for traversal pack-bitmap: basic noop bitmap filter infrastructure rev-list: allow commit-only bitmap traversals t5310: factor out bitmap traversal comparison rev-list: allow bitmaps when counting objects rev-list: make --count work with --objects rev-list: factor out bitmap-optimized routines pack-bitmap: refuse to do a bitmap traversal with pathspecs rev-list: fallback to non-bitmap traversal when filtering pack-bitmap: fix leak of haves/wants object lists pack-bitmap: factor out type iterator initialization
194 lines
5.7 KiB
C
194 lines
5.7 KiB
C
/**
|
|
* Copyright 2013, GitHub, Inc
|
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon,
|
|
* David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
#ifndef __EWOK_BITMAP_H__
|
|
#define __EWOK_BITMAP_H__
|
|
|
|
struct strbuf;
|
|
typedef uint64_t eword_t;
|
|
#define BITS_IN_EWORD (sizeof(eword_t) * 8)
|
|
|
|
/**
|
|
* Do not use __builtin_popcountll. The GCC implementation
|
|
* is notoriously slow on all platforms.
|
|
*
|
|
* See: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36041
|
|
*/
|
|
static inline uint32_t ewah_bit_popcount64(uint64_t x)
|
|
{
|
|
x = (x & 0x5555555555555555ULL) + ((x >> 1) & 0x5555555555555555ULL);
|
|
x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
|
|
x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL);
|
|
return (x * 0x0101010101010101ULL) >> 56;
|
|
}
|
|
|
|
/* __builtin_ctzll was not available until 3.4.0 */
|
|
#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR > 3))
|
|
#define ewah_bit_ctz64(x) __builtin_ctzll(x)
|
|
#else
|
|
static inline int ewah_bit_ctz64(uint64_t x)
|
|
{
|
|
int n = 0;
|
|
if ((x & 0xffffffff) == 0) { x >>= 32; n += 32; }
|
|
if ((x & 0xffff) == 0) { x >>= 16; n += 16; }
|
|
if ((x & 0xff) == 0) { x >>= 8; n += 8; }
|
|
if ((x & 0xf) == 0) { x >>= 4; n += 4; }
|
|
if ((x & 0x3) == 0) { x >>= 2; n += 2; }
|
|
if ((x & 0x1) == 0) { x >>= 1; n += 1; }
|
|
return n + !x;
|
|
}
|
|
#endif
|
|
|
|
struct ewah_bitmap {
|
|
eword_t *buffer;
|
|
size_t buffer_size;
|
|
size_t alloc_size;
|
|
size_t bit_size;
|
|
eword_t *rlw;
|
|
};
|
|
|
|
typedef void (*ewah_callback)(size_t pos, void *);
|
|
|
|
struct ewah_bitmap *ewah_pool_new(void);
|
|
void ewah_pool_free(struct ewah_bitmap *self);
|
|
|
|
/**
|
|
* Allocate a new EWAH Compressed bitmap
|
|
*/
|
|
struct ewah_bitmap *ewah_new(void);
|
|
|
|
/**
|
|
* Free all the memory of the bitmap
|
|
*/
|
|
void ewah_free(struct ewah_bitmap *self);
|
|
|
|
int ewah_serialize_to(struct ewah_bitmap *self,
|
|
int (*write_fun)(void *out, const void *buf, size_t len),
|
|
void *out);
|
|
int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *);
|
|
|
|
ssize_t ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len);
|
|
|
|
uint32_t ewah_checksum(struct ewah_bitmap *self);
|
|
|
|
/**
|
|
* Call the given callback with the position of every single bit
|
|
* that has been set on the bitmap.
|
|
*
|
|
* This is an efficient operation that does not fully decompress
|
|
* the bitmap.
|
|
*/
|
|
void ewah_each_bit(struct ewah_bitmap *self, ewah_callback callback, void *payload);
|
|
|
|
/**
|
|
* Set a given bit on the bitmap.
|
|
*
|
|
* The bit at position `pos` will be set to true. Because of the
|
|
* way that the bitmap is compressed, a set bit cannot be unset
|
|
* later on.
|
|
*
|
|
* Furthermore, since the bitmap uses streaming compression, bits
|
|
* can only set incrementally.
|
|
*
|
|
* E.g.
|
|
* ewah_set(bitmap, 1); // ok
|
|
* ewah_set(bitmap, 76); // ok
|
|
* ewah_set(bitmap, 77); // ok
|
|
* ewah_set(bitmap, 8712800127); // ok
|
|
* ewah_set(bitmap, 25); // failed, assert raised
|
|
*/
|
|
void ewah_set(struct ewah_bitmap *self, size_t i);
|
|
|
|
struct ewah_iterator {
|
|
const eword_t *buffer;
|
|
size_t buffer_size;
|
|
|
|
size_t pointer;
|
|
eword_t compressed, literals;
|
|
eword_t rl, lw;
|
|
int b;
|
|
};
|
|
|
|
/**
|
|
* Initialize a new iterator to run through the bitmap in uncompressed form.
|
|
*
|
|
* The iterator can be stack allocated. The underlying bitmap must not be freed
|
|
* before the iteration is over.
|
|
*
|
|
* E.g.
|
|
*
|
|
* struct ewah_bitmap *bitmap = ewah_new();
|
|
* struct ewah_iterator it;
|
|
*
|
|
* ewah_iterator_init(&it, bitmap);
|
|
*/
|
|
void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent);
|
|
|
|
/**
|
|
* Yield every single word in the bitmap in uncompressed form. This is:
|
|
* yield single words (32-64 bits) where each bit represents an actual
|
|
* bit from the bitmap.
|
|
*
|
|
* Return: true if a word was yield, false if there are no words left
|
|
*/
|
|
int ewah_iterator_next(eword_t *next, struct ewah_iterator *it);
|
|
|
|
void ewah_xor(
|
|
struct ewah_bitmap *ewah_i,
|
|
struct ewah_bitmap *ewah_j,
|
|
struct ewah_bitmap *out);
|
|
|
|
/**
|
|
* Direct word access
|
|
*/
|
|
size_t ewah_add_empty_words(struct ewah_bitmap *self, int v, size_t number);
|
|
void ewah_add_dirty_words(
|
|
struct ewah_bitmap *self, const eword_t *buffer, size_t number, int negate);
|
|
size_t ewah_add(struct ewah_bitmap *self, eword_t word);
|
|
|
|
|
|
/**
|
|
* Uncompressed, old-school bitmap that can be efficiently compressed
|
|
* into an `ewah_bitmap`.
|
|
*/
|
|
struct bitmap {
|
|
eword_t *words;
|
|
size_t word_alloc;
|
|
};
|
|
|
|
struct bitmap *bitmap_new(void);
|
|
struct bitmap *bitmap_word_alloc(size_t word_alloc);
|
|
void bitmap_set(struct bitmap *self, size_t pos);
|
|
void bitmap_unset(struct bitmap *self, size_t pos);
|
|
int bitmap_get(struct bitmap *self, size_t pos);
|
|
void bitmap_reset(struct bitmap *self);
|
|
void bitmap_free(struct bitmap *self);
|
|
int bitmap_equals(struct bitmap *self, struct bitmap *other);
|
|
int bitmap_is_subset(struct bitmap *self, struct bitmap *super);
|
|
|
|
struct ewah_bitmap * bitmap_to_ewah(struct bitmap *bitmap);
|
|
struct bitmap *ewah_to_bitmap(struct ewah_bitmap *ewah);
|
|
|
|
void bitmap_and_not(struct bitmap *self, struct bitmap *other);
|
|
void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other);
|
|
void bitmap_or(struct bitmap *self, const struct bitmap *other);
|
|
|
|
size_t bitmap_popcount(struct bitmap *self);
|
|
|
|
#endif
|