find_unique_abbrev: move logic out of get_short_sha1()
The get_short_sha1() is only about reading short sha1s; we do call it in a loop to check "is this long enough" for each object, but otherwise it should not need to know about things like our default_abbrev setting. So instead of asking it to set default_automatic_abbrev as a side-effect, let's just have find_unique_abbrev() pick the right place to start its loop. This requires a separate approximate_object_count() function, but that naturally belongs with the rest of sha1_file.c. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
e6c587c733
commit
8e3f52d778
7
cache.h
7
cache.h
@ -1204,7 +1204,6 @@ struct object_context {
|
|||||||
#define GET_SHA1_TREEISH 020
|
#define GET_SHA1_TREEISH 020
|
||||||
#define GET_SHA1_BLOB 040
|
#define GET_SHA1_BLOB 040
|
||||||
#define GET_SHA1_FOLLOW_SYMLINKS 0100
|
#define GET_SHA1_FOLLOW_SYMLINKS 0100
|
||||||
#define GET_SHA1_AUTOMATIC 0200
|
|
||||||
#define GET_SHA1_ONLY_TO_DIE 04000
|
#define GET_SHA1_ONLY_TO_DIE 04000
|
||||||
|
|
||||||
#define GET_SHA1_DISAMBIGUATORS \
|
#define GET_SHA1_DISAMBIGUATORS \
|
||||||
@ -1456,6 +1455,12 @@ extern void prepare_packed_git(void);
|
|||||||
extern void reprepare_packed_git(void);
|
extern void reprepare_packed_git(void);
|
||||||
extern void install_packed_git(struct packed_git *pack);
|
extern void install_packed_git(struct packed_git *pack);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Give a rough count of objects in the repository. This sacrifices accuracy
|
||||||
|
* for speed.
|
||||||
|
*/
|
||||||
|
unsigned long approximate_object_count(void);
|
||||||
|
|
||||||
extern struct packed_git *find_sha1_pack(const unsigned char *sha1,
|
extern struct packed_git *find_sha1_pack(const unsigned char *sha1,
|
||||||
struct packed_git *packs);
|
struct packed_git *packs);
|
||||||
|
|
||||||
|
27
sha1_file.c
27
sha1_file.c
@ -1381,6 +1381,32 @@ static void prepare_packed_git_one(char *objdir, int local)
|
|||||||
strbuf_release(&path);
|
strbuf_release(&path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int approximate_object_count_valid;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Give a fast, rough count of the number of objects in the repository. This
|
||||||
|
* ignores loose objects completely. If you have a lot of them, then either
|
||||||
|
* you should repack because your performance will be awful, or they are
|
||||||
|
* all unreachable objects about to be pruned, in which case they're not really
|
||||||
|
* interesting as a measure of repo size in the first place.
|
||||||
|
*/
|
||||||
|
unsigned long approximate_object_count(void)
|
||||||
|
{
|
||||||
|
static unsigned long count;
|
||||||
|
if (!approximate_object_count_valid) {
|
||||||
|
struct packed_git *p;
|
||||||
|
|
||||||
|
prepare_packed_git();
|
||||||
|
count = 0;
|
||||||
|
for (p = packed_git; p; p = p->next) {
|
||||||
|
if (open_pack_index(p))
|
||||||
|
continue;
|
||||||
|
count += p->num_objects;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
static void *get_next_packed_git(const void *p)
|
static void *get_next_packed_git(const void *p)
|
||||||
{
|
{
|
||||||
return ((const struct packed_git *)p)->next;
|
return ((const struct packed_git *)p)->next;
|
||||||
@ -1455,6 +1481,7 @@ void prepare_packed_git(void)
|
|||||||
|
|
||||||
void reprepare_packed_git(void)
|
void reprepare_packed_git(void)
|
||||||
{
|
{
|
||||||
|
approximate_object_count_valid = 0;
|
||||||
prepare_packed_git_run_once = 0;
|
prepare_packed_git_run_once = 0;
|
||||||
prepare_packed_git();
|
prepare_packed_git();
|
||||||
}
|
}
|
||||||
|
60
sha1_name.c
60
sha1_name.c
@ -15,7 +15,6 @@ typedef int (*disambiguate_hint_fn)(const unsigned char *, void *);
|
|||||||
|
|
||||||
struct disambiguate_state {
|
struct disambiguate_state {
|
||||||
int len; /* length of prefix in hex chars */
|
int len; /* length of prefix in hex chars */
|
||||||
unsigned int nrobjects;
|
|
||||||
char hex_pfx[GIT_SHA1_HEXSZ + 1];
|
char hex_pfx[GIT_SHA1_HEXSZ + 1];
|
||||||
unsigned char bin_pfx[GIT_SHA1_RAWSZ];
|
unsigned char bin_pfx[GIT_SHA1_RAWSZ];
|
||||||
|
|
||||||
@ -119,14 +118,6 @@ static void find_short_object_filename(struct disambiguate_state *ds)
|
|||||||
|
|
||||||
if (strlen(de->d_name) != 38)
|
if (strlen(de->d_name) != 38)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/*
|
|
||||||
* We only look at the one subdirectory, and we assume
|
|
||||||
* each subdirectory is roughly similar, so each
|
|
||||||
* object we find probably has 255 other objects in
|
|
||||||
* the other fan-out directories.
|
|
||||||
*/
|
|
||||||
ds->nrobjects += 256;
|
|
||||||
if (memcmp(de->d_name, ds->hex_pfx + 2, ds->len - 2))
|
if (memcmp(de->d_name, ds->hex_pfx + 2, ds->len - 2))
|
||||||
continue;
|
continue;
|
||||||
memcpy(hex + 2, de->d_name, 38);
|
memcpy(hex + 2, de->d_name, 38);
|
||||||
@ -160,7 +151,6 @@ static void unique_in_pack(struct packed_git *p,
|
|||||||
|
|
||||||
open_pack_index(p);
|
open_pack_index(p);
|
||||||
num = p->num_objects;
|
num = p->num_objects;
|
||||||
ds->nrobjects += num;
|
|
||||||
last = num;
|
last = num;
|
||||||
while (first < last) {
|
while (first < last) {
|
||||||
uint32_t mid = (first + last) / 2;
|
uint32_t mid = (first + last) / 2;
|
||||||
@ -390,9 +380,6 @@ static int show_ambiguous_object(const unsigned char *sha1, void *data)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* start from our historical default before the automatic abbreviation */
|
|
||||||
static int default_automatic_abbrev = FALLBACK_DEFAULT_ABBREV;
|
|
||||||
|
|
||||||
static int get_short_sha1(const char *name, int len, unsigned char *sha1,
|
static int get_short_sha1(const char *name, int len, unsigned char *sha1,
|
||||||
unsigned flags)
|
unsigned flags)
|
||||||
{
|
{
|
||||||
@ -439,14 +426,6 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1,
|
|||||||
for_each_abbrev(ds.hex_pfx, show_ambiguous_object, &ds);
|
for_each_abbrev(ds.hex_pfx, show_ambiguous_object, &ds);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (len < 16 && !status && (flags & GET_SHA1_AUTOMATIC)) {
|
|
||||||
unsigned int expect_collision = 1 << (len * 2);
|
|
||||||
if (ds.nrobjects > expect_collision) {
|
|
||||||
default_automatic_abbrev = len+1;
|
|
||||||
return SHORT_NAME_AMBIGUOUS;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -476,22 +455,53 @@ int for_each_abbrev(const char *prefix, each_abbrev_fn fn, void *cb_data)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return the slot of the most-significant bit set in "val". There are various
|
||||||
|
* ways to do this quickly with fls() or __builtin_clzl(), but speed is
|
||||||
|
* probably not a big deal here.
|
||||||
|
*/
|
||||||
|
static unsigned msb(unsigned long val)
|
||||||
|
{
|
||||||
|
unsigned r = 0;
|
||||||
|
while (val >>= 1)
|
||||||
|
r++;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
|
int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
|
||||||
{
|
{
|
||||||
int status, exists;
|
int status, exists;
|
||||||
int flags = GET_SHA1_QUIETLY;
|
|
||||||
|
|
||||||
if (len < 0) {
|
if (len < 0) {
|
||||||
flags |= GET_SHA1_AUTOMATIC;
|
unsigned long count = approximate_object_count();
|
||||||
len = default_automatic_abbrev;
|
/*
|
||||||
|
* Add one because the MSB only tells us the highest bit set,
|
||||||
|
* not including the value of all the _other_ bits (so "15"
|
||||||
|
* is only one off of 2^4, but the MSB is the 3rd bit.
|
||||||
|
*/
|
||||||
|
len = msb(count) + 1;
|
||||||
|
/*
|
||||||
|
* We now know we have on the order of 2^len objects, which
|
||||||
|
* expects a collision at 2^(len/2). But we also care about hex
|
||||||
|
* chars, not bits, and there are 4 bits per hex. So all
|
||||||
|
* together we need to divide by 2; but we also want to round
|
||||||
|
* odd numbers up, hence adding one before dividing.
|
||||||
|
*/
|
||||||
|
len = (len + 1) / 2;
|
||||||
|
/*
|
||||||
|
* For very small repos, we stick with our regular fallback.
|
||||||
|
*/
|
||||||
|
if (len < FALLBACK_DEFAULT_ABBREV)
|
||||||
|
len = FALLBACK_DEFAULT_ABBREV;
|
||||||
}
|
}
|
||||||
|
|
||||||
sha1_to_hex_r(hex, sha1);
|
sha1_to_hex_r(hex, sha1);
|
||||||
if (len == 40 || !len)
|
if (len == 40 || !len)
|
||||||
return 40;
|
return 40;
|
||||||
exists = has_sha1_file(sha1);
|
exists = has_sha1_file(sha1);
|
||||||
while (len < 40) {
|
while (len < 40) {
|
||||||
unsigned char sha1_ret[20];
|
unsigned char sha1_ret[20];
|
||||||
status = get_short_sha1(hex, len, sha1_ret, flags);
|
status = get_short_sha1(hex, len, sha1_ret, GET_SHA1_QUIETLY);
|
||||||
if (exists
|
if (exists
|
||||||
? !status
|
? !status
|
||||||
: status == SHORT_NAME_NOT_FOUND) {
|
: status == SHORT_NAME_NOT_FOUND) {
|
||||||
|
Loading…
Reference in New Issue
Block a user