refs: Use binary search to look up refs faster
Currently we linearly search through lists of refs when we need to find a specific ref. This can be very slow if we need to look up a large number of refs. By changing to a binary search we can make this faster. In order to be able to use a binary search we need to change from using linked lists to arrays, which we can manage using ALLOC_GROW. We can now also use the standard library qsort function to sort the refs arrays. Signed-off-by: Julian Phillips <julian@quantumfyre.co.uk> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
b4f223c636
commit
e9c4c11165
343
refs.c
343
refs.c
@ -8,14 +8,18 @@
|
||||
#define REF_KNOWS_PEELED 04
|
||||
#define REF_BROKEN 010
|
||||
|
||||
struct ref_list {
|
||||
struct ref_list *next;
|
||||
struct ref_entry {
|
||||
unsigned char flag; /* ISSYMREF? ISPACKED? */
|
||||
unsigned char sha1[20];
|
||||
unsigned char peeled[20];
|
||||
char name[FLEX_ARRAY];
|
||||
};
|
||||
|
||||
struct ref_array {
|
||||
int nr, alloc;
|
||||
struct ref_entry **refs;
|
||||
};
|
||||
|
||||
static const char *parse_ref_line(char *line, unsigned char *sha1)
|
||||
{
|
||||
/*
|
||||
@ -44,108 +48,80 @@ static const char *parse_ref_line(char *line, unsigned char *sha1)
|
||||
return line;
|
||||
}
|
||||
|
||||
static struct ref_list *add_ref(const char *name, const unsigned char *sha1,
|
||||
int flag, struct ref_list *list,
|
||||
struct ref_list **new_entry)
|
||||
static void add_ref(const char *name, const unsigned char *sha1,
|
||||
int flag, struct ref_array *refs,
|
||||
struct ref_entry **new_entry)
|
||||
{
|
||||
int len;
|
||||
struct ref_list *entry;
|
||||
struct ref_entry *entry;
|
||||
|
||||
/* Allocate it and add it in.. */
|
||||
len = strlen(name) + 1;
|
||||
entry = xmalloc(sizeof(struct ref_list) + len);
|
||||
entry = xmalloc(sizeof(struct ref_entry) + len);
|
||||
hashcpy(entry->sha1, sha1);
|
||||
hashclr(entry->peeled);
|
||||
memcpy(entry->name, name, len);
|
||||
entry->flag = flag;
|
||||
entry->next = list;
|
||||
if (new_entry)
|
||||
*new_entry = entry;
|
||||
return entry;
|
||||
ALLOC_GROW(refs->refs, refs->nr + 1, refs->alloc);
|
||||
refs->refs[refs->nr++] = entry;
|
||||
}
|
||||
|
||||
/* merge sort the ref list */
|
||||
static struct ref_list *sort_ref_list(struct ref_list *list)
|
||||
static int ref_entry_cmp(const void *a, const void *b)
|
||||
{
|
||||
int psize, qsize, last_merge_count, cmp;
|
||||
struct ref_list *p, *q, *l, *e;
|
||||
struct ref_list *new_list = list;
|
||||
int k = 1;
|
||||
int merge_count = 0;
|
||||
|
||||
if (!list)
|
||||
return list;
|
||||
|
||||
do {
|
||||
last_merge_count = merge_count;
|
||||
merge_count = 0;
|
||||
|
||||
psize = 0;
|
||||
|
||||
p = new_list;
|
||||
q = new_list;
|
||||
new_list = NULL;
|
||||
l = NULL;
|
||||
|
||||
while (p) {
|
||||
merge_count++;
|
||||
|
||||
while (psize < k && q->next) {
|
||||
q = q->next;
|
||||
psize++;
|
||||
struct ref_entry *one = *(struct ref_entry **)a;
|
||||
struct ref_entry *two = *(struct ref_entry **)b;
|
||||
return strcmp(one->name, two->name);
|
||||
}
|
||||
qsize = k;
|
||||
|
||||
while ((psize > 0) || (qsize > 0 && q)) {
|
||||
if (qsize == 0 || !q) {
|
||||
e = p;
|
||||
p = p->next;
|
||||
psize--;
|
||||
} else if (psize == 0) {
|
||||
e = q;
|
||||
q = q->next;
|
||||
qsize--;
|
||||
} else {
|
||||
cmp = strcmp(q->name, p->name);
|
||||
if (cmp < 0) {
|
||||
e = q;
|
||||
q = q->next;
|
||||
qsize--;
|
||||
} else if (cmp > 0) {
|
||||
e = p;
|
||||
p = p->next;
|
||||
psize--;
|
||||
} else {
|
||||
if (hashcmp(q->sha1, p->sha1))
|
||||
static void sort_ref_array(struct ref_array *array)
|
||||
{
|
||||
int i = 0, j = 1;
|
||||
|
||||
/* Nothing to sort unless there are at least two entries */
|
||||
if (array->nr < 2)
|
||||
return;
|
||||
|
||||
qsort(array->refs, array->nr, sizeof(*array->refs), ref_entry_cmp);
|
||||
|
||||
/* Remove any duplicates from the ref_array */
|
||||
for (; j < array->nr; j++) {
|
||||
struct ref_entry *a = array->refs[i];
|
||||
struct ref_entry *b = array->refs[j];
|
||||
if (!strcmp(a->name, b->name)) {
|
||||
if (hashcmp(a->sha1, b->sha1))
|
||||
die("Duplicated ref, and SHA1s don't match: %s",
|
||||
q->name);
|
||||
warning("Duplicated ref: %s", q->name);
|
||||
e = q;
|
||||
q = q->next;
|
||||
qsize--;
|
||||
a->name);
|
||||
warning("Duplicated ref: %s", a->name);
|
||||
continue;
|
||||
}
|
||||
i++;
|
||||
array->refs[i] = array->refs[j];
|
||||
}
|
||||
array->nr = i + 1;
|
||||
}
|
||||
|
||||
static struct ref_entry *search_ref_array(struct ref_array *array, const char *name)
|
||||
{
|
||||
struct ref_entry *e, **r;
|
||||
int len;
|
||||
|
||||
if (name == NULL)
|
||||
return NULL;
|
||||
|
||||
len = strlen(name) + 1;
|
||||
e = xmalloc(sizeof(struct ref_entry) + len);
|
||||
memcpy(e->name, name, len);
|
||||
|
||||
r = bsearch(&e, array->refs, array->nr, sizeof(*array->refs), ref_entry_cmp);
|
||||
|
||||
free(e);
|
||||
e = p;
|
||||
p = p->next;
|
||||
psize--;
|
||||
}
|
||||
}
|
||||
|
||||
e->next = NULL;
|
||||
if (r == NULL)
|
||||
return NULL;
|
||||
|
||||
if (l)
|
||||
l->next = e;
|
||||
if (!new_list)
|
||||
new_list = e;
|
||||
l = e;
|
||||
}
|
||||
|
||||
p = q;
|
||||
};
|
||||
|
||||
k = k * 2;
|
||||
} while ((last_merge_count != merge_count) || (last_merge_count != 1));
|
||||
|
||||
return new_list;
|
||||
return *r;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -155,38 +131,37 @@ static struct ref_list *sort_ref_list(struct ref_list *list)
|
||||
static struct cached_refs {
|
||||
char did_loose;
|
||||
char did_packed;
|
||||
struct ref_list *loose;
|
||||
struct ref_list *packed;
|
||||
struct ref_array loose;
|
||||
struct ref_array packed;
|
||||
} cached_refs, submodule_refs;
|
||||
static struct ref_list *current_ref;
|
||||
static struct ref_entry *current_ref;
|
||||
|
||||
static struct ref_list *extra_refs;
|
||||
static struct ref_array extra_refs;
|
||||
|
||||
static void free_ref_list(struct ref_list *list)
|
||||
static void free_ref_array(struct ref_array *array)
|
||||
{
|
||||
struct ref_list *next;
|
||||
for ( ; list; list = next) {
|
||||
next = list->next;
|
||||
free(list);
|
||||
}
|
||||
int i;
|
||||
for (i = 0; i < array->nr; i++)
|
||||
free(array->refs[i]);
|
||||
free(array->refs);
|
||||
array->nr = array->alloc = 0;
|
||||
array->refs = NULL;
|
||||
}
|
||||
|
||||
static void invalidate_cached_refs(void)
|
||||
{
|
||||
struct cached_refs *ca = &cached_refs;
|
||||
|
||||
if (ca->did_loose && ca->loose)
|
||||
free_ref_list(ca->loose);
|
||||
if (ca->did_packed && ca->packed)
|
||||
free_ref_list(ca->packed);
|
||||
ca->loose = ca->packed = NULL;
|
||||
if (ca->did_loose)
|
||||
free_ref_array(&ca->loose);
|
||||
if (ca->did_packed)
|
||||
free_ref_array(&ca->packed);
|
||||
ca->did_loose = ca->did_packed = 0;
|
||||
}
|
||||
|
||||
static void read_packed_refs(FILE *f, struct cached_refs *cached_refs)
|
||||
{
|
||||
struct ref_list *list = NULL;
|
||||
struct ref_list *last = NULL;
|
||||
struct ref_entry *last = NULL;
|
||||
char refline[PATH_MAX];
|
||||
int flag = REF_ISPACKED;
|
||||
|
||||
@ -205,7 +180,7 @@ static void read_packed_refs(FILE *f, struct cached_refs *cached_refs)
|
||||
|
||||
name = parse_ref_line(refline, sha1);
|
||||
if (name) {
|
||||
list = add_ref(name, sha1, flag, list, &last);
|
||||
add_ref(name, sha1, flag, &cached_refs->packed, &last);
|
||||
continue;
|
||||
}
|
||||
if (last &&
|
||||
@ -215,21 +190,20 @@ static void read_packed_refs(FILE *f, struct cached_refs *cached_refs)
|
||||
!get_sha1_hex(refline + 1, sha1))
|
||||
hashcpy(last->peeled, sha1);
|
||||
}
|
||||
cached_refs->packed = sort_ref_list(list);
|
||||
sort_ref_array(&cached_refs->packed);
|
||||
}
|
||||
|
||||
void add_extra_ref(const char *name, const unsigned char *sha1, int flag)
|
||||
{
|
||||
extra_refs = add_ref(name, sha1, flag, extra_refs, NULL);
|
||||
add_ref(name, sha1, flag, &extra_refs, NULL);
|
||||
}
|
||||
|
||||
void clear_extra_refs(void)
|
||||
{
|
||||
free_ref_list(extra_refs);
|
||||
extra_refs = NULL;
|
||||
free_ref_array(&extra_refs);
|
||||
}
|
||||
|
||||
static struct ref_list *get_packed_refs(const char *submodule)
|
||||
static struct ref_array *get_packed_refs(const char *submodule)
|
||||
{
|
||||
const char *packed_refs_file;
|
||||
struct cached_refs *refs;
|
||||
@ -237,7 +211,7 @@ static struct ref_list *get_packed_refs(const char *submodule)
|
||||
if (submodule) {
|
||||
packed_refs_file = git_path_submodule(submodule, "packed-refs");
|
||||
refs = &submodule_refs;
|
||||
free_ref_list(refs->packed);
|
||||
free_ref_array(&refs->packed);
|
||||
} else {
|
||||
packed_refs_file = git_path("packed-refs");
|
||||
refs = &cached_refs;
|
||||
@ -245,18 +219,17 @@ static struct ref_list *get_packed_refs(const char *submodule)
|
||||
|
||||
if (!refs->did_packed || submodule) {
|
||||
FILE *f = fopen(packed_refs_file, "r");
|
||||
refs->packed = NULL;
|
||||
if (f) {
|
||||
read_packed_refs(f, refs);
|
||||
fclose(f);
|
||||
}
|
||||
refs->did_packed = 1;
|
||||
}
|
||||
return refs->packed;
|
||||
return &refs->packed;
|
||||
}
|
||||
|
||||
static struct ref_list *get_ref_dir(const char *submodule, const char *base,
|
||||
struct ref_list *list)
|
||||
static void get_ref_dir(const char *submodule, const char *base,
|
||||
struct ref_array *array)
|
||||
{
|
||||
DIR *dir;
|
||||
const char *path;
|
||||
@ -299,7 +272,7 @@ static struct ref_list *get_ref_dir(const char *submodule, const char *base,
|
||||
if (stat(refdir, &st) < 0)
|
||||
continue;
|
||||
if (S_ISDIR(st.st_mode)) {
|
||||
list = get_ref_dir(submodule, ref, list);
|
||||
get_ref_dir(submodule, ref, array);
|
||||
continue;
|
||||
}
|
||||
if (submodule) {
|
||||
@ -314,12 +287,11 @@ static struct ref_list *get_ref_dir(const char *submodule, const char *base,
|
||||
hashclr(sha1);
|
||||
flag |= REF_BROKEN;
|
||||
}
|
||||
list = add_ref(ref, sha1, flag, list, NULL);
|
||||
add_ref(ref, sha1, flag, array, NULL);
|
||||
}
|
||||
free(ref);
|
||||
closedir(dir);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
struct warn_if_dangling_data {
|
||||
@ -356,21 +328,21 @@ void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname)
|
||||
for_each_rawref(warn_if_dangling_symref, &data);
|
||||
}
|
||||
|
||||
static struct ref_list *get_loose_refs(const char *submodule)
|
||||
static struct ref_array *get_loose_refs(const char *submodule)
|
||||
{
|
||||
if (submodule) {
|
||||
free_ref_list(submodule_refs.loose);
|
||||
submodule_refs.loose = get_ref_dir(submodule, "refs", NULL);
|
||||
submodule_refs.loose = sort_ref_list(submodule_refs.loose);
|
||||
return submodule_refs.loose;
|
||||
free_ref_array(&submodule_refs.loose);
|
||||
get_ref_dir(submodule, "refs", &submodule_refs.loose);
|
||||
sort_ref_array(&submodule_refs.loose);
|
||||
return &submodule_refs.loose;
|
||||
}
|
||||
|
||||
if (!cached_refs.did_loose) {
|
||||
cached_refs.loose = get_ref_dir(NULL, "refs", NULL);
|
||||
cached_refs.loose = sort_ref_list(cached_refs.loose);
|
||||
get_ref_dir(NULL, "refs", &cached_refs.loose);
|
||||
sort_ref_array(&cached_refs.loose);
|
||||
cached_refs.did_loose = 1;
|
||||
}
|
||||
return cached_refs.loose;
|
||||
return &cached_refs.loose;
|
||||
}
|
||||
|
||||
/* We allow "recursive" symbolic refs. Only within reason, though */
|
||||
@ -381,8 +353,8 @@ static int resolve_gitlink_packed_ref(char *name, int pathlen, const char *refna
|
||||
{
|
||||
FILE *f;
|
||||
struct cached_refs refs;
|
||||
struct ref_list *ref;
|
||||
int retval;
|
||||
struct ref_entry *ref;
|
||||
int retval = -1;
|
||||
|
||||
strcpy(name + pathlen, "packed-refs");
|
||||
f = fopen(name, "r");
|
||||
@ -390,17 +362,12 @@ static int resolve_gitlink_packed_ref(char *name, int pathlen, const char *refna
|
||||
return -1;
|
||||
read_packed_refs(f, &refs);
|
||||
fclose(f);
|
||||
ref = refs.packed;
|
||||
retval = -1;
|
||||
while (ref) {
|
||||
if (!strcmp(ref->name, refname)) {
|
||||
retval = 0;
|
||||
ref = search_ref_array(&refs.packed, refname);
|
||||
if (ref != NULL) {
|
||||
memcpy(result, ref->sha1, 20);
|
||||
break;
|
||||
retval = 0;
|
||||
}
|
||||
ref = ref->next;
|
||||
}
|
||||
free_ref_list(refs.packed);
|
||||
free_ref_array(&refs.packed);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -501,16 +468,14 @@ const char *resolve_ref(const char *ref, unsigned char *sha1, int reading, int *
|
||||
git_snpath(path, sizeof(path), "%s", ref);
|
||||
/* Special case: non-existing file. */
|
||||
if (lstat(path, &st) < 0) {
|
||||
struct ref_list *list = get_packed_refs(NULL);
|
||||
while (list) {
|
||||
if (!strcmp(ref, list->name)) {
|
||||
hashcpy(sha1, list->sha1);
|
||||
struct ref_array *packed = get_packed_refs(NULL);
|
||||
struct ref_entry *r = search_ref_array(packed, ref);
|
||||
if (r != NULL) {
|
||||
hashcpy(sha1, r->sha1);
|
||||
if (flag)
|
||||
*flag |= REF_ISPACKED;
|
||||
return ref;
|
||||
}
|
||||
list = list->next;
|
||||
}
|
||||
if (reading || errno != ENOENT)
|
||||
return NULL;
|
||||
hashclr(sha1);
|
||||
@ -584,7 +549,7 @@ int read_ref(const char *ref, unsigned char *sha1)
|
||||
|
||||
#define DO_FOR_EACH_INCLUDE_BROKEN 01
|
||||
static int do_one_ref(const char *base, each_ref_fn fn, int trim,
|
||||
int flags, void *cb_data, struct ref_list *entry)
|
||||
int flags, void *cb_data, struct ref_entry *entry)
|
||||
{
|
||||
if (strncmp(base, entry->name, trim))
|
||||
return 0;
|
||||
@ -630,19 +595,13 @@ int peel_ref(const char *ref, unsigned char *sha1)
|
||||
return -1;
|
||||
|
||||
if ((flag & REF_ISPACKED)) {
|
||||
struct ref_list *list = get_packed_refs(NULL);
|
||||
struct ref_array *array = get_packed_refs(NULL);
|
||||
struct ref_entry *r = search_ref_array(array, ref);
|
||||
|
||||
while (list) {
|
||||
if (!strcmp(list->name, ref)) {
|
||||
if (list->flag & REF_KNOWS_PEELED) {
|
||||
hashcpy(sha1, list->peeled);
|
||||
if (r != NULL && r->flag & REF_KNOWS_PEELED) {
|
||||
hashcpy(sha1, r->peeled);
|
||||
return 0;
|
||||
}
|
||||
/* older pack-refs did not leave peeled ones */
|
||||
break;
|
||||
}
|
||||
list = list->next;
|
||||
}
|
||||
}
|
||||
|
||||
fallback:
|
||||
@ -660,36 +619,39 @@ fallback:
|
||||
static int do_for_each_ref(const char *submodule, const char *base, each_ref_fn fn,
|
||||
int trim, int flags, void *cb_data)
|
||||
{
|
||||
int retval = 0;
|
||||
struct ref_list *packed = get_packed_refs(submodule);
|
||||
struct ref_list *loose = get_loose_refs(submodule);
|
||||
int retval = 0, i, p = 0, l = 0;
|
||||
struct ref_array *packed = get_packed_refs(submodule);
|
||||
struct ref_array *loose = get_loose_refs(submodule);
|
||||
|
||||
struct ref_list *extra;
|
||||
struct ref_array *extra = &extra_refs;
|
||||
|
||||
for (extra = extra_refs; extra; extra = extra->next)
|
||||
retval = do_one_ref(base, fn, trim, flags, cb_data, extra);
|
||||
for (i = 0; i < extra->nr; i++)
|
||||
retval = do_one_ref(base, fn, trim, flags, cb_data, extra->refs[i]);
|
||||
|
||||
while (packed && loose) {
|
||||
struct ref_list *entry;
|
||||
int cmp = strcmp(packed->name, loose->name);
|
||||
while (p < packed->nr && l < loose->nr) {
|
||||
struct ref_entry *entry;
|
||||
int cmp = strcmp(packed->refs[p]->name, loose->refs[l]->name);
|
||||
if (!cmp) {
|
||||
packed = packed->next;
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
if (cmp > 0) {
|
||||
entry = loose;
|
||||
loose = loose->next;
|
||||
entry = loose->refs[l++];
|
||||
} else {
|
||||
entry = packed;
|
||||
packed = packed->next;
|
||||
entry = packed->refs[p++];
|
||||
}
|
||||
retval = do_one_ref(base, fn, trim, flags, cb_data, entry);
|
||||
if (retval)
|
||||
goto end_each;
|
||||
}
|
||||
|
||||
for (packed = packed ? packed : loose; packed; packed = packed->next) {
|
||||
retval = do_one_ref(base, fn, trim, flags, cb_data, packed);
|
||||
if (l < loose->nr) {
|
||||
p = l;
|
||||
packed = loose;
|
||||
}
|
||||
|
||||
for (; p < packed->nr; p++) {
|
||||
retval = do_one_ref(base, fn, trim, flags, cb_data, packed->refs[p]);
|
||||
if (retval)
|
||||
goto end_each;
|
||||
}
|
||||
@ -980,24 +942,24 @@ static int remove_empty_directories(const char *file)
|
||||
}
|
||||
|
||||
static int is_refname_available(const char *ref, const char *oldref,
|
||||
struct ref_list *list, int quiet)
|
||||
struct ref_array *array, int quiet)
|
||||
{
|
||||
int namlen = strlen(ref); /* e.g. 'foo/bar' */
|
||||
while (list) {
|
||||
/* list->name could be 'foo' or 'foo/bar/baz' */
|
||||
if (!oldref || strcmp(oldref, list->name)) {
|
||||
int len = strlen(list->name);
|
||||
int i, namlen = strlen(ref); /* e.g. 'foo/bar' */
|
||||
for (i = 0; i < array->nr; i++ ) {
|
||||
struct ref_entry *entry = array->refs[i];
|
||||
/* entry->name could be 'foo' or 'foo/bar/baz' */
|
||||
if (!oldref || strcmp(oldref, entry->name)) {
|
||||
int len = strlen(entry->name);
|
||||
int cmplen = (namlen < len) ? namlen : len;
|
||||
const char *lead = (namlen < len) ? list->name : ref;
|
||||
if (!strncmp(ref, list->name, cmplen) &&
|
||||
const char *lead = (namlen < len) ? entry->name : ref;
|
||||
if (!strncmp(ref, entry->name, cmplen) &&
|
||||
lead[cmplen] == '/') {
|
||||
if (!quiet)
|
||||
error("'%s' exists; cannot create '%s'",
|
||||
list->name, ref);
|
||||
entry->name, ref);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
list = list->next;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
@ -1104,18 +1066,13 @@ static struct lock_file packlock;
|
||||
|
||||
static int repack_without_ref(const char *refname)
|
||||
{
|
||||
struct ref_list *list, *packed_ref_list;
|
||||
int fd;
|
||||
int found = 0;
|
||||
struct ref_array *packed;
|
||||
struct ref_entry *ref;
|
||||
int fd, i;
|
||||
|
||||
packed_ref_list = get_packed_refs(NULL);
|
||||
for (list = packed_ref_list; list; list = list->next) {
|
||||
if (!strcmp(refname, list->name)) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
packed = get_packed_refs(NULL);
|
||||
ref = search_ref_array(packed, refname);
|
||||
if (ref == NULL)
|
||||
return 0;
|
||||
fd = hold_lock_file_for_update(&packlock, git_path("packed-refs"), 0);
|
||||
if (fd < 0) {
|
||||
@ -1123,17 +1080,19 @@ static int repack_without_ref(const char *refname)
|
||||
return error("cannot delete '%s' from packed refs", refname);
|
||||
}
|
||||
|
||||
for (list = packed_ref_list; list; list = list->next) {
|
||||
for (i = 0; i < packed->nr; i++) {
|
||||
char line[PATH_MAX + 100];
|
||||
int len;
|
||||
|
||||
if (!strcmp(refname, list->name))
|
||||
ref = packed->refs[i];
|
||||
|
||||
if (!strcmp(refname, ref->name))
|
||||
continue;
|
||||
len = snprintf(line, sizeof(line), "%s %s\n",
|
||||
sha1_to_hex(list->sha1), list->name);
|
||||
sha1_to_hex(ref->sha1), ref->name);
|
||||
/* this should not happen but just being defensive */
|
||||
if (len > sizeof(line))
|
||||
die("too long a refname '%s'", list->name);
|
||||
die("too long a refname '%s'", ref->name);
|
||||
write_or_die(fd, line, len);
|
||||
}
|
||||
return commit_lock_file(&packlock);
|
||||
|
Loading…
Reference in New Issue
Block a user