Merge branch 'jk/alternate-ref-optim'
Optimizes resource usage while enumerating refs from an alternate object
store, to help the receiving end of "push" that hosts a repository with
many "forks".

* jk/alternate-ref-optim:
  - receive-pack: avoid duplicates between our refs and alternates
  - receive-pack: treat namespace .have lines like alternates
  - receive-pack: fix misleading namespace/.have comment
  - receive-pack: use oidset to de-duplicate .have lines
  - add oidset API
  - fetch-pack: cache results of for_each_alternate_ref
  - for_each_alternate_ref: replace transport code with for-each-ref
  - for_each_alternate_ref: pass name/oid instead of ref struct
  - for_each_alternate_ref: use strbuf for path allocation
  - for_each_alternate_ref: stop trimming trailing slashes
  - for_each_alternate_ref: handle failure from real_pathdup()
This commit is contained in:
commit
b9c2919f9b
1
Makefile
1
Makefile
@ -781,6 +781,7 @@ LIB_OBJS += notes-cache.o
|
||||
LIB_OBJS += notes-merge.o
|
||||
LIB_OBJS += notes-utils.o
|
||||
LIB_OBJS += object.o
|
||||
LIB_OBJS += oidset.o
|
||||
LIB_OBJS += pack-bitmap.o
|
||||
LIB_OBJS += pack-bitmap-write.o
|
||||
LIB_OBJS += pack-check.o
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "sigchain.h"
|
||||
#include "fsck.h"
|
||||
#include "tmp-objdir.h"
|
||||
#include "oidset.h"
|
||||
|
||||
static const char * const receive_pack_usage[] = {
|
||||
N_("git receive-pack <git-dir>"),
|
||||
@ -250,8 +251,9 @@ static void show_ref(const char *path, const unsigned char *sha1)
|
||||
}
|
||||
|
||||
static int show_ref_cb(const char *path_full, const struct object_id *oid,
|
||||
int flag, void *unused)
|
||||
int flag, void *data)
|
||||
{
|
||||
struct oidset *seen = data;
|
||||
const char *path = strip_namespace(path_full);
|
||||
|
||||
if (ref_is_hidden(path, path_full))
|
||||
@ -260,37 +262,38 @@ static int show_ref_cb(const char *path_full, const struct object_id *oid,
|
||||
/*
|
||||
* Advertise refs outside our current namespace as ".have"
|
||||
* refs, so that the client can use them to minimize data
|
||||
* transfer but will otherwise ignore them. This happens to
|
||||
* cover ".have" that are thrown in by add_one_alternate_ref()
|
||||
* to mark histories that are complete in our alternates as
|
||||
* well.
|
||||
* transfer but will otherwise ignore them.
|
||||
*/
|
||||
if (!path)
|
||||
if (!path) {
|
||||
if (oidset_insert(seen, oid))
|
||||
return 0;
|
||||
path = ".have";
|
||||
} else {
|
||||
oidset_insert(seen, oid);
|
||||
}
|
||||
show_ref(path, oid->hash);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int show_one_alternate_sha1(const unsigned char sha1[20], void *unused)
|
||||
static void show_one_alternate_ref(const char *refname,
|
||||
const struct object_id *oid,
|
||||
void *data)
|
||||
{
|
||||
show_ref(".have", sha1);
|
||||
return 0;
|
||||
}
|
||||
struct oidset *seen = data;
|
||||
|
||||
static void collect_one_alternate_ref(const struct ref *ref, void *data)
|
||||
{
|
||||
struct sha1_array *sa = data;
|
||||
sha1_array_append(sa, ref->old_oid.hash);
|
||||
if (oidset_insert(seen, oid))
|
||||
return;
|
||||
|
||||
show_ref(".have", oid->hash);
|
||||
}
|
||||
|
||||
static void write_head_info(void)
|
||||
{
|
||||
struct sha1_array sa = SHA1_ARRAY_INIT;
|
||||
static struct oidset seen = OIDSET_INIT;
|
||||
|
||||
for_each_alternate_ref(collect_one_alternate_ref, &sa);
|
||||
sha1_array_for_each_unique(&sa, show_one_alternate_sha1, NULL);
|
||||
sha1_array_clear(&sa);
|
||||
for_each_ref(show_ref_cb, NULL);
|
||||
for_each_ref(show_ref_cb, &seen);
|
||||
for_each_alternate_ref(show_one_alternate_ref, &seen);
|
||||
oidset_clear(&seen);
|
||||
if (!sent_capabilities)
|
||||
show_ref("capabilities^{}", null_sha1);
|
||||
|
||||
|
48
fetch-pack.c
48
fetch-pack.c
@ -35,6 +35,7 @@ static const char *alternate_shallow_file;
|
||||
#define COMMON_REF (1U << 2)
|
||||
#define SEEN (1U << 3)
|
||||
#define POPPED (1U << 4)
|
||||
#define ALTERNATE (1U << 5)
|
||||
|
||||
static int marked;
|
||||
|
||||
@ -67,6 +68,41 @@ static inline void print_verbose(const struct fetch_pack_args *args,
|
||||
fputc('\n', stderr);
|
||||
}
|
||||
|
||||
/*
 * Growable list of objects collected from the refs of our alternates.
 * Filled by cache_one_alternate() and replayed by
 * for_each_cached_alternate().
 */
struct alternate_object_cache {
	struct object **items;	/* array of cached object pointers */
	size_t nr;		/* number of entries in use */
	size_t alloc;		/* allocated capacity, managed by ALLOC_GROW */
};
|
||||
|
||||
static void cache_one_alternate(const char *refname,
|
||||
const struct object_id *oid,
|
||||
void *vcache)
|
||||
{
|
||||
struct alternate_object_cache *cache = vcache;
|
||||
struct object *obj = parse_object(oid->hash);
|
||||
|
||||
if (!obj || (obj->flags & ALTERNATE))
|
||||
return;
|
||||
|
||||
obj->flags |= ALTERNATE;
|
||||
ALLOC_GROW(cache->items, cache->nr + 1, cache->alloc);
|
||||
cache->items[cache->nr++] = obj;
|
||||
}
|
||||
|
||||
static void for_each_cached_alternate(void (*cb)(struct object *))
|
||||
{
|
||||
static int initialized;
|
||||
static struct alternate_object_cache cache;
|
||||
size_t i;
|
||||
|
||||
if (!initialized) {
|
||||
for_each_alternate_ref(cache_one_alternate, &cache);
|
||||
initialized = 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < cache.nr; i++)
|
||||
cb(cache.items[i]);
|
||||
}
|
||||
|
||||
static void rev_list_push(struct commit *commit, int mark)
|
||||
{
|
||||
if (!(commit->object.flags & mark)) {
|
||||
@ -253,9 +289,9 @@ static void send_request(struct fetch_pack_args *args,
|
||||
write_or_die(fd, buf->buf, buf->len);
|
||||
}
|
||||
|
||||
static void insert_one_alternate_ref(const struct ref *ref, void *unused)
|
||||
static void insert_one_alternate_object(struct object *obj)
|
||||
{
|
||||
rev_list_insert_ref(NULL, ref->old_oid.hash);
|
||||
rev_list_insert_ref(NULL, obj->oid.hash);
|
||||
}
|
||||
|
||||
#define INITIAL_FLUSH 16
|
||||
@ -298,7 +334,7 @@ static int find_common(struct fetch_pack_args *args,
|
||||
marked = 1;
|
||||
|
||||
for_each_ref(rev_list_insert_ref_oid, NULL);
|
||||
for_each_alternate_ref(insert_one_alternate_ref, NULL);
|
||||
for_each_cached_alternate(insert_one_alternate_object);
|
||||
|
||||
fetching = 0;
|
||||
for ( ; refs ; refs = refs->next) {
|
||||
@ -619,9 +655,9 @@ static void filter_refs(struct fetch_pack_args *args,
|
||||
*refs = newlist;
|
||||
}
|
||||
|
||||
static void mark_alternate_complete(const struct ref *ref, void *unused)
|
||||
static void mark_alternate_complete(struct object *obj)
|
||||
{
|
||||
mark_complete(ref->old_oid.hash);
|
||||
mark_complete(obj->oid.hash);
|
||||
}
|
||||
|
||||
static int everything_local(struct fetch_pack_args *args,
|
||||
@ -657,7 +693,7 @@ static int everything_local(struct fetch_pack_args *args,
|
||||
|
||||
if (!args->deepen) {
|
||||
for_each_ref(mark_complete_oid, NULL);
|
||||
for_each_alternate_ref(mark_alternate_complete, NULL);
|
||||
for_each_cached_alternate(mark_alternate_complete);
|
||||
commit_list_sort_by_date(&complete);
|
||||
if (cutoff)
|
||||
mark_recent_complete_commits(args, cutoff);
|
||||
|
2
object.h
2
object.h
@ -29,7 +29,7 @@ struct object_array {
|
||||
/*
|
||||
* object flag allocation:
|
||||
* revision.h: 0---------10 26
|
||||
* fetch-pack.c: 0---4
|
||||
* fetch-pack.c: 0---5
|
||||
* walker.c: 0-2
|
||||
* upload-pack.c: 4 11----------------19
|
||||
* builtin/blame.c: 12-13
|
||||
|
49
oidset.c
Normal file
49
oidset.c
Normal file
@ -0,0 +1,49 @@
|
||||
#include "cache.h"
|
||||
#include "oidset.h"
|
||||
|
||||
struct oidset_entry {
|
||||
struct hashmap_entry hash;
|
||||
struct object_id oid;
|
||||
};
|
||||
|
||||
static int oidset_hashcmp(const void *va, const void *vb,
|
||||
const void *vkey)
|
||||
{
|
||||
const struct oidset_entry *a = va, *b = vb;
|
||||
const struct object_id *key = vkey;
|
||||
return oidcmp(&a->oid, key ? key : &b->oid);
|
||||
}
|
||||
|
||||
int oidset_contains(const struct oidset *set, const struct object_id *oid)
|
||||
{
|
||||
struct hashmap_entry key;
|
||||
|
||||
if (!set->map.cmpfn)
|
||||
return 0;
|
||||
|
||||
hashmap_entry_init(&key, sha1hash(oid->hash));
|
||||
return !!hashmap_get(&set->map, &key, oid);
|
||||
}
|
||||
|
||||
int oidset_insert(struct oidset *set, const struct object_id *oid)
|
||||
{
|
||||
struct oidset_entry *entry;
|
||||
|
||||
if (!set->map.cmpfn)
|
||||
hashmap_init(&set->map, oidset_hashcmp, 0);
|
||||
|
||||
if (oidset_contains(set, oid))
|
||||
return 1;
|
||||
|
||||
entry = xmalloc(sizeof(*entry));
|
||||
hashmap_entry_init(&entry->hash, sha1hash(oid->hash));
|
||||
oidcpy(&entry->oid, oid);
|
||||
|
||||
hashmap_add(&set->map, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void oidset_clear(struct oidset *set)
|
||||
{
|
||||
hashmap_free(&set->map, 1);
|
||||
}
|
45
oidset.h
Normal file
45
oidset.h
Normal file
@ -0,0 +1,45 @@
|
||||
#ifndef OIDSET_H
|
||||
#define OIDSET_H
|
||||
|
||||
/**
|
||||
* This API is similar to sha1-array, in that it maintains a set of object ids
|
||||
* in a memory-efficient way. The major differences are:
|
||||
*
|
||||
* 1. It uses a hash, so we can do online duplicate removal, rather than
|
||||
* sort-and-uniq at the end. This can reduce memory footprint if you have
|
||||
* a large list of oids with many duplicates.
|
||||
*
|
||||
* 2. The per-unique-oid memory footprint is slightly higher due to hash
|
||||
* table overhead.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A single oidset; should be zero-initialized (or use OIDSET_INIT).
|
||||
*/
|
||||
struct oidset {
|
||||
struct hashmap map;
|
||||
};
|
||||
|
||||
#define OIDSET_INIT { { NULL } }
|
||||
|
||||
/**
|
||||
* Returns true iff `set` contains `oid`.
|
||||
*/
|
||||
int oidset_contains(const struct oidset *set, const struct object_id *oid);
|
||||
|
||||
/**
|
||||
* Insert the oid into the set; a copy is made, so "oid" does not need
|
||||
* to persist after this function is called.
|
||||
*
|
||||
* Returns 1 if the oid was already in the set, 0 otherwise. This can be used
|
||||
* to perform an efficient check-and-add.
|
||||
*/
|
||||
int oidset_insert(struct oidset *set, const struct object_id *oid);
|
||||
|
||||
/**
|
||||
* Remove all entries from the oidset, freeing any resources associated with
|
||||
* it.
|
||||
*/
|
||||
void oidset_clear(struct oidset *set);
|
||||
|
||||
#endif /* OIDSET_H */
|
@ -255,4 +255,42 @@ test_expect_success 'deny pushing to delete current branch' '
|
||||
)
|
||||
'
|
||||
|
||||
extract_ref_advertisement () {
|
||||
perl -lne '
|
||||
# \\ is there to skip capabilities after \0
|
||||
/push< ([^\\]+)/ or next;
|
||||
exit 0 if $1 eq "0000";
|
||||
print $1;
|
||||
'
|
||||
}
|
||||
|
||||
test_expect_success 'receive-pack de-dupes .have lines' '
|
||||
git init shared &&
|
||||
git -C shared commit --allow-empty -m both &&
|
||||
git clone -s shared fork &&
|
||||
(
|
||||
cd shared &&
|
||||
git checkout -b only-shared &&
|
||||
git commit --allow-empty -m only-shared &&
|
||||
git update-ref refs/heads/foo HEAD
|
||||
) &&
|
||||
|
||||
# Notable things in this expectation:
|
||||
# - local refs are not de-duped
|
||||
# - .have does not duplicate locals
|
||||
# - .have does not duplicate itself
|
||||
local=$(git -C fork rev-parse HEAD) &&
|
||||
shared=$(git -C shared rev-parse only-shared) &&
|
||||
cat >expect <<-EOF &&
|
||||
$local refs/heads/master
|
||||
$local refs/remotes/origin/HEAD
|
||||
$local refs/remotes/origin/master
|
||||
$shared .have
|
||||
EOF
|
||||
|
||||
GIT_TRACE_PACKET=$(pwd)/trace git push fork HEAD:foo &&
|
||||
extract_ref_advertisement <trace >refs &&
|
||||
test_cmp expect refs
|
||||
'
|
||||
|
||||
test_done
|
||||
|
72
transport.c
72
transport.c
@ -1206,6 +1206,42 @@ literal_copy:
|
||||
return xstrdup(url);
|
||||
}
|
||||
|
||||
static void read_alternate_refs(const char *path,
|
||||
alternate_ref_fn *cb,
|
||||
void *data)
|
||||
{
|
||||
struct child_process cmd = CHILD_PROCESS_INIT;
|
||||
struct strbuf line = STRBUF_INIT;
|
||||
FILE *fh;
|
||||
|
||||
cmd.git_cmd = 1;
|
||||
argv_array_pushf(&cmd.args, "--git-dir=%s", path);
|
||||
argv_array_push(&cmd.args, "for-each-ref");
|
||||
argv_array_push(&cmd.args, "--format=%(objectname) %(refname)");
|
||||
cmd.env = local_repo_env;
|
||||
cmd.out = -1;
|
||||
|
||||
if (start_command(&cmd))
|
||||
return;
|
||||
|
||||
fh = xfdopen(cmd.out, "r");
|
||||
while (strbuf_getline_lf(&line, fh) != EOF) {
|
||||
struct object_id oid;
|
||||
|
||||
if (get_oid_hex(line.buf, &oid) ||
|
||||
line.buf[GIT_SHA1_HEXSZ] != ' ') {
|
||||
warning("invalid line while parsing alternate refs: %s",
|
||||
line.buf);
|
||||
break;
|
||||
}
|
||||
|
||||
cb(line.buf + GIT_SHA1_HEXSZ + 1, &oid, data);
|
||||
}
|
||||
|
||||
fclose(fh);
|
||||
finish_command(&cmd);
|
||||
}
|
||||
|
||||
struct alternate_refs_data {
|
||||
alternate_ref_fn *fn;
|
||||
void *data;
|
||||
@ -1214,34 +1250,26 @@ struct alternate_refs_data {
|
||||
static int refs_from_alternate_cb(struct alternate_object_database *e,
|
||||
void *data)
|
||||
{
|
||||
char *other;
|
||||
size_t len;
|
||||
struct remote *remote;
|
||||
struct transport *transport;
|
||||
const struct ref *extra;
|
||||
struct strbuf path = STRBUF_INIT;
|
||||
size_t base_len;
|
||||
struct alternate_refs_data *cb = data;
|
||||
|
||||
other = real_pathdup(e->path);
|
||||
len = strlen(other);
|
||||
if (!strbuf_realpath(&path, e->path, 0))
|
||||
goto out;
|
||||
if (!strbuf_strip_suffix(&path, "/objects"))
|
||||
goto out;
|
||||
base_len = path.len;
|
||||
|
||||
while (other[len-1] == '/')
|
||||
other[--len] = '\0';
|
||||
if (len < 8 || memcmp(other + len - 8, "/objects", 8))
|
||||
goto out;
|
||||
/* Is this a git repository with refs? */
|
||||
memcpy(other + len - 8, "/refs", 6);
|
||||
if (!is_directory(other))
|
||||
strbuf_addstr(&path, "/refs");
|
||||
if (!is_directory(path.buf))
|
||||
goto out;
|
||||
other[len - 8] = '\0';
|
||||
remote = remote_get(other);
|
||||
transport = transport_get(remote, other);
|
||||
for (extra = transport_get_remote_refs(transport);
|
||||
extra;
|
||||
extra = extra->next)
|
||||
cb->fn(extra, cb->data);
|
||||
transport_disconnect(transport);
|
||||
strbuf_setlen(&path, base_len);
|
||||
|
||||
read_alternate_refs(path.buf, cb->fn, cb->data);
|
||||
|
||||
out:
|
||||
free(other);
|
||||
strbuf_release(&path);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -255,6 +255,6 @@ int transport_refs_pushed(struct ref *ref);
|
||||
void transport_print_push_status(const char *dest, struct ref *refs,
|
||||
int verbose, int porcelain, unsigned int *reject_reasons);
|
||||
|
||||
typedef void alternate_ref_fn(const struct ref *, void *);
|
||||
typedef void alternate_ref_fn(const char *refname, const struct object_id *oid, void *);
|
||||
extern void for_each_alternate_ref(alternate_ref_fn, void *);
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user