Merge branch 'jk/alternate-ref-optim'

Optimizes resource usage while enumerating refs from an alternate
object store, to help the receiving end of a "push" that hosts a
repository with many "forks".

* jk/alternate-ref-optim:
  receive-pack: avoid duplicates between our refs and alternates
  receive-pack: treat namespace .have lines like alternates
  receive-pack: fix misleading namespace/.have comment
  receive-pack: use oidset to de-duplicate .have lines
  add oidset API
  fetch-pack: cache results of for_each_alternate_ref
  for_each_alternate_ref: replace transport code with for-each-ref
  for_each_alternate_ref: pass name/oid instead of ref struct
  for_each_alternate_ref: use strbuf for path allocation
  for_each_alternate_ref: stop trimming trailing slashes
  for_each_alternate_ref: handle failure from real_pathdup()
This commit is contained in:
Junio C Hamano 2017-02-27 13:57:13 -08:00
commit b9c2919f9b
9 changed files with 249 additions and 49 deletions

View File

@ -781,6 +781,7 @@ LIB_OBJS += notes-cache.o
LIB_OBJS += notes-merge.o
LIB_OBJS += notes-utils.o
LIB_OBJS += object.o
LIB_OBJS += oidset.o
LIB_OBJS += pack-bitmap.o
LIB_OBJS += pack-bitmap-write.o
LIB_OBJS += pack-check.o

View File

@ -21,6 +21,7 @@
#include "sigchain.h"
#include "fsck.h"
#include "tmp-objdir.h"
#include "oidset.h"
static const char * const receive_pack_usage[] = {
N_("git receive-pack <git-dir>"),
@ -250,8 +251,9 @@ static void show_ref(const char *path, const unsigned char *sha1)
}
static int show_ref_cb(const char *path_full, const struct object_id *oid,
int flag, void *unused)
int flag, void *data)
{
struct oidset *seen = data;
const char *path = strip_namespace(path_full);
if (ref_is_hidden(path, path_full))
@ -260,37 +262,38 @@ static int show_ref_cb(const char *path_full, const struct object_id *oid,
/*
* Advertise refs outside our current namespace as ".have"
* refs, so that the client can use them to minimize data
* transfer but will otherwise ignore them. This happens to
* cover ".have" that are thrown in by add_one_alternate_ref()
* to mark histories that are complete in our alternates as
* well.
* transfer but will otherwise ignore them.
*/
if (!path)
if (!path) {
if (oidset_insert(seen, oid))
return 0;
path = ".have";
} else {
oidset_insert(seen, oid);
}
show_ref(path, oid->hash);
return 0;
}
static int show_one_alternate_sha1(const unsigned char sha1[20], void *unused)
static void show_one_alternate_ref(const char *refname,
const struct object_id *oid,
void *data)
{
show_ref(".have", sha1);
return 0;
}
struct oidset *seen = data;
static void collect_one_alternate_ref(const struct ref *ref, void *data)
{
struct sha1_array *sa = data;
sha1_array_append(sa, ref->old_oid.hash);
if (oidset_insert(seen, oid))
return;
show_ref(".have", oid->hash);
}
static void write_head_info(void)
{
struct sha1_array sa = SHA1_ARRAY_INIT;
static struct oidset seen = OIDSET_INIT;
for_each_alternate_ref(collect_one_alternate_ref, &sa);
sha1_array_for_each_unique(&sa, show_one_alternate_sha1, NULL);
sha1_array_clear(&sa);
for_each_ref(show_ref_cb, NULL);
for_each_ref(show_ref_cb, &seen);
for_each_alternate_ref(show_one_alternate_ref, &seen);
oidset_clear(&seen);
if (!sent_capabilities)
show_ref("capabilities^{}", null_sha1);

View File

@ -35,6 +35,7 @@ static const char *alternate_shallow_file;
#define COMMON_REF (1U << 2)
#define SEEN (1U << 3)
#define POPPED (1U << 4)
#define ALTERNATE (1U << 5)
static int marked;
@ -67,6 +68,41 @@ static inline void print_verbose(const struct fetch_pack_args *args,
fputc('\n', stderr);
}
/*
 * Growable array of objects collected from alternate refs.
 * "nr" entries of "items" are in use and "alloc" is the allocated
 * capacity; the array is grown with ALLOC_GROW() by its users.
 */
struct alternate_object_cache {
	struct object **items;
	size_t nr;
	size_t alloc;
};
/*
 * for_each_alternate_ref() callback: parse the object named by "oid"
 * and append it to the alternate_object_cache passed in "vcache".
 *
 * Objects that fail to parse are skipped.  The ALTERNATE flag is set
 * on every object that gets stored, and an object that already carries
 * the flag is skipped, so each object is stored at most once.
 * "refname" is not used.
 */
static void cache_one_alternate(const char *refname,
				const struct object_id *oid,
				void *vcache)
{
	struct alternate_object_cache *alt_cache = vcache;
	struct object *parsed = parse_object(oid->hash);

	if (!parsed)
		return;
	if (parsed->flags & ALTERNATE)
		return;

	parsed->flags |= ALTERNATE;
	ALLOC_GROW(alt_cache->items, alt_cache->nr + 1, alt_cache->alloc);
	alt_cache->items[alt_cache->nr] = parsed;
	alt_cache->nr++;
}
/*
 * Invoke "cb" once for every object reachable from an alternate ref.
 *
 * The alternates are enumerated with for_each_alternate_ref() only on
 * the first call; the resulting objects are kept in a function-static
 * cache and replayed on every later call.
 */
static void for_each_cached_alternate(void (*cb)(struct object *))
{
	static int initialized;
	static struct alternate_object_cache cache;
	size_t pos = 0;

	if (!initialized) {
		for_each_alternate_ref(cache_one_alternate, &cache);
		initialized = 1;
	}

	while (pos < cache.nr) {
		cb(cache.items[pos]);
		pos++;
	}
}
static void rev_list_push(struct commit *commit, int mark)
{
if (!(commit->object.flags & mark)) {
@ -253,9 +289,9 @@ static void send_request(struct fetch_pack_args *args,
write_or_die(fd, buf->buf, buf->len);
}
static void insert_one_alternate_ref(const struct ref *ref, void *unused)
static void insert_one_alternate_object(struct object *obj)
{
rev_list_insert_ref(NULL, ref->old_oid.hash);
rev_list_insert_ref(NULL, obj->oid.hash);
}
#define INITIAL_FLUSH 16
@ -298,7 +334,7 @@ static int find_common(struct fetch_pack_args *args,
marked = 1;
for_each_ref(rev_list_insert_ref_oid, NULL);
for_each_alternate_ref(insert_one_alternate_ref, NULL);
for_each_cached_alternate(insert_one_alternate_object);
fetching = 0;
for ( ; refs ; refs = refs->next) {
@ -619,9 +655,9 @@ static void filter_refs(struct fetch_pack_args *args,
*refs = newlist;
}
static void mark_alternate_complete(const struct ref *ref, void *unused)
static void mark_alternate_complete(struct object *obj)
{
mark_complete(ref->old_oid.hash);
mark_complete(obj->oid.hash);
}
static int everything_local(struct fetch_pack_args *args,
@ -657,7 +693,7 @@ static int everything_local(struct fetch_pack_args *args,
if (!args->deepen) {
for_each_ref(mark_complete_oid, NULL);
for_each_alternate_ref(mark_alternate_complete, NULL);
for_each_cached_alternate(mark_alternate_complete);
commit_list_sort_by_date(&complete);
if (cutoff)
mark_recent_complete_commits(args, cutoff);

View File

@ -29,7 +29,7 @@ struct object_array {
/*
* object flag allocation:
* revision.h: 0---------10 26
* fetch-pack.c: 0---4
* fetch-pack.c: 0---5
* walker.c: 0-2
* upload-pack.c: 4 11----------------19
* builtin/blame.c: 12-13

49
oidset.c Normal file
View File

@ -0,0 +1,49 @@
#include "cache.h"
#include "oidset.h"
/*
 * One member of an oidset: the hashmap bookkeeping plus a private copy
 * of the object id.  Pointers to this struct are handed directly to
 * hashmap_add() and come back as "const void *" in oidset_hashcmp(),
 * so "hash" presumably has to stay the first member -- confirm against
 * the hashmap API before reordering.
 */
struct oidset_entry {
struct hashmap_entry hash;
struct object_id oid;
};
/*
 * Comparison callback registered with hashmap_init().
 *
 * "va" and "vb" are oidset_entry pointers.  When "vkey" is non-NULL it
 * is a bare object_id and is compared against "va" instead of the oid
 * stored in "vb".  Returns the oidcmp() result.
 */
static int oidset_hashcmp(const void *va, const void *vb,
			  const void *vkey)
{
	const struct oidset_entry *lhs = va;
	const struct oidset_entry *rhs = vb;
	const struct object_id *wanted = vkey;

	if (!wanted)
		wanted = &rhs->oid;
	return oidcmp(&lhs->oid, wanted);
}
int oidset_contains(const struct oidset *set, const struct object_id *oid)
{
struct hashmap_entry key;
if (!set->map.cmpfn)
return 0;
hashmap_entry_init(&key, sha1hash(oid->hash));
return !!hashmap_get(&set->map, &key, oid);
}
/*
 * Add a copy of "oid" to "set".
 *
 * The underlying hashmap is initialized on first use, which is why a
 * zero-initialized oidset is valid.  Returns 1 if the oid was already
 * present (nothing is added), 0 if a new entry was inserted.
 */
int oidset_insert(struct oidset *set, const struct object_id *oid)
{
	struct oidset_entry *fresh;

	if (!set->map.cmpfn)
		hashmap_init(&set->map, oidset_hashcmp, 0);

	if (oidset_contains(set, oid))
		return 1;

	fresh = xmalloc(sizeof(*fresh));
	oidcpy(&fresh->oid, oid);
	hashmap_entry_init(&fresh->hash, sha1hash(oid->hash));
	hashmap_add(&set->map, fresh);

	return 0;
}
/*
 * Drop every entry from "set" by freeing the underlying hashmap.
 * NOTE(review): the second argument (1) presumably asks hashmap_free()
 * to free the oidset_entry allocations as well -- confirm against the
 * hashmap API.
 */
void oidset_clear(struct oidset *set)
{
hashmap_free(&set->map, 1);
}

45
oidset.h Normal file
View File

@ -0,0 +1,45 @@
#ifndef OIDSET_H
#define OIDSET_H
/**
 * This API is similar to sha1-array, in that it maintains a set of object ids
 * in a memory-efficient way. The major differences are:
 *
 *   1. It uses a hash, so we can do online duplicate removal, rather than
 *      sort-and-uniq at the end. This can reduce memory footprint if you have
 *      a large list of oids with many duplicates.
 *
 *   2. The per-unique-oid memory footprint is slightly higher due to hash
 *      table overhead.
 */
/**
 * A single oidset; should be zero-initialized (or use OIDSET_INIT).
 *
 * The embedded hashmap is set up lazily by the first oidset_insert(); a
 * map without a comparison function is treated as an empty set.
 */
struct oidset {
struct hashmap map;
};
#define OIDSET_INIT { { NULL } }
/**
 * Returns true iff `set` contains `oid`.
 */
int oidset_contains(const struct oidset *set, const struct object_id *oid);
/**
 * Insert the oid into the set; a copy is made, so "oid" does not need
 * to persist after this function is called.
 *
 * Returns 1 if the oid was already in the set, 0 otherwise. This can be used
 * to perform an efficient check-and-add.
 */
int oidset_insert(struct oidset *set, const struct object_id *oid);
/**
 * Remove all entries from the oidset, freeing any resources associated with
 * it.
 */
void oidset_clear(struct oidset *set);
#endif /* OIDSET_H */

View File

@ -255,4 +255,42 @@ test_expect_success 'deny pushing to delete current branch' '
)
'
# Read a packet trace on stdin and print the ref advertisement lines.
#
# Only lines containing "push< " are considered; the text after that
# marker is printed up to (not including) the first backslash.  Output
# stops at the first "0000" packet.
extract_ref_advertisement () {
perl -lne '
# \\ is there to skip capabilities after \0
/push< ([^\\]+)/ or next;
exit 0 if $1 eq "0000";
print $1;
'
}
# Set up "shared" and a "fork" cloned from it with -s, then give "shared"
# two extra branches (only-shared, foo) that point at the same new
# commit.  Pushing to "fork" with GIT_TRACE_PACKET set records the ref
# advertisement, which is compared against the expected list: the fork's
# own refs plus a single ".have" line for the shared-only commit.
test_expect_success 'receive-pack de-dupes .have lines' '
git init shared &&
git -C shared commit --allow-empty -m both &&
git clone -s shared fork &&
(
cd shared &&
git checkout -b only-shared &&
git commit --allow-empty -m only-shared &&
git update-ref refs/heads/foo HEAD
) &&
# Notable things in this expectation:
# - local refs are not de-duped
# - .have does not duplicate locals
# - .have does not duplicate itself
local=$(git -C fork rev-parse HEAD) &&
shared=$(git -C shared rev-parse only-shared) &&
cat >expect <<-EOF &&
$local refs/heads/master
$local refs/remotes/origin/HEAD
$local refs/remotes/origin/master
$shared .have
EOF
GIT_TRACE_PACKET=$(pwd)/trace git push fork HEAD:foo &&
extract_ref_advertisement <trace >refs &&
test_cmp expect refs
'
test_done

View File

@ -1206,6 +1206,42 @@ literal_copy:
return xstrdup(url);
}
/*
 * Enumerate the refs of the repository whose git directory is "path"
 * by running "git for-each-ref" in a child process, and call "cb" with
 * (refname, oid, data) for each line of its output.
 *
 * Each output line is expected to be "<hex object name> <refname>".
 * The first malformed line triggers a warning and ends the enumeration.
 * If the child cannot be started, the function returns without calling
 * "cb" at all.  The child's exit status is not inspected.
 */
static void read_alternate_refs(const char *path,
				alternate_ref_fn *cb,
				void *data)
{
	struct child_process cmd = CHILD_PROCESS_INIT;
	struct strbuf line = STRBUF_INIT;
	FILE *fh;

	cmd.git_cmd = 1;
	argv_array_pushf(&cmd.args, "--git-dir=%s", path);
	argv_array_push(&cmd.args, "for-each-ref");
	argv_array_push(&cmd.args, "--format=%(objectname) %(refname)");
	/*
	 * NOTE(review): local_repo_env presumably keeps our own
	 * repository-specific environment out of the child -- confirm.
	 */
	cmd.env = local_repo_env;
	/* cmd.out is used as the read end of the child's stdout below. */
	cmd.out = -1;

	if (start_command(&cmd))
		return;

	fh = xfdopen(cmd.out, "r");
	while (strbuf_getline_lf(&line, fh) != EOF) {
		struct object_id oid;

		if (get_oid_hex(line.buf, &oid) ||
		    line.buf[GIT_SHA1_HEXSZ] != ' ') {
			warning("invalid line while parsing alternate refs: %s",
				line.buf);
			break;
		}

		/* The refname starts right after the hex oid and the space. */
		cb(line.buf + GIT_SHA1_HEXSZ + 1, &oid, data);
	}

	fclose(fh);
	finish_command(&cmd);
	/* The line buffer is heap-allocated by strbuf_getline_lf(). */
	strbuf_release(&line);
}
struct alternate_refs_data {
alternate_ref_fn *fn;
void *data;
@ -1214,34 +1250,26 @@ struct alternate_refs_data {
static int refs_from_alternate_cb(struct alternate_object_database *e,
void *data)
{
char *other;
size_t len;
struct remote *remote;
struct transport *transport;
const struct ref *extra;
struct strbuf path = STRBUF_INIT;
size_t base_len;
struct alternate_refs_data *cb = data;
other = real_pathdup(e->path);
len = strlen(other);
if (!strbuf_realpath(&path, e->path, 0))
goto out;
if (!strbuf_strip_suffix(&path, "/objects"))
goto out;
base_len = path.len;
while (other[len-1] == '/')
other[--len] = '\0';
if (len < 8 || memcmp(other + len - 8, "/objects", 8))
goto out;
/* Is this a git repository with refs? */
memcpy(other + len - 8, "/refs", 6);
if (!is_directory(other))
strbuf_addstr(&path, "/refs");
if (!is_directory(path.buf))
goto out;
other[len - 8] = '\0';
remote = remote_get(other);
transport = transport_get(remote, other);
for (extra = transport_get_remote_refs(transport);
extra;
extra = extra->next)
cb->fn(extra, cb->data);
transport_disconnect(transport);
strbuf_setlen(&path, base_len);
read_alternate_refs(path.buf, cb->fn, cb->data);
out:
free(other);
strbuf_release(&path);
return 0;
}

View File

@ -255,6 +255,6 @@ int transport_refs_pushed(struct ref *ref);
void transport_print_push_status(const char *dest, struct ref *refs,
int verbose, int porcelain, unsigned int *reject_reasons);
typedef void alternate_ref_fn(const struct ref *, void *);
typedef void alternate_ref_fn(const char *refname, const struct object_id *oid, void *);
extern void for_each_alternate_ref(alternate_ref_fn, void *);
#endif