Merge branch 'ds/push-sparse-tree-walk'

"git pack-objects" learned another algorithm to compute the set of
objects to send, that trades the resulting packfile off to save
traversal cost to favor small pushes.

* ds/push-sparse-tree-walk:
  pack-objects: create GIT_TEST_PACK_SPARSE
  pack-objects: create pack.useSparse setting
  revision: implement sparse algorithm
  list-objects: consume sparse tree walk
  revision: add mark_tree_uninteresting_sparse
This commit is contained in:
Junio C Hamano 2019-02-06 22:05:24 -08:00
commit 5fda343321
12 changed files with 379 additions and 18 deletions

View File

@ -105,6 +105,15 @@ pack.useBitmaps::
true. You should not generally need to turn this off unless
you are debugging pack bitmaps.
pack.useSparse::
When true, git will default to using the '--sparse' option in
'git pack-objects' when the '--revs' option is present. This
algorithm only walks trees that appear in paths that introduce new
objects. This can have significant performance benefits when
computing a pack to send a small change. However, it is possible
that extra objects are added to the pack-file if the included
commits contain certain types of direct renames.
pack.writeBitmaps (deprecated)::
This is a deprecated synonym for `repack.writeBitmaps`.

View File

@ -14,7 +14,7 @@ SYNOPSIS
[--local] [--incremental] [--window=<n>] [--depth=<n>]
[--revs [--unpacked | --all]] [--keep-pack=<pack-name>]
[--stdout [--filter=<filter-spec>] | base-name]
[--shallow] [--keep-true-parents] < object-list
[--shallow] [--keep-true-parents] [--sparse] < object-list
DESCRIPTION
@ -196,6 +196,15 @@ depth is 4095.
Add --no-reuse-object if you want to force a uniform compression
level on all data no matter the source.
--sparse::
Use the "sparse" algorithm to determine which objects to include in
the pack, when combined with the "--revs" option. This algorithm
only walks trees that appear in paths that introduce new objects.
This can have significant performance benefits when computing
a pack to send a small change. However, it is possible that extra
objects are added to the pack-file if the included commits contain
certain types of direct renames.
--thin::
Create a "thin" pack by omitting the common objects between a
sender and a receiver in order to reduce network transfer. This

View File

@ -658,7 +658,7 @@ static void bisect_common(struct rev_info *revs)
if (prepare_revision_walk(revs))
die("revision walk setup failed");
if (revs->tree_objects)
mark_edges_uninteresting(revs, NULL);
mark_edges_uninteresting(revs, NULL, 0);
}
static void exit_if_skipped_commits(struct commit_list *tried,

View File

@ -84,6 +84,7 @@ static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
static int pack_to_stdout;
static int sparse;
static int thin;
static int num_preferred_base;
static struct progress *progress_state;
@ -2703,6 +2704,10 @@ static int git_pack_config(const char *k, const char *v, void *cb)
use_bitmap_index_default = git_config_bool(k, v);
return 0;
}
if (!strcmp(k, "pack.usesparse")) {
sparse = git_config_bool(k, v);
return 0;
}
if (!strcmp(k, "pack.threads")) {
delta_search_threads = git_config_int(k, v);
if (delta_search_threads < 0)
@ -3130,7 +3135,7 @@ static void get_object_list(int ac, const char **av)
if (prepare_revision_walk(&revs))
die(_("revision walk setup failed"));
mark_edges_uninteresting(&revs, show_edge);
mark_edges_uninteresting(&revs, show_edge, sparse);
if (!fn_show_object)
fn_show_object = show_object;
@ -3287,6 +3292,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{ OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"),
N_("unpack unreachable objects newer than <time>"),
PARSE_OPT_OPTARG, option_parse_unpack_unreachable },
OPT_BOOL(0, "sparse", &sparse,
N_("use the sparse reachability algorithm")),
OPT_BOOL(0, "thin", &thin,
N_("create thin packs")),
OPT_BOOL(0, "shallow", &shallow,
@ -3319,6 +3326,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
read_replace_refs = 0;
sparse = git_env_bool("GIT_TEST_PACK_SPARSE", 0);
reset_pack_idx_option(&pack_idx_opts);
git_config(git_pack_config, NULL);

View File

@ -546,7 +546,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
if (revs.tree_objects)
mark_edges_uninteresting(&revs, show_edge);
mark_edges_uninteresting(&revs, show_edge, 0);
if (bisect_list) {
int reaches, all;

View File

@ -1933,7 +1933,7 @@ int cmd_main(int argc, const char **argv)
pushing = 0;
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
mark_edges_uninteresting(&revs, NULL);
mark_edges_uninteresting(&revs, NULL, 0);
objects_to_send = get_delta(&revs, ref_lock);
finish_all_active_slots();

View File

@ -226,25 +226,73 @@ static void mark_edge_parents_uninteresting(struct commit *commit,
}
}
void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge)
static void add_edge_parents(struct commit *commit,
struct rev_info *revs,
show_edge_fn show_edge,
struct oidset *set)
{
struct commit_list *parents;
for (parents = commit->parents; parents; parents = parents->next) {
struct commit *parent = parents->item;
struct tree *tree = get_commit_tree(parent);
if (!tree)
continue;
oidset_insert(set, &tree->object.oid);
if (!(parent->object.flags & UNINTERESTING))
continue;
tree->object.flags |= UNINTERESTING;
if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
parent->object.flags |= SHOWN;
show_edge(parent);
}
}
}
void mark_edges_uninteresting(struct rev_info *revs,
show_edge_fn show_edge,
int sparse)
{
struct commit_list *list;
int i;
for (list = revs->commits; list; list = list->next) {
struct commit *commit = list->item;
if (sparse) {
struct oidset set;
oidset_init(&set, 16);
if (commit->object.flags & UNINTERESTING) {
mark_tree_uninteresting(revs->repo,
get_commit_tree(commit));
if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
commit->object.flags |= SHOWN;
show_edge(commit);
}
continue;
for (list = revs->commits; list; list = list->next) {
struct commit *commit = list->item;
struct tree *tree = get_commit_tree(commit);
if (commit->object.flags & UNINTERESTING)
tree->object.flags |= UNINTERESTING;
oidset_insert(&set, &tree->object.oid);
add_edge_parents(commit, revs, show_edge, &set);
}
mark_trees_uninteresting_sparse(revs->repo, &set);
oidset_clear(&set);
} else {
for (list = revs->commits; list; list = list->next) {
struct commit *commit = list->item;
if (commit->object.flags & UNINTERESTING) {
mark_tree_uninteresting(revs->repo,
get_commit_tree(commit));
if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
commit->object.flags |= SHOWN;
show_edge(commit);
}
continue;
}
mark_edge_parents_uninteresting(commit, revs, show_edge);
}
mark_edge_parents_uninteresting(commit, revs, show_edge);
}
if (revs->edge_hint_aggressive) {
for (i = 0; i < revs->cmdline.nr; i++) {
struct object *obj = revs->cmdline.rev[i].item;

View File

@ -10,7 +10,9 @@ typedef void (*show_object_fn)(struct object *, const char *, void *);
void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *);
typedef void (*show_edge_fn)(struct commit *);
void mark_edges_uninteresting(struct rev_info *, show_edge_fn);
void mark_edges_uninteresting(struct rev_info *revs,
show_edge_fn show_edge,
int sparse);
struct oidset;
struct list_objects_filter_options;

View File

@ -27,6 +27,7 @@
#include "commit-reach.h"
#include "commit-graph.h"
#include "prio-queue.h"
#include "hashmap.h"
volatile show_early_output_fn_t show_early_output;
@ -99,6 +100,148 @@ void mark_tree_uninteresting(struct repository *r, struct tree *tree)
mark_tree_contents_uninteresting(r, tree);
}
struct path_and_oids_entry {
struct hashmap_entry ent;
char *path;
struct oidset trees;
};
static int path_and_oids_cmp(const void *hashmap_cmp_fn_data,
const struct path_and_oids_entry *e1,
const struct path_and_oids_entry *e2,
const void *keydata)
{
return strcmp(e1->path, e2->path);
}
static void paths_and_oids_init(struct hashmap *map)
{
hashmap_init(map, (hashmap_cmp_fn) path_and_oids_cmp, NULL, 0);
}
static void paths_and_oids_clear(struct hashmap *map)
{
struct hashmap_iter iter;
struct path_and_oids_entry *entry;
hashmap_iter_init(map, &iter);
while ((entry = (struct path_and_oids_entry *)hashmap_iter_next(&iter))) {
oidset_clear(&entry->trees);
free(entry->path);
}
hashmap_free(map, 1);
}
static void paths_and_oids_insert(struct hashmap *map,
const char *path,
const struct object_id *oid)
{
int hash = strhash(path);
struct path_and_oids_entry key;
struct path_and_oids_entry *entry;
hashmap_entry_init(&key, hash);
/* use a shallow copy for the lookup */
key.path = (char *)path;
oidset_init(&key.trees, 0);
if (!(entry = (struct path_and_oids_entry *)hashmap_get(map, &key, NULL))) {
entry = xcalloc(1, sizeof(struct path_and_oids_entry));
hashmap_entry_init(entry, hash);
entry->path = xstrdup(key.path);
oidset_init(&entry->trees, 16);
hashmap_put(map, entry);
}
oidset_insert(&entry->trees, oid);
}
static void add_children_by_path(struct repository *r,
struct tree *tree,
struct hashmap *map)
{
struct tree_desc desc;
struct name_entry entry;
if (!tree)
return;
if (parse_tree_gently(tree, 1) < 0)
return;
init_tree_desc(&desc, tree->buffer, tree->size);
while (tree_entry(&desc, &entry)) {
switch (object_type(entry.mode)) {
case OBJ_TREE:
paths_and_oids_insert(map, entry.path, &entry.oid);
if (tree->object.flags & UNINTERESTING) {
struct tree *child = lookup_tree(r, &entry.oid);
if (child)
child->object.flags |= UNINTERESTING;
}
break;
case OBJ_BLOB:
if (tree->object.flags & UNINTERESTING) {
struct blob *child = lookup_blob(r, &entry.oid);
if (child)
child->object.flags |= UNINTERESTING;
}
break;
default:
/* Subproject commit - not in this repository */
break;
}
}
free_tree_buffer(tree);
}
void mark_trees_uninteresting_sparse(struct repository *r,
struct oidset *trees)
{
unsigned has_interesting = 0, has_uninteresting = 0;
struct hashmap map;
struct hashmap_iter map_iter;
struct path_and_oids_entry *entry;
struct object_id *oid;
struct oidset_iter iter;
oidset_iter_init(trees, &iter);
while ((!has_interesting || !has_uninteresting) &&
(oid = oidset_iter_next(&iter))) {
struct tree *tree = lookup_tree(r, oid);
if (!tree)
continue;
if (tree->object.flags & UNINTERESTING)
has_uninteresting = 1;
else
has_interesting = 1;
}
/* Do not walk unless we have both types of trees. */
if (!has_uninteresting || !has_interesting)
return;
paths_and_oids_init(&map);
oidset_iter_init(trees, &iter);
while ((oid = oidset_iter_next(&iter))) {
struct tree *tree = lookup_tree(r, oid);
add_children_by_path(r, tree, &map);
}
hashmap_iter_init(&map, &map_iter);
while ((entry = hashmap_iter_next(&map_iter)))
mark_trees_uninteresting_sparse(r, &entry->trees);
paths_and_oids_clear(&map);
}
struct commit_stack {
struct commit **items;
size_t nr, alloc;

View File

@ -67,6 +67,7 @@ struct rev_cmdline_info {
#define REVISION_WALK_NO_WALK_SORTED 1
#define REVISION_WALK_NO_WALK_UNSORTED 2
struct oidset;
struct topo_walk_info;
struct rev_info {
@ -327,6 +328,7 @@ void put_revision_mark(const struct rev_info *revs,
void mark_parents_uninteresting(struct commit *commit);
void mark_tree_uninteresting(struct repository *r, struct tree *tree);
void mark_trees_uninteresting_sparse(struct repository *r, struct oidset *trees);
void show_object_with_name(FILE *, struct object *, const char *);

View File

@ -358,6 +358,10 @@ GIT_TEST_INDEX_VERSION=<n> exercises the index read/write code path
for the index version specified. Can be set to any valid version
(currently 2, 3, or 4).
GIT_TEST_PACK_SPARSE=<boolean> if enabled will default the pack-objects
builtin to use the sparse object walk. This can still be overridden by
the --no-sparse command-line argument.
GIT_TEST_PRELOAD_INDEX=<boolean> exercises the preload-index code path
by overriding the minimum number of cache entries required per thread.

136
t/t5322-pack-objects-sparse.sh Executable file
View File

@ -0,0 +1,136 @@
#!/bin/sh
test_description='pack-objects object selection using sparse algorithm'
. ./test-lib.sh
test_expect_success 'setup repo' '
test_commit initial &&
for i in $(test_seq 1 3)
do
mkdir f$i &&
for j in $(test_seq 1 3)
do
mkdir f$i/f$j &&
echo $j >f$i/f$j/data.txt
done
done &&
git add . &&
git commit -m "Initialized trees" &&
for i in $(test_seq 1 3)
do
git checkout -b topic$i master &&
echo change-$i >f$i/f$i/data.txt &&
git commit -a -m "Changed f$i/f$i/data.txt"
done &&
cat >packinput.txt <<-EOF &&
topic1
^topic2
^topic3
EOF
git rev-parse \
topic1 \
topic1^{tree} \
topic1:f1 \
topic1:f1/f1 \
topic1:f1/f1/data.txt | sort >expect_objects.txt
'
test_expect_success 'non-sparse pack-objects' '
git pack-objects --stdout --revs --no-sparse <packinput.txt >nonsparse.pack &&
git index-pack -o nonsparse.idx nonsparse.pack &&
git show-index <nonsparse.idx | awk "{print \$2}" >nonsparse_objects.txt &&
test_cmp expect_objects.txt nonsparse_objects.txt
'
test_expect_success 'sparse pack-objects' '
git pack-objects --stdout --revs --sparse <packinput.txt >sparse.pack &&
git index-pack -o sparse.idx sparse.pack &&
git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt &&
test_cmp expect_objects.txt sparse_objects.txt
'
test_expect_success 'duplicate a folder from f3 and commit to topic1' '
git checkout topic1 &&
echo change-3 >f3/f3/data.txt &&
git commit -a -m "Changed f3/f3/data.txt" &&
git rev-parse \
topic1~1 \
topic1~1^{tree} \
topic1^{tree} \
topic1 \
topic1:f1 \
topic1:f1/f1 \
topic1:f1/f1/data.txt | sort >required_objects.txt
'
test_expect_success 'non-sparse pack-objects' '
git pack-objects --stdout --revs --no-sparse <packinput.txt >nonsparse.pack &&
git index-pack -o nonsparse.idx nonsparse.pack &&
git show-index <nonsparse.idx | awk "{print \$2}" >nonsparse_objects.txt &&
comm -1 -2 required_objects.txt nonsparse_objects.txt >nonsparse_required_objects.txt &&
test_cmp required_objects.txt nonsparse_required_objects.txt
'
test_expect_success 'sparse pack-objects' '
git pack-objects --stdout --revs --sparse <packinput.txt >sparse.pack &&
git index-pack -o sparse.idx sparse.pack &&
git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt &&
comm -1 -2 required_objects.txt sparse_objects.txt >sparse_required_objects.txt &&
test_cmp required_objects.txt sparse_required_objects.txt
'
# Demonstrate that the algorithms differ when we copy a tree wholesale
# from one folder to another.
test_expect_success 'duplicate a folder from f1 into f3' '
mkdir f3/f4 &&
cp -r f1/f1/* f3/f4 &&
git add f3/f4 &&
git commit -m "Copied f1/f1 to f3/f4" &&
cat >packinput.txt <<-EOF &&
topic1
^topic1~1
EOF
git rev-parse \
topic1 \
topic1^{tree} \
topic1:f3 | sort >required_objects.txt
'
test_expect_success 'non-sparse pack-objects' '
git pack-objects --stdout --revs --no-sparse <packinput.txt >nonsparse.pack &&
git index-pack -o nonsparse.idx nonsparse.pack &&
git show-index <nonsparse.idx | awk "{print \$2}" >nonsparse_objects.txt &&
comm -1 -2 required_objects.txt nonsparse_objects.txt >nonsparse_required_objects.txt &&
test_cmp required_objects.txt nonsparse_required_objects.txt
'
test_expect_success 'sparse pack-objects' '
git rev-parse \
topic1 \
topic1^{tree} \
topic1:f3 \
topic1:f3/f4 \
topic1:f3/f4/data.txt | sort >expect_sparse_objects.txt &&
git pack-objects --stdout --revs --sparse <packinput.txt >sparse.pack &&
git index-pack -o sparse.idx sparse.pack &&
git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt &&
test_cmp expect_sparse_objects.txt sparse_objects.txt
'
test_expect_success 'pack.useSparse enables algorithm' '
git config pack.useSparse true &&
git pack-objects --stdout --revs <packinput.txt >sparse.pack &&
git index-pack -o sparse.idx sparse.pack &&
git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt &&
test_cmp expect_sparse_objects.txt sparse_objects.txt
'
test_expect_success 'pack.useSparse overridden' '
git pack-objects --stdout --revs --no-sparse <packinput.txt >sparse.pack &&
git index-pack -o sparse.idx sparse.pack &&
git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt &&
test_cmp required_objects.txt sparse_objects.txt
'
test_done