Merge branch 'jt/repack-promisor-packs'

After a partial clone, repeated fetches from promisor remote would
have accumulated many packfiles marked with .promisor bit without
getting them coalesced into fewer packfiles, hurting performance.
"git repack" now learned to repack them.

* jt/repack-promisor-packs:
  repack: repack promisor objects if -a or -A is set
  repack: refactor setup of pack-objects cmd
This commit is contained in:
Junio C Hamano 2018-08-20 11:33:55 -07:00
commit 36f0f344e7
3 changed files with 213 additions and 58 deletions

View File

@ -40,6 +40,11 @@ OPTIONS
Note that users fetching over dumb protocols will have to fetch the Note that users fetching over dumb protocols will have to fetch the
whole new pack in order to get any contained object, no matter how many whole new pack in order to get any contained object, no matter how many
other objects in that pack they already have locally. other objects in that pack they already have locally.
+
Promisor packfiles are repacked separately: if there are packfiles that
have an associated ".promisor" file, these packfiles will be repacked
into another separate pack, and an empty ".promisor" file corresponding
to the new separate pack will be written.
-A:: -A::
Same as `-a`, unless `-d` is used. Then any unreachable Same as `-a`, unless `-d` is used. Then any unreachable

View File

@ -8,6 +8,8 @@
#include "strbuf.h" #include "strbuf.h"
#include "string-list.h" #include "string-list.h"
#include "argv-array.h" #include "argv-array.h"
#include "packfile.h"
#include "object-store.h"
static int delta_base_offset = 1; static int delta_base_offset = 1;
static int pack_kept_objects = -1; static int pack_kept_objects = -1;
@ -83,7 +85,7 @@ static void remove_pack_on_signal(int signo)
/* /*
* Adds all packs hex strings to the fname list, which do not * Adds all packs hex strings to the fname list, which do not
* have a corresponding .keep or .promisor file. These packs are not to * have a corresponding .keep file. These packs are not to
* be kept if we are going to pack everything into one file. * be kept if we are going to pack everything into one file.
*/ */
static void get_non_kept_pack_filenames(struct string_list *fname_list, static void get_non_kept_pack_filenames(struct string_list *fname_list,
@ -111,8 +113,7 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list,
fname = xmemdupz(e->d_name, len); fname = xmemdupz(e->d_name, len);
if (!file_exists(mkpath("%s/%s.keep", packdir, fname)) && if (!file_exists(mkpath("%s/%s.keep", packdir, fname)))
!file_exists(mkpath("%s/%s.promisor", packdir, fname)))
string_list_append_nodup(fname_list, fname); string_list_append_nodup(fname_list, fname);
else else
free(fname); free(fname);
@ -122,7 +123,7 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list,
static void remove_redundant_pack(const char *dir_name, const char *base_name) static void remove_redundant_pack(const char *dir_name, const char *base_name)
{ {
const char *exts[] = {".pack", ".idx", ".keep", ".bitmap"}; const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"};
int i; int i;
struct strbuf buf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT;
size_t plen; size_t plen;
@ -138,6 +139,117 @@ static void remove_redundant_pack(const char *dir_name, const char *base_name)
strbuf_release(&buf); strbuf_release(&buf);
} }
struct pack_objects_args {
const char *window;
const char *window_memory;
const char *depth;
const char *threads;
const char *max_pack_size;
int no_reuse_delta;
int no_reuse_object;
int quiet;
int local;
};
static void prepare_pack_objects(struct child_process *cmd,
const struct pack_objects_args *args)
{
argv_array_push(&cmd->args, "pack-objects");
if (args->window)
argv_array_pushf(&cmd->args, "--window=%s", args->window);
if (args->window_memory)
argv_array_pushf(&cmd->args, "--window-memory=%s", args->window_memory);
if (args->depth)
argv_array_pushf(&cmd->args, "--depth=%s", args->depth);
if (args->threads)
argv_array_pushf(&cmd->args, "--threads=%s", args->threads);
if (args->max_pack_size)
argv_array_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size);
if (args->no_reuse_delta)
argv_array_pushf(&cmd->args, "--no-reuse-delta");
if (args->no_reuse_object)
argv_array_pushf(&cmd->args, "--no-reuse-object");
if (args->local)
argv_array_push(&cmd->args, "--local");
if (args->quiet)
argv_array_push(&cmd->args, "--quiet");
if (delta_base_offset)
argv_array_push(&cmd->args, "--delta-base-offset");
argv_array_push(&cmd->args, packtmp);
cmd->git_cmd = 1;
cmd->out = -1;
}
/*
* Write oid to the given struct child_process's stdin, starting it first if
* necessary.
*/
static int write_oid(const struct object_id *oid, struct packed_git *pack,
uint32_t pos, void *data)
{
struct child_process *cmd = data;
if (cmd->in == -1) {
if (start_command(cmd))
die("Could not start pack-objects to repack promisor objects");
}
xwrite(cmd->in, oid_to_hex(oid), GIT_SHA1_HEXSZ);
xwrite(cmd->in, "\n", 1);
return 0;
}
static void repack_promisor_objects(const struct pack_objects_args *args,
struct string_list *names)
{
struct child_process cmd = CHILD_PROCESS_INIT;
FILE *out;
struct strbuf line = STRBUF_INIT;
prepare_pack_objects(&cmd, args);
cmd.in = -1;
/*
* NEEDSWORK: Giving pack-objects only the OIDs without any ordering
* hints may result in suboptimal deltas in the resulting pack. See if
* the OIDs can be sent with fake paths such that pack-objects can use a
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
for_each_packed_object(write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1)
/* No packed objects; cmd was never started */
return;
close(cmd.in);
out = xfdopen(cmd.out, "r");
while (strbuf_getline_lf(&line, out) != EOF) {
char *promisor_name;
int fd;
if (line.len != 40)
die("repack: Expecting 40 character sha1 lines only from pack-objects.");
string_list_append(names, line.buf);
/*
* pack-objects creates the .pack and .idx files, but not the
* .promisor file. Create the .promisor file, which is empty.
*/
promisor_name = mkpathdup("%s-%s.promisor", packtmp,
line.buf);
fd = open(promisor_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
if (fd < 0)
die_errno("unable to create '%s'", promisor_name);
close(fd);
free(promisor_name);
}
fclose(out);
if (finish_command(&cmd))
die("Could not finish pack-objects to repack promisor objects");
}
#define ALL_INTO_ONE 1 #define ALL_INTO_ONE 1
#define LOOSEN_UNREACHABLE 2 #define LOOSEN_UNREACHABLE 2
@ -150,6 +262,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
{".pack"}, {".pack"},
{".idx"}, {".idx"},
{".bitmap", 1}, {".bitmap", 1},
{".promisor", 1},
}; };
struct child_process cmd = CHILD_PROCESS_INIT; struct child_process cmd = CHILD_PROCESS_INIT;
struct string_list_item *item; struct string_list_item *item;
@ -165,15 +278,9 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
int delete_redundant = 0; int delete_redundant = 0;
const char *unpack_unreachable = NULL; const char *unpack_unreachable = NULL;
int keep_unreachable = 0; int keep_unreachable = 0;
const char *window = NULL, *window_memory = NULL;
const char *depth = NULL;
const char *threads = NULL;
const char *max_pack_size = NULL;
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
int no_reuse_delta = 0, no_reuse_object = 0;
int no_update_server_info = 0; int no_update_server_info = 0;
int quiet = 0; struct pack_objects_args po_args = {NULL};
int local = 0;
struct option builtin_repack_options[] = { struct option builtin_repack_options[] = {
OPT_BIT('a', NULL, &pack_everything, OPT_BIT('a', NULL, &pack_everything,
@ -183,14 +290,14 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
LOOSEN_UNREACHABLE | ALL_INTO_ONE), LOOSEN_UNREACHABLE | ALL_INTO_ONE),
OPT_BOOL('d', NULL, &delete_redundant, OPT_BOOL('d', NULL, &delete_redundant,
N_("remove redundant packs, and run git-prune-packed")), N_("remove redundant packs, and run git-prune-packed")),
OPT_BOOL('f', NULL, &no_reuse_delta, OPT_BOOL('f', NULL, &po_args.no_reuse_delta,
N_("pass --no-reuse-delta to git-pack-objects")), N_("pass --no-reuse-delta to git-pack-objects")),
OPT_BOOL('F', NULL, &no_reuse_object, OPT_BOOL('F', NULL, &po_args.no_reuse_object,
N_("pass --no-reuse-object to git-pack-objects")), N_("pass --no-reuse-object to git-pack-objects")),
OPT_BOOL('n', NULL, &no_update_server_info, OPT_BOOL('n', NULL, &no_update_server_info,
N_("do not run git-update-server-info")), N_("do not run git-update-server-info")),
OPT__QUIET(&quiet, N_("be quiet")), OPT__QUIET(&po_args.quiet, N_("be quiet")),
OPT_BOOL('l', "local", &local, OPT_BOOL('l', "local", &po_args.local,
N_("pass --local to git-pack-objects")), N_("pass --local to git-pack-objects")),
OPT_BOOL('b', "write-bitmap-index", &write_bitmaps, OPT_BOOL('b', "write-bitmap-index", &write_bitmaps,
N_("write bitmap index")), N_("write bitmap index")),
@ -198,15 +305,15 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
N_("with -A, do not loosen objects older than this")), N_("with -A, do not loosen objects older than this")),
OPT_BOOL('k', "keep-unreachable", &keep_unreachable, OPT_BOOL('k', "keep-unreachable", &keep_unreachable,
N_("with -a, repack unreachable objects")), N_("with -a, repack unreachable objects")),
OPT_STRING(0, "window", &window, N_("n"), OPT_STRING(0, "window", &po_args.window, N_("n"),
N_("size of the window used for delta compression")), N_("size of the window used for delta compression")),
OPT_STRING(0, "window-memory", &window_memory, N_("bytes"), OPT_STRING(0, "window-memory", &po_args.window_memory, N_("bytes"),
N_("same as the above, but limit memory size instead of entries count")), N_("same as the above, but limit memory size instead of entries count")),
OPT_STRING(0, "depth", &depth, N_("n"), OPT_STRING(0, "depth", &po_args.depth, N_("n"),
N_("limits the maximum delta depth")), N_("limits the maximum delta depth")),
OPT_STRING(0, "threads", &threads, N_("n"), OPT_STRING(0, "threads", &po_args.threads, N_("n"),
N_("limits the maximum number of threads")), N_("limits the maximum number of threads")),
OPT_STRING(0, "max-pack-size", &max_pack_size, N_("bytes"), OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
N_("maximum size of each packfile")), N_("maximum size of each packfile")),
OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
N_("repack objects in packs marked with .keep")), N_("repack objects in packs marked with .keep")),
@ -238,7 +345,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
sigchain_push_common(remove_pack_on_signal); sigchain_push_common(remove_pack_on_signal);
argv_array_push(&cmd.args, "pack-objects"); prepare_pack_objects(&cmd, &po_args);
argv_array_push(&cmd.args, "--keep-true-parents"); argv_array_push(&cmd.args, "--keep-true-parents");
if (!pack_kept_objects) if (!pack_kept_objects)
argv_array_push(&cmd.args, "--honor-pack-keep"); argv_array_push(&cmd.args, "--honor-pack-keep");
@ -251,26 +359,14 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
argv_array_push(&cmd.args, "--indexed-objects"); argv_array_push(&cmd.args, "--indexed-objects");
if (repository_format_partial_clone) if (repository_format_partial_clone)
argv_array_push(&cmd.args, "--exclude-promisor-objects"); argv_array_push(&cmd.args, "--exclude-promisor-objects");
if (window)
argv_array_pushf(&cmd.args, "--window=%s", window);
if (window_memory)
argv_array_pushf(&cmd.args, "--window-memory=%s", window_memory);
if (depth)
argv_array_pushf(&cmd.args, "--depth=%s", depth);
if (threads)
argv_array_pushf(&cmd.args, "--threads=%s", threads);
if (max_pack_size)
argv_array_pushf(&cmd.args, "--max-pack-size=%s", max_pack_size);
if (no_reuse_delta)
argv_array_pushf(&cmd.args, "--no-reuse-delta");
if (no_reuse_object)
argv_array_pushf(&cmd.args, "--no-reuse-object");
if (write_bitmaps) if (write_bitmaps)
argv_array_push(&cmd.args, "--write-bitmap-index"); argv_array_push(&cmd.args, "--write-bitmap-index");
if (pack_everything & ALL_INTO_ONE) { if (pack_everything & ALL_INTO_ONE) {
get_non_kept_pack_filenames(&existing_packs, &keep_pack_list); get_non_kept_pack_filenames(&existing_packs, &keep_pack_list);
repack_promisor_objects(&po_args, &names);
if (existing_packs.nr && delete_redundant) { if (existing_packs.nr && delete_redundant) {
if (unpack_unreachable) { if (unpack_unreachable) {
argv_array_pushf(&cmd.args, argv_array_pushf(&cmd.args,
@ -292,17 +388,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
argv_array_push(&cmd.args, "--incremental"); argv_array_push(&cmd.args, "--incremental");
} }
if (local)
argv_array_push(&cmd.args, "--local");
if (quiet)
argv_array_push(&cmd.args, "--quiet");
if (delta_base_offset)
argv_array_push(&cmd.args, "--delta-base-offset");
argv_array_push(&cmd.args, packtmp);
cmd.git_cmd = 1;
cmd.out = -1;
cmd.no_stdin = 1; cmd.no_stdin = 1;
ret = start_command(&cmd); ret = start_command(&cmd);
@ -320,7 +405,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (ret) if (ret)
return ret; return ret;
if (!names.nr && !quiet) if (!names.nr && !po_args.quiet)
printf("Nothing new to pack.\n"); printf("Nothing new to pack.\n");
/* /*
@ -429,6 +514,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
/* End of pack replacement. */ /* End of pack replacement. */
reprepare_packed_git(the_repository);
if (delete_redundant) { if (delete_redundant) {
int opts = 0; int opts = 0;
string_list_sort(&names); string_list_sort(&names);
@ -441,7 +528,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (!string_list_has_string(&names, sha1)) if (!string_list_has_string(&names, sha1))
remove_redundant_pack(packdir, item->string); remove_redundant_pack(packdir, item->string);
} }
if (!quiet && isatty(2)) if (!po_args.quiet && isatty(2))
opts |= PRUNE_PACKED_VERBOSE; opts |= PRUNE_PACKED_VERBOSE;
prune_packed_objects(opts); prune_packed_objects(opts);
} }

View File

@ -271,28 +271,91 @@ test_expect_success 'rev-list accepts missing and promised objects on command li
git -C repo rev-list --exclude-promisor-objects --objects "$COMMIT" "$TREE" "$BLOB" git -C repo rev-list --exclude-promisor-objects --objects "$COMMIT" "$TREE" "$BLOB"
' '
test_expect_success 'gc does not repack promisor objects' ' test_expect_success 'gc repacks promisor objects separately from non-promisor objects' '
rm -rf repo && rm -rf repo &&
test_create_repo repo && test_create_repo repo &&
test_commit -C repo my_commit && test_commit -C repo one &&
test_commit -C repo two &&
TREE_HASH=$(git -C repo rev-parse HEAD^{tree}) && TREE_ONE=$(git -C repo rev-parse one^{tree}) &&
HASH=$(printf "$TREE_HASH\n" | pack_as_from_promisor) && printf "$TREE_ONE\n" | pack_as_from_promisor &&
TREE_TWO=$(git -C repo rev-parse two^{tree}) &&
printf "$TREE_TWO\n" | pack_as_from_promisor &&
git -C repo config core.repositoryformatversion 1 && git -C repo config core.repositoryformatversion 1 &&
git -C repo config extensions.partialclone "arbitrary string" && git -C repo config extensions.partialclone "arbitrary string" &&
git -C repo gc && git -C repo gc &&
# Ensure that the promisor packfile still exists, and remove it # Ensure that exactly one promisor packfile exists, and that it
test -e repo/.git/objects/pack/pack-$HASH.pack && # contains the trees but not the commits
rm repo/.git/objects/pack/pack-$HASH.* && ls repo/.git/objects/pack/pack-*.promisor >promisorlist &&
test_line_count = 1 promisorlist &&
PROMISOR_PACKFILE=$(sed "s/.promisor/.pack/" <promisorlist) &&
git verify-pack $PROMISOR_PACKFILE -v >out &&
grep "$TREE_ONE" out &&
grep "$TREE_TWO" out &&
! grep "$(git -C repo rev-parse one)" out &&
! grep "$(git -C repo rev-parse two)" out &&
# Ensure that the single other pack contains the commit, but not the tree # Remove the promisor packfile and associated files
rm $(sed "s/.promisor//" <promisorlist).* &&
# Ensure that the single other pack contains the commits, but not the
# trees
ls repo/.git/objects/pack/pack-*.pack >packlist && ls repo/.git/objects/pack/pack-*.pack >packlist &&
test_line_count = 1 packlist && test_line_count = 1 packlist &&
git verify-pack repo/.git/objects/pack/pack-*.pack -v >out && git verify-pack repo/.git/objects/pack/pack-*.pack -v >out &&
grep "$(git -C repo rev-parse HEAD)" out && grep "$(git -C repo rev-parse one)" out &&
! grep "$TREE_HASH" out grep "$(git -C repo rev-parse two)" out &&
! grep "$TREE_ONE" out &&
! grep "$TREE_TWO" out
'
test_expect_success 'gc does not repack promisor objects if there are none' '
rm -rf repo &&
test_create_repo repo &&
test_commit -C repo one &&
git -C repo config core.repositoryformatversion 1 &&
git -C repo config extensions.partialclone "arbitrary string" &&
git -C repo gc &&
# Ensure that only one pack exists
ls repo/.git/objects/pack/pack-*.pack >packlist &&
test_line_count = 1 packlist
'
repack_and_check () {
rm -rf repo2 &&
cp -r repo repo2 &&
git -C repo2 repack $1 -d &&
git -C repo2 fsck &&
git -C repo2 cat-file -e $2 &&
git -C repo2 cat-file -e $3
}
test_expect_success 'repack -d does not irreversibly delete promisor objects' '
rm -rf repo &&
test_create_repo repo &&
git -C repo config core.repositoryformatversion 1 &&
git -C repo config extensions.partialclone "arbitrary string" &&
git -C repo commit --allow-empty -m one &&
git -C repo commit --allow-empty -m two &&
git -C repo commit --allow-empty -m three &&
git -C repo commit --allow-empty -m four &&
ONE=$(git -C repo rev-parse HEAD^^^) &&
TWO=$(git -C repo rev-parse HEAD^^) &&
THREE=$(git -C repo rev-parse HEAD^) &&
printf "$TWO\n" | pack_as_from_promisor &&
printf "$THREE\n" | pack_as_from_promisor &&
delete_object repo "$ONE" &&
repack_and_check -a "$TWO" "$THREE" &&
repack_and_check -A "$TWO" "$THREE" &&
repack_and_check -l "$TWO" "$THREE"
' '
test_expect_success 'gc stops traversal when a missing but promised object is reached' ' test_expect_success 'gc stops traversal when a missing but promised object is reached' '