git-commit-vandalism/builtin/repack.c
Taylor Blau 2fcb03b52d builtin/repack.c: don't move existing packs out of the way
When 'git repack' creates a pack with the same name as any existing
pack, it moves the existing one to 'old-pack-xxx.{pack,idx,...}' and
then renames the new one into place.

Eventually, it would be nice to have 'git repack' allow for writing a
multi-pack index at the critical time (after the new packs have been
written / moved into place, but before the old ones have been deleted).
Guessing that this option might be called '--write-midx', this makes the
following situation (where repacks are issued back-to-back without any
new objects) impossible:

    $ git repack -adb
    $ git repack -adb --write-midx

In the second repack, the existing packs are overwritten verbatim with
the same rename-to-old sequence. At that point, the current MIDX is
invalidated, since it refers to now-missing packs. So that code wants to
be run after the MIDX is re-written. But (prior to this patch) the new
MIDX can't be written until the new packs are moved into place. So, we
have a circular dependency.

This is all hypothetical, since no code currently exists to write a MIDX
safely during a 'git repack' (the 'GIT_TEST_MULTI_PACK_INDEX' does so
unsafely). Putting hypothetical aside, though: why do we need to rename
existing packs to be prefixed with 'old-' anyway?

This behavior dates all the way back to 2ad47d6 (git-repack: Be
careful when updating the same pack as an existing one., 2006-06-25).
2ad47d6 is mainly concerned about a case where a newly written pack
would have a different structure than its index. This used to be
possible when the pack name was a hash of the set of objects. Under this
naming scheme, two packs that store the same set of objects could differ
in delta selection, object positioning, or both. If this happened, then
any such packs would be unreadable in the instant between copying the
new pack and new index (i.e., either the index or pack will be stale
depending on the order that they were copied).

But since 1190a1a (pack-objects: name pack files after trailer hash,
2013-12-05), this is no longer possible, since pack files are named not
after their logical contents (i.e., the set of objects), but by the
actual checksum of their contents. So, this old- behavior can safely go,
which allows us to avoid our circular dependency above.

In addition to avoiding the circular dependency, this patch also makes
'git repack' a lot simpler, since we don't have to deal with failures
encountered when renaming existing packs to be prefixed with 'old-'.

This patch is mostly limited to removing code paths that deal with the
'old' prefixing, with the exception of files that include the pack's
name in their own filename, like .idx, .bitmap, and related files. The
exception is that we want to continue to trust what pack-objects wrote.
That is, it is not the case that we pretend as if pack-objects didn't
write files identical to ones that already exist, but rather that we
respect what pack-objects wrote as the source of truth. That cuts two
ways:

  - If pack-objects produced an identical pack to one that already
    exists with a bitmap, but did not produce a bitmap, we remove the
    bitmap that already exists. (This behavior is codified in t7700.14).

  - If pack-objects produced an identical pack to one that already
    exists, we trust the just-written version of the coresponding .idx,
    .promisor, and other files over the ones that already exist. This
    ensures that we use the most up-to-date versions of this files,
    which is safe even in the face of format changes in, say, the .idx
    file (which would not be reflected in the .idx file's name).

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-11-17 13:31:55 -08:00

536 lines
15 KiB
C

#include "builtin.h"
#include "cache.h"
#include "config.h"
#include "dir.h"
#include "parse-options.h"
#include "run-command.h"
#include "sigchain.h"
#include "strbuf.h"
#include "string-list.h"
#include "strvec.h"
#include "midx.h"
#include "packfile.h"
#include "prune-packed.h"
#include "object-store.h"
#include "promisor-remote.h"
#include "shallow.h"
static int delta_base_offset = 1;
static int pack_kept_objects = -1;
static int write_bitmaps = -1;
static int use_delta_islands;
static char *packdir, *packtmp;
static const char *const git_repack_usage[] = {
N_("git repack [<options>]"),
NULL
};
static const char incremental_bitmap_conflict_error[] = N_(
"Incremental repacks are incompatible with bitmap indexes. Use\n"
"--no-write-bitmap-index or disable the pack.writebitmaps configuration."
);
static int repack_config(const char *var, const char *value, void *cb)
{
if (!strcmp(var, "repack.usedeltabaseoffset")) {
delta_base_offset = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "repack.packkeptobjects")) {
pack_kept_objects = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "repack.writebitmaps") ||
!strcmp(var, "pack.writebitmaps")) {
write_bitmaps = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "repack.usedeltaislands")) {
use_delta_islands = git_config_bool(var, value);
return 0;
}
return git_default_config(var, value, cb);
}
/*
* Remove temporary $GIT_OBJECT_DIRECTORY/pack/.tmp-$$-pack-* files.
*/
static void remove_temporary_files(void)
{
struct strbuf buf = STRBUF_INIT;
size_t dirlen, prefixlen;
DIR *dir;
struct dirent *e;
dir = opendir(packdir);
if (!dir)
return;
/* Point at the slash at the end of ".../objects/pack/" */
dirlen = strlen(packdir) + 1;
strbuf_addstr(&buf, packtmp);
/* Hold the length of ".tmp-%d-pack-" */
prefixlen = buf.len - dirlen;
while ((e = readdir(dir))) {
if (strncmp(e->d_name, buf.buf + dirlen, prefixlen))
continue;
strbuf_setlen(&buf, dirlen);
strbuf_addstr(&buf, e->d_name);
unlink(buf.buf);
}
closedir(dir);
strbuf_release(&buf);
}
static void remove_pack_on_signal(int signo)
{
remove_temporary_files();
sigchain_pop(signo);
raise(signo);
}
/*
* Adds all packs hex strings to the fname list, which do not
* have a corresponding .keep file. These packs are not to
* be kept if we are going to pack everything into one file.
*/
static void get_non_kept_pack_filenames(struct string_list *fname_list,
const struct string_list *extra_keep)
{
DIR *dir;
struct dirent *e;
char *fname;
if (!(dir = opendir(packdir)))
return;
while ((e = readdir(dir)) != NULL) {
size_t len;
int i;
for (i = 0; i < extra_keep->nr; i++)
if (!fspathcmp(e->d_name, extra_keep->items[i].string))
break;
if (extra_keep->nr > 0 && i < extra_keep->nr)
continue;
if (!strip_suffix(e->d_name, ".pack", &len))
continue;
fname = xmemdupz(e->d_name, len);
if (!file_exists(mkpath("%s/%s.keep", packdir, fname)))
string_list_append_nodup(fname_list, fname);
else
free(fname);
}
closedir(dir);
}
static void remove_redundant_pack(const char *dir_name, const char *base_name)
{
struct strbuf buf = STRBUF_INIT;
struct multi_pack_index *m = get_local_multi_pack_index(the_repository);
strbuf_addf(&buf, "%s.pack", base_name);
if (m && midx_contains_pack(m, buf.buf))
clear_midx_file(the_repository);
strbuf_insertf(&buf, 0, "%s/", dir_name);
unlink_pack_path(buf.buf, 1);
strbuf_release(&buf);
}
struct pack_objects_args {
const char *window;
const char *window_memory;
const char *depth;
const char *threads;
const char *max_pack_size;
int no_reuse_delta;
int no_reuse_object;
int quiet;
int local;
};
static void prepare_pack_objects(struct child_process *cmd,
const struct pack_objects_args *args)
{
strvec_push(&cmd->args, "pack-objects");
if (args->window)
strvec_pushf(&cmd->args, "--window=%s", args->window);
if (args->window_memory)
strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory);
if (args->depth)
strvec_pushf(&cmd->args, "--depth=%s", args->depth);
if (args->threads)
strvec_pushf(&cmd->args, "--threads=%s", args->threads);
if (args->max_pack_size)
strvec_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size);
if (args->no_reuse_delta)
strvec_pushf(&cmd->args, "--no-reuse-delta");
if (args->no_reuse_object)
strvec_pushf(&cmd->args, "--no-reuse-object");
if (args->local)
strvec_push(&cmd->args, "--local");
if (args->quiet)
strvec_push(&cmd->args, "--quiet");
if (delta_base_offset)
strvec_push(&cmd->args, "--delta-base-offset");
strvec_push(&cmd->args, packtmp);
cmd->git_cmd = 1;
cmd->out = -1;
}
/*
* Write oid to the given struct child_process's stdin, starting it first if
* necessary.
*/
static int write_oid(const struct object_id *oid, struct packed_git *pack,
uint32_t pos, void *data)
{
struct child_process *cmd = data;
if (cmd->in == -1) {
if (start_command(cmd))
die(_("could not start pack-objects to repack promisor objects"));
}
xwrite(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz);
xwrite(cmd->in, "\n", 1);
return 0;
}
static struct {
const char *name;
unsigned optional:1;
} exts[] = {
{".pack"},
{".idx"},
{".bitmap", 1},
{".promisor", 1},
};
static unsigned populate_pack_exts(char *name)
{
struct stat statbuf;
struct strbuf path = STRBUF_INIT;
unsigned ret = 0;
int i;
for (i = 0; i < ARRAY_SIZE(exts); i++) {
strbuf_reset(&path);
strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name);
if (stat(path.buf, &statbuf))
continue;
ret |= (1 << i);
}
strbuf_release(&path);
return ret;
}
static void repack_promisor_objects(const struct pack_objects_args *args,
struct string_list *names)
{
struct child_process cmd = CHILD_PROCESS_INIT;
FILE *out;
struct strbuf line = STRBUF_INIT;
prepare_pack_objects(&cmd, args);
cmd.in = -1;
/*
* NEEDSWORK: Giving pack-objects only the OIDs without any ordering
* hints may result in suboptimal deltas in the resulting pack. See if
* the OIDs can be sent with fake paths such that pack-objects can use a
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
for_each_packed_object(write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1)
/* No packed objects; cmd was never started */
return;
close(cmd.in);
out = xfdopen(cmd.out, "r");
while (strbuf_getline_lf(&line, out) != EOF) {
struct string_list_item *item;
char *promisor_name;
int fd;
if (line.len != the_hash_algo->hexsz)
die(_("repack: Expecting full hex object ID lines only from pack-objects."));
item = string_list_append(names, line.buf);
/*
* pack-objects creates the .pack and .idx files, but not the
* .promisor file. Create the .promisor file, which is empty.
*
* NEEDSWORK: fetch-pack sometimes generates non-empty
* .promisor files containing the ref names and associated
* hashes at the point of generation of the corresponding
* packfile, but this would not preserve their contents. Maybe
* concatenate the contents of all .promisor files instead of
* just creating a new empty file.
*/
promisor_name = mkpathdup("%s-%s.promisor", packtmp,
line.buf);
fd = open(promisor_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
if (fd < 0)
die_errno(_("unable to create '%s'"), promisor_name);
close(fd);
item->util = (void *)(uintptr_t)populate_pack_exts(item->string);
free(promisor_name);
}
fclose(out);
if (finish_command(&cmd))
die(_("could not finish pack-objects to repack promisor objects"));
}
#define ALL_INTO_ONE 1
#define LOOSEN_UNREACHABLE 2
int cmd_repack(int argc, const char **argv, const char *prefix)
{
struct child_process cmd = CHILD_PROCESS_INIT;
struct string_list_item *item;
struct string_list names = STRING_LIST_INIT_DUP;
struct string_list rollback = STRING_LIST_INIT_NODUP;
struct string_list existing_packs = STRING_LIST_INIT_DUP;
struct strbuf line = STRBUF_INIT;
int i, ext, ret;
FILE *out;
/* variables to be filled by option parsing */
int pack_everything = 0;
int delete_redundant = 0;
const char *unpack_unreachable = NULL;
int keep_unreachable = 0;
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
int no_update_server_info = 0;
struct pack_objects_args po_args = {NULL};
struct option builtin_repack_options[] = {
OPT_BIT('a', NULL, &pack_everything,
N_("pack everything in a single pack"), ALL_INTO_ONE),
OPT_BIT('A', NULL, &pack_everything,
N_("same as -a, and turn unreachable objects loose"),
LOOSEN_UNREACHABLE | ALL_INTO_ONE),
OPT_BOOL('d', NULL, &delete_redundant,
N_("remove redundant packs, and run git-prune-packed")),
OPT_BOOL('f', NULL, &po_args.no_reuse_delta,
N_("pass --no-reuse-delta to git-pack-objects")),
OPT_BOOL('F', NULL, &po_args.no_reuse_object,
N_("pass --no-reuse-object to git-pack-objects")),
OPT_BOOL('n', NULL, &no_update_server_info,
N_("do not run git-update-server-info")),
OPT__QUIET(&po_args.quiet, N_("be quiet")),
OPT_BOOL('l', "local", &po_args.local,
N_("pass --local to git-pack-objects")),
OPT_BOOL('b', "write-bitmap-index", &write_bitmaps,
N_("write bitmap index")),
OPT_BOOL('i', "delta-islands", &use_delta_islands,
N_("pass --delta-islands to git-pack-objects")),
OPT_STRING(0, "unpack-unreachable", &unpack_unreachable, N_("approxidate"),
N_("with -A, do not loosen objects older than this")),
OPT_BOOL('k', "keep-unreachable", &keep_unreachable,
N_("with -a, repack unreachable objects")),
OPT_STRING(0, "window", &po_args.window, N_("n"),
N_("size of the window used for delta compression")),
OPT_STRING(0, "window-memory", &po_args.window_memory, N_("bytes"),
N_("same as the above, but limit memory size instead of entries count")),
OPT_STRING(0, "depth", &po_args.depth, N_("n"),
N_("limits the maximum delta depth")),
OPT_STRING(0, "threads", &po_args.threads, N_("n"),
N_("limits the maximum number of threads")),
OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
N_("maximum size of each packfile")),
OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
N_("repack objects in packs marked with .keep")),
OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
N_("do not repack this pack")),
OPT_END()
};
git_config(repack_config, NULL);
argc = parse_options(argc, argv, prefix, builtin_repack_options,
git_repack_usage, 0);
if (delete_redundant && repository_format_precious_objects)
die(_("cannot delete packs in a precious-objects repo"));
if (keep_unreachable &&
(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE)))
die(_("--keep-unreachable and -A are incompatible"));
if (write_bitmaps < 0) {
if (!(pack_everything & ALL_INTO_ONE) ||
!is_bare_repository())
write_bitmaps = 0;
}
if (pack_kept_objects < 0)
pack_kept_objects = write_bitmaps > 0;
if (write_bitmaps && !(pack_everything & ALL_INTO_ONE))
die(_(incremental_bitmap_conflict_error));
packdir = mkpathdup("%s/pack", get_object_directory());
packtmp = mkpathdup("%s/.tmp-%d-pack", packdir, (int)getpid());
sigchain_push_common(remove_pack_on_signal);
prepare_pack_objects(&cmd, &po_args);
strvec_push(&cmd.args, "--keep-true-parents");
if (!pack_kept_objects)
strvec_push(&cmd.args, "--honor-pack-keep");
for (i = 0; i < keep_pack_list.nr; i++)
strvec_pushf(&cmd.args, "--keep-pack=%s",
keep_pack_list.items[i].string);
strvec_push(&cmd.args, "--non-empty");
strvec_push(&cmd.args, "--all");
strvec_push(&cmd.args, "--reflog");
strvec_push(&cmd.args, "--indexed-objects");
if (has_promisor_remote())
strvec_push(&cmd.args, "--exclude-promisor-objects");
if (write_bitmaps > 0)
strvec_push(&cmd.args, "--write-bitmap-index");
else if (write_bitmaps < 0)
strvec_push(&cmd.args, "--write-bitmap-index-quiet");
if (use_delta_islands)
strvec_push(&cmd.args, "--delta-islands");
if (pack_everything & ALL_INTO_ONE) {
get_non_kept_pack_filenames(&existing_packs, &keep_pack_list);
repack_promisor_objects(&po_args, &names);
if (existing_packs.nr && delete_redundant) {
if (unpack_unreachable) {
strvec_pushf(&cmd.args,
"--unpack-unreachable=%s",
unpack_unreachable);
strvec_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
} else if (pack_everything & LOOSEN_UNREACHABLE) {
strvec_push(&cmd.args,
"--unpack-unreachable");
} else if (keep_unreachable) {
strvec_push(&cmd.args, "--keep-unreachable");
strvec_push(&cmd.args, "--pack-loose-unreachable");
} else {
strvec_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
}
}
} else {
strvec_push(&cmd.args, "--unpacked");
strvec_push(&cmd.args, "--incremental");
}
cmd.no_stdin = 1;
ret = start_command(&cmd);
if (ret)
return ret;
out = xfdopen(cmd.out, "r");
while (strbuf_getline_lf(&line, out) != EOF) {
if (line.len != the_hash_algo->hexsz)
die(_("repack: Expecting full hex object ID lines only from pack-objects."));
string_list_append(&names, line.buf);
}
fclose(out);
ret = finish_command(&cmd);
if (ret)
return ret;
if (!names.nr && !po_args.quiet)
printf_ln(_("Nothing new to pack."));
for_each_string_list_item(item, &names) {
item->util = (void *)(uintptr_t)populate_pack_exts(item->string);
}
close_object_store(the_repository->objects);
/*
* Ok we have prepared all new packfiles.
*/
for_each_string_list_item(item, &names) {
for (ext = 0; ext < ARRAY_SIZE(exts); ext++) {
char *fname, *fname_old;
fname = mkpathdup("%s/pack-%s%s",
packdir, item->string, exts[ext].name);
fname_old = mkpathdup("%s-%s%s",
packtmp, item->string, exts[ext].name);
if (((uintptr_t)item->util) & (1 << ext)) {
struct stat statbuffer;
if (!stat(fname_old, &statbuffer)) {
statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
chmod(fname_old, statbuffer.st_mode);
}
if (rename(fname_old, fname))
die_errno(_("renaming '%s' failed"), fname_old);
} else if (!exts[ext].optional)
die(_("missing required file: %s"), fname_old);
else if (unlink(fname) < 0 && errno != ENOENT)
die_errno(_("could not unlink: %s"), fname);
free(fname);
free(fname_old);
}
}
/* End of pack replacement. */
reprepare_packed_git(the_repository);
if (delete_redundant) {
const int hexsz = the_hash_algo->hexsz;
int opts = 0;
string_list_sort(&names);
for_each_string_list_item(item, &existing_packs) {
char *sha1;
size_t len = strlen(item->string);
if (len < hexsz)
continue;
sha1 = item->string + len - hexsz;
if (!string_list_has_string(&names, sha1))
remove_redundant_pack(packdir, item->string);
}
if (!po_args.quiet && isatty(2))
opts |= PRUNE_PACKED_VERBOSE;
prune_packed_objects(opts);
if (!keep_unreachable &&
(!(pack_everything & LOOSEN_UNREACHABLE) ||
unpack_unreachable) &&
is_repository_shallow(the_repository))
prune_shallow(PRUNE_QUICK);
}
if (!no_update_server_info)
update_server_info(0);
remove_temporary_files();
if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0))
write_midx_file(get_object_directory(), 0);
string_list_clear(&names, 0);
string_list_clear(&rollback, 0);
string_list_clear(&existing_packs, 0);
strbuf_release(&line);
return 0;
}