Merge branch 'jk/prune-with-corrupt-refs'

"git prune" used to largely ignore broken refs when deciding which
objects are still being used, which could spread an existing small
damage and make it a larger one.

* jk/prune-with-corrupt-refs:
  refs.c: drop curate_packed_refs
  repack: turn on "ref paranoia" when doing a destructive repack
  prune: turn on ref_paranoia flag
  refs: introduce a "ref paranoia" flag
  t5312: test object deletion code paths in a corrupted repository
This commit is contained in:
commit
05e816e37f
@ -1027,6 +1027,17 @@ GIT_ICASE_PATHSPECS::
|
||||
variable when it is invoked as the top level command by the
|
||||
end user, to be recorded in the body of the reflog.
|
||||
|
||||
`GIT_REF_PARANOIA`::
|
||||
If set to `1`, include broken or badly named refs when iterating
|
||||
over lists of refs. In a normal, non-corrupted repository, this
|
||||
does nothing. However, enabling it may help git to detect and
|
||||
abort some operations in the presence of broken refs. Git sets
|
||||
this variable automatically when performing destructive
|
||||
operations like linkgit:git-prune[1]. You should not need to set
|
||||
it yourself unless you want to be paranoid about making sure
|
||||
an operation has touched every ref (e.g., because you are
|
||||
cloning a repository to make a backup).
|
||||
|
||||
|
||||
Discussion[[Discussion]]
|
||||
------------------------
|
||||
|
@ -115,6 +115,7 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
|
||||
expire = ULONG_MAX;
|
||||
save_commit_buffer = 0;
|
||||
check_replace_refs = 0;
|
||||
ref_paranoia = 1;
|
||||
init_revisions(&revs, prefix);
|
||||
|
||||
argc = parse_options(argc, argv, prefix, options, prune_usage, 0);
|
||||
|
@ -228,13 +228,17 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
|
||||
get_non_kept_pack_filenames(&existing_packs);
|
||||
|
||||
if (existing_packs.nr && delete_redundant) {
|
||||
if (unpack_unreachable)
|
||||
if (unpack_unreachable) {
|
||||
argv_array_pushf(&cmd.args,
|
||||
"--unpack-unreachable=%s",
|
||||
unpack_unreachable);
|
||||
else if (pack_everything & LOOSEN_UNREACHABLE)
|
||||
argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
|
||||
} else if (pack_everything & LOOSEN_UNREACHABLE) {
|
||||
argv_array_push(&cmd.args,
|
||||
"--unpack-unreachable");
|
||||
} else {
|
||||
argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
argv_array_push(&cmd.args, "--unpacked");
|
||||
|
8
cache.h
8
cache.h
@ -613,6 +613,14 @@ extern int precomposed_unicode;
|
||||
extern int protect_hfs;
|
||||
extern int protect_ntfs;
|
||||
|
||||
/*
|
||||
* Include broken refs in all ref iterations, which will
|
||||
* generally choke dangerous operations rather than letting
|
||||
* them silently proceed without taking the broken ref into
|
||||
* account.
|
||||
*/
|
||||
extern int ref_paranoia;
|
||||
|
||||
/*
|
||||
* The character that begins a commented line in user-editable file
|
||||
* that is subject to stripspace.
|
||||
|
@ -24,6 +24,7 @@ int is_bare_repository_cfg = -1; /* unspecified */
|
||||
int log_all_ref_updates = -1; /* unspecified */
|
||||
int warn_ambiguous_refs = 1;
|
||||
int warn_on_object_refname_ambiguity = 1;
|
||||
int ref_paranoia = -1;
|
||||
int repository_format_version;
|
||||
const char *git_commit_encoding;
|
||||
const char *git_log_output_encoding;
|
||||
|
72
refs.c
72
refs.c
@ -1934,6 +1934,11 @@ static int do_for_each_ref(struct ref_cache *refs, const char *base,
|
||||
data.fn = fn;
|
||||
data.cb_data = cb_data;
|
||||
|
||||
if (ref_paranoia < 0)
|
||||
ref_paranoia = git_env_bool("GIT_REF_PARANOIA", 0);
|
||||
if (ref_paranoia)
|
||||
data.flags |= DO_FOR_EACH_INCLUDE_BROKEN;
|
||||
|
||||
return do_for_each_entry(refs, base, do_one_ref, &data);
|
||||
}
|
||||
|
||||
@ -2616,68 +2621,10 @@ int pack_refs(unsigned int flags)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If entry is no longer needed in packed-refs, add it to the string
|
||||
* list pointed to by cb_data. Reasons for deleting entries:
|
||||
*
|
||||
* - Entry is broken.
|
||||
* - Entry is overridden by a loose ref.
|
||||
* - Entry does not point at a valid object.
|
||||
*
|
||||
* In the first and third cases, also emit an error message because these
|
||||
* are indications of repository corruption.
|
||||
*/
|
||||
static int curate_packed_ref_fn(struct ref_entry *entry, void *cb_data)
|
||||
{
|
||||
struct string_list *refs_to_delete = cb_data;
|
||||
|
||||
if (entry->flag & REF_ISBROKEN) {
|
||||
/* This shouldn't happen to packed refs. */
|
||||
error("%s is broken!", entry->name);
|
||||
string_list_append(refs_to_delete, entry->name);
|
||||
return 0;
|
||||
}
|
||||
if (!has_sha1_file(entry->u.value.sha1)) {
|
||||
unsigned char sha1[20];
|
||||
int flags;
|
||||
|
||||
if (read_ref_full(entry->name, 0, sha1, &flags))
|
||||
/* We should at least have found the packed ref. */
|
||||
die("Internal error");
|
||||
if ((flags & REF_ISSYMREF) || !(flags & REF_ISPACKED)) {
|
||||
/*
|
||||
* This packed reference is overridden by a
|
||||
* loose reference, so it is OK that its value
|
||||
* is no longer valid; for example, it might
|
||||
* refer to an object that has been garbage
|
||||
* collected. For this purpose we don't even
|
||||
* care whether the loose reference itself is
|
||||
* invalid, broken, symbolic, etc. Silently
|
||||
* remove the packed reference.
|
||||
*/
|
||||
string_list_append(refs_to_delete, entry->name);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* There is no overriding loose reference, so the fact
|
||||
* that this reference doesn't refer to a valid object
|
||||
* indicates some kind of repository corruption.
|
||||
* Report the problem, then omit the reference from
|
||||
* the output.
|
||||
*/
|
||||
error("%s does not point to a valid object!", entry->name);
|
||||
string_list_append(refs_to_delete, entry->name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int repack_without_refs(struct string_list *refnames, struct strbuf *err)
|
||||
{
|
||||
struct ref_dir *packed;
|
||||
struct string_list refs_to_delete = STRING_LIST_INIT_DUP;
|
||||
struct string_list_item *refname, *ref_to_delete;
|
||||
struct string_list_item *refname;
|
||||
int ret, needs_repacking = 0, removed = 0;
|
||||
|
||||
assert(err);
|
||||
@ -2713,13 +2660,6 @@ int repack_without_refs(struct string_list *refnames, struct strbuf *err)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove any other accumulated cruft */
|
||||
do_for_each_entry_in_dir(packed, 0, curate_packed_ref_fn, &refs_to_delete);
|
||||
for_each_string_list_item(ref_to_delete, &refs_to_delete) {
|
||||
if (remove_entry(packed, ref_to_delete->string) == -1)
|
||||
die("internal error");
|
||||
}
|
||||
|
||||
/* Write what remains */
|
||||
ret = commit_packed_refs();
|
||||
if (ret)
|
||||
|
114
t/t5312-prune-corruption.sh
Executable file
114
t/t5312-prune-corruption.sh
Executable file
@ -0,0 +1,114 @@
|
||||
#!/bin/sh
|
||||
|
||||
test_description='
|
||||
Test pruning of repositories with minor corruptions. The goal
|
||||
here is that we should always be erring on the side of safety. So
|
||||
if we see, for example, a ref with a bogus name, it is OK either to
|
||||
bail out or to proceed using it as a reachable tip, but it is _not_
|
||||
OK to proceed as if it did not exist. Otherwise we might silently
|
||||
delete objects that cannot be recovered.
|
||||
'
|
||||
. ./test-lib.sh
|
||||
|
||||
test_expect_success 'disable reflogs' '
|
||||
git config core.logallrefupdates false &&
|
||||
rm -rf .git/logs
|
||||
'
|
||||
|
||||
test_expect_success 'create history reachable only from a bogus-named ref' '
|
||||
test_tick && git commit --allow-empty -m master &&
|
||||
base=$(git rev-parse HEAD) &&
|
||||
test_tick && git commit --allow-empty -m bogus &&
|
||||
bogus=$(git rev-parse HEAD) &&
|
||||
git cat-file commit $bogus >saved &&
|
||||
echo $bogus >.git/refs/heads/bogus..name &&
|
||||
git reset --hard HEAD^
|
||||
'
|
||||
|
||||
test_expect_success 'pruning does not drop bogus object' '
|
||||
test_when_finished "git hash-object -w -t commit saved" &&
|
||||
test_might_fail git prune --expire=now &&
|
||||
verbose git cat-file -e $bogus
|
||||
'
|
||||
|
||||
test_expect_success 'put bogus object into pack' '
|
||||
git tag reachable $bogus &&
|
||||
git repack -ad &&
|
||||
git tag -d reachable &&
|
||||
verbose git cat-file -e $bogus
|
||||
'
|
||||
|
||||
test_expect_success 'destructive repack keeps packed object' '
|
||||
test_might_fail git repack -Ad --unpack-unreachable=now &&
|
||||
verbose git cat-file -e $bogus &&
|
||||
test_might_fail git repack -ad &&
|
||||
verbose git cat-file -e $bogus
|
||||
'
|
||||
|
||||
# subsequent tests will have different corruptions
|
||||
test_expect_success 'clean up bogus ref' '
|
||||
rm .git/refs/heads/bogus..name
|
||||
'
|
||||
|
||||
# We create two new objects here, "one" and "two". Our
|
||||
# master branch points to "two", which is deleted,
|
||||
# corrupting the repository. But we'd like to make sure
|
||||
# that the otherwise unreachable "one" is not pruned
|
||||
# (since it is the user's best bet for recovering
|
||||
# from the corruption).
|
||||
#
|
||||
# Note that we also point HEAD somewhere besides "two",
|
||||
# as we want to make sure we test the case where we
|
||||
# pick up the reference to "two" by iterating the refs,
|
||||
# not by resolving HEAD.
|
||||
test_expect_success 'create history with missing tip commit' '
|
||||
test_tick && git commit --allow-empty -m one &&
|
||||
recoverable=$(git rev-parse HEAD) &&
|
||||
git cat-file commit $recoverable >saved &&
|
||||
test_tick && git commit --allow-empty -m two &&
|
||||
missing=$(git rev-parse HEAD) &&
|
||||
git checkout --detach $base &&
|
||||
rm .git/objects/$(echo $missing | sed "s,..,&/,") &&
|
||||
test_must_fail git cat-file -e $missing
|
||||
'
|
||||
|
||||
test_expect_success 'pruning with a corrupted tip does not drop history' '
|
||||
test_when_finished "git hash-object -w -t commit saved" &&
|
||||
test_might_fail git prune --expire=now &&
|
||||
verbose git cat-file -e $recoverable
|
||||
'
|
||||
|
||||
test_expect_success 'pack-refs does not silently delete broken loose ref' '
|
||||
git pack-refs --all --prune &&
|
||||
echo $missing >expect &&
|
||||
git rev-parse refs/heads/master >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
# we do not want to count on running pack-refs to
|
||||
# actually pack it, as it is perfectly reasonable to
|
||||
# skip processing a broken ref
|
||||
test_expect_success 'create packed-refs file with broken ref' '
|
||||
rm -f .git/refs/heads/master &&
|
||||
cat >.git/packed-refs <<-EOF &&
|
||||
$missing refs/heads/master
|
||||
$recoverable refs/heads/other
|
||||
EOF
|
||||
echo $missing >expect &&
|
||||
git rev-parse refs/heads/master >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_expect_success 'pack-refs does not silently delete broken packed ref' '
|
||||
git pack-refs --all --prune &&
|
||||
git rev-parse refs/heads/master >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_expect_success 'pack-refs does not drop broken refs during deletion' '
|
||||
git update-ref -d refs/heads/other &&
|
||||
git rev-parse refs/heads/master >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_done
|
Loading…
Reference in New Issue
Block a user