Merge branch 'jk/prune-with-corrupt-refs'

"git prune" used to largely ignore broken refs when deciding which
objects are still being used, which could turn a small amount of
existing damage into a much larger one.

* jk/prune-with-corrupt-refs:
  refs.c: drop curate_packed_refs
  repack: turn on "ref paranoia" when doing a destructive repack
  prune: turn on ref_paranoia flag
  refs: introduce a "ref paranoia" flag
  t5312: test object deletion code paths in a corrupted repository
This commit is contained in:
Junio C Hamano 2015-03-25 12:54:26 -07:00
commit 05e816e37f
7 changed files with 147 additions and 68 deletions

View File

@ -1027,6 +1027,17 @@ GIT_ICASE_PATHSPECS::
variable when it is invoked as the top level command by the
end user, to be recorded in the body of the reflog.
`GIT_REF_PARANOIA`::
If set to `1`, include broken or badly named refs when iterating
over lists of refs. In a normal, non-corrupted repository, this
does nothing. However, enabling it may help git to detect and
abort some operations in the presence of broken refs. Git sets
this variable automatically when performing destructive
operations like linkgit:git-prune[1]. You should not need to set
it yourself unless you want to be paranoid about making sure
an operation has touched every ref (e.g., because you are
cloning a repository to make a backup).
Discussion[[Discussion]]
------------------------

View File

@ -115,6 +115,7 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
expire = ULONG_MAX;
save_commit_buffer = 0;
check_replace_refs = 0;
ref_paranoia = 1;
init_revisions(&revs, prefix);
argc = parse_options(argc, argv, prefix, options, prune_usage, 0);

View File

@ -228,13 +228,17 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
get_non_kept_pack_filenames(&existing_packs);
if (existing_packs.nr && delete_redundant) {
if (unpack_unreachable)
if (unpack_unreachable) {
argv_array_pushf(&cmd.args,
"--unpack-unreachable=%s",
unpack_unreachable);
else if (pack_everything & LOOSEN_UNREACHABLE)
argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
} else if (pack_everything & LOOSEN_UNREACHABLE) {
argv_array_push(&cmd.args,
"--unpack-unreachable");
} else {
argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
}
}
} else {
argv_array_push(&cmd.args, "--unpacked");

View File

@ -613,6 +613,14 @@ extern int precomposed_unicode;
extern int protect_hfs;
extern int protect_ntfs;
/*
* Include broken refs in all ref iterations, which will
* generally choke dangerous operations rather than letting
* them silently proceed without taking the broken ref into
* account.
*
* A negative value means "not decided yet": ref iteration then
* takes the setting from the GIT_REF_PARANOIA environment
* variable (off when unset).
*/
extern int ref_paranoia;
/*
* The character that begins a commented line in user-editable file
* that is subject to stripspace.

View File

@ -24,6 +24,7 @@ int is_bare_repository_cfg = -1; /* unspecified */
int log_all_ref_updates = -1; /* unspecified */
int warn_ambiguous_refs = 1;
int warn_on_object_refname_ambiguity = 1;
int ref_paranoia = -1; /* unspecified; do_for_each_ref() then consults GIT_REF_PARANOIA */
int repository_format_version;
const char *git_commit_encoding;
const char *git_log_output_encoding;

72
refs.c
View File

@ -1934,6 +1934,11 @@ static int do_for_each_ref(struct ref_cache *refs, const char *base,
data.fn = fn;
data.cb_data = cb_data;
if (ref_paranoia < 0)
ref_paranoia = git_env_bool("GIT_REF_PARANOIA", 0);
if (ref_paranoia)
data.flags |= DO_FOR_EACH_INCLUDE_BROKEN;
return do_for_each_entry(refs, base, do_one_ref, &data);
}
@ -2616,68 +2621,10 @@ int pack_refs(unsigned int flags)
return 0;
}
/*
 * Decide whether a packed-refs entry should be dropped and, if so,
 * record its name in the string list passed through cb_data.  An
 * entry is dropped when:
 *
 *   - it is flagged as broken (reported as an error);
 *   - its object is missing, but the ref resolves to a symbolic or
 *     non-packed ref that overrides it (dropped silently);
 *   - its object is missing and nothing overrides it (reported as
 *     an error).
 *
 * Entries whose object exists are left alone.  Always returns 0.
 */
static int curate_packed_ref_fn(struct ref_entry *entry, void *cb_data)
{
	struct string_list *doomed = cb_data;
	unsigned char sha1[20];
	int flags;

	if (entry->flag & REF_ISBROKEN) {
		/* This shouldn't happen to packed refs. */
		error("%s is broken!", entry->name);
		string_list_append(doomed, entry->name);
		return 0;
	}

	/* The entry points at an object we have: nothing to curate. */
	if (has_sha1_file(entry->u.value.sha1))
		return 0;

	/* We should at least have found the packed ref. */
	if (read_ref_full(entry->name, 0, sha1, &flags))
		die("Internal error");

	/*
	 * When the packed reference is overridden by a loose reference
	 * it is OK that its value is no longer valid (for example, it
	 * might refer to an object that has been garbage collected),
	 * and we do not care whether the loose reference itself is
	 * invalid, broken, symbolic, etc.: remove the packed entry
	 * silently.  Only when there is no overriding loose reference
	 * does the missing object indicate repository corruption, so
	 * only then is the problem reported before the entry is
	 * omitted from the output.
	 */
	if (!(flags & REF_ISSYMREF) && (flags & REF_ISPACKED))
		error("%s does not point to a valid object!", entry->name);

	string_list_append(doomed, entry->name);
	return 0;
}
int repack_without_refs(struct string_list *refnames, struct strbuf *err)
{
struct ref_dir *packed;
struct string_list refs_to_delete = STRING_LIST_INIT_DUP;
struct string_list_item *refname, *ref_to_delete;
struct string_list_item *refname;
int ret, needs_repacking = 0, removed = 0;
assert(err);
@ -2713,13 +2660,6 @@ int repack_without_refs(struct string_list *refnames, struct strbuf *err)
return 0;
}
/* Remove any other accumulated cruft */
do_for_each_entry_in_dir(packed, 0, curate_packed_ref_fn, &refs_to_delete);
for_each_string_list_item(ref_to_delete, &refs_to_delete) {
if (remove_entry(packed, ref_to_delete->string) == -1)
die("internal error");
}
/* Write what remains */
ret = commit_packed_refs();
if (ret)

114
t/t5312-prune-corruption.sh Executable file
View File

@ -0,0 +1,114 @@
#!/bin/sh
test_description='
Test pruning of repositories with minor corruptions. The goal
here is that we should always be erring on the side of safety. So
if we see, for example, a ref with a bogus name, it is OK either to
bail out or to proceed using it as a reachable tip, but it is _not_
OK to proceed as if it did not exist. Otherwise we might silently
delete objects that cannot be recovered.
'
. ./test-lib.sh
# Turn off reflog creation and remove any logs already written.
# Presumably this keeps reflog entries from holding the test commits
# reachable and masking the pruning behavior under test -- TODO confirm.
test_expect_success 'disable reflogs' '
git config core.logallrefupdates false &&
rm -rf .git/logs
'
# Make two commits ($base, then $bogus), write $bogus into a ref file
# whose name contains "..", and reset HEAD back to $base so that the
# bogus-named ref is the only thing naming the second commit.  The raw
# commit contents are kept in "saved" so that later tests can re-create
# the object with "git hash-object" if it gets deleted.
test_expect_success 'create history reachable only from a bogus-named ref' '
test_tick && git commit --allow-empty -m master &&
base=$(git rev-parse HEAD) &&
test_tick && git commit --allow-empty -m bogus &&
bogus=$(git rev-parse HEAD) &&
git cat-file commit $bogus >saved &&
echo $bogus >.git/refs/heads/bogus..name &&
git reset --hard HEAD^
'
# The prune is allowed to either succeed or fail (test_might_fail); the
# only requirement is that the $bogus commit still exists afterwards.
# The object is re-written from "saved" when the test finishes, whatever
# the outcome, so that the following tests can rely on it being present.
test_expect_success 'pruning does not drop bogus object' '
test_when_finished "git hash-object -w -t commit saved" &&
test_might_fail git prune --expire=now &&
verbose git cat-file -e $bogus
'
# Temporarily name $bogus with a valid tag so that "repack -ad" picks it
# up, then delete the tag again and check the object is still available.
test_expect_success 'put bogus object into pack' '
git tag reachable $bogus &&
git repack -ad &&
git tag -d reachable &&
verbose git cat-file -e $bogus
'
# Exercise both destructive repack variants.  Each one may succeed or
# bail out, but after each of them the $bogus commit must still exist.
test_expect_success 'destructive repack keeps packed object' '
test_might_fail git repack -Ad --unpack-unreachable=now &&
verbose git cat-file -e $bogus &&
test_might_fail git repack -ad &&
verbose git cat-file -e $bogus
'
# Subsequent tests will have different corruptions, so remove the
# bogus-named ref file that the earlier tests relied on.
test_expect_success 'clean up bogus ref' '
rm .git/refs/heads/bogus..name
'
# We create two new objects here, "one" and "two". Our
# master branch points to "two", which is deleted,
# corrupting the repository. But we'd like to make sure
# that the otherwise unreachable "one" is not pruned
# (since it is the user's best bet for recovering
# from the corruption).
#
# Note that we also point HEAD somewhere besides "two",
# as we want to make sure we test the case where we
# pick up the reference to "two" by iterating the refs,
# not by resolving HEAD.
#
# The sed expression inserts a "/" after the first two
# characters of the object name, giving the path of the
# object file under .git/objects that is removed. The
# contents of "one" are kept in "saved" so that the
# object can be re-created later.
test_expect_success 'create history with missing tip commit' '
test_tick && git commit --allow-empty -m one &&
recoverable=$(git rev-parse HEAD) &&
git cat-file commit $recoverable >saved &&
test_tick && git commit --allow-empty -m two &&
missing=$(git rev-parse HEAD) &&
git checkout --detach $base &&
rm .git/objects/$(echo $missing | sed "s,..,&/,") &&
test_must_fail git cat-file -e $missing
'
# With the branch tip object missing, prune may succeed or fail, but the
# $recoverable commit must still exist afterwards.  The object is
# re-written from "saved" when the test finishes, whatever the outcome.
test_expect_success 'pruning with a corrupted tip does not drop history' '
test_when_finished "git hash-object -w -t commit saved" &&
test_might_fail git prune --expire=now &&
verbose git cat-file -e $recoverable
'
# After "pack-refs --all --prune", refs/heads/master must still resolve
# to the name of the missing commit rather than having disappeared.
test_expect_success 'pack-refs does not silently delete broken loose ref' '
git pack-refs --all --prune &&
echo $missing >expect &&
git rev-parse refs/heads/master >actual &&
test_cmp expect actual
'
# We do not want to count on running pack-refs to
# actually pack it, as it is perfectly reasonable to
# skip processing a broken ref. So remove any loose
# master ref and write the packed-refs file by hand,
# with master pointing at the missing commit and a
# second ref, "other", pointing at a commit that exists.
# "expect" is left behind for the following tests.
test_expect_success 'create packed-refs file with broken ref' '
rm -f .git/refs/heads/master &&
cat >.git/packed-refs <<-EOF &&
$missing refs/heads/master
$recoverable refs/heads/other
EOF
echo $missing >expect &&
git rev-parse refs/heads/master >actual &&
test_cmp expect actual
'
# Re-running pack-refs over the hand-written packed-refs file must keep
# the broken master entry.  The comparison reuses the "expect" file
# written by the previous test.
test_expect_success 'pack-refs does not silently delete broken packed ref' '
git pack-refs --all --prune &&
git rev-parse refs/heads/master >actual &&
test_cmp expect actual
'
# Deleting the unrelated ref "other" (presumably forcing packed-refs to
# be rewritten -- TODO confirm) must not take the broken master entry
# with it.  The comparison again reuses the earlier "expect" file.
test_expect_success 'pack-refs does not drop broken refs during deletion' '
git update-ref -d refs/heads/other &&
git rev-parse refs/heads/master >actual &&
test_cmp expect actual
'
test_done