Merge branch 'ab/fsck-skiplist'

Update fsck.skipList implementation and documentation.

* ab/fsck-skiplist:
  fsck: support comments & empty lines in skipList
  fsck: use oidset instead of oid_array for skipList
  fsck: use strbuf_getline() to read skiplist file
  fsck: add a performance test for skipList
  fsck: add a performance test
  fsck: document that skipList input must be unabbreviated
  fsck: document and test commented & empty line skipList input
  fsck: document and test sorted skipList input
  fsck tests: add a test for no skipList input
  fsck tests: setup of bogus commit object
This commit is contained in:
Junio C Hamano 2018-10-10 12:37:16 +09:00
commit 66ec2373fe
6 changed files with 171 additions and 45 deletions

View File

@ -1567,12 +1567,16 @@ doing the same for `receive.fsck.<msg-id>` and `fetch.fsck.<msg-id>`
will only cause git to warn. will only cause git to warn.
fsck.skipList:: fsck.skipList::
The path to a sorted list of object names (i.e. one SHA-1 per The path to a list of object names (i.e. one unabbreviated SHA-1 per
line) that are known to be broken in a non-fatal way and should line) that are known to be broken in a non-fatal way and should
be ignored. This feature is useful when an established project be ignored. On versions of Git 2.20 and later comments ('#'), empty
should be accepted despite early commits containing errors that lines, and any leading and trailing whitespace is ignored. Everything
can be safely ignored such as invalid committer email addresses. but a SHA-1 per line will error out on older versions.
Note: corrupt objects cannot be skipped with this setting. +
This feature is useful when an established project should be accepted
despite early commits containing errors that can be safely ignored
such as invalid committer email addresses. Note: corrupt objects
cannot be skipped with this setting.
+ +
Like `fsck.<msg-id>` this variable has corresponding Like `fsck.<msg-id>` this variable has corresponding
`receive.fsck.skipList` and `fetch.fsck.skipList` variants. `receive.fsck.skipList` and `fetch.fsck.skipList` variants.
@ -1582,6 +1586,15 @@ Unlike variables like `color.ui` and `core.editor` the
fall back on the `fsck.skipList` configuration if they aren't set. To fall back on the `fsck.skipList` configuration if they aren't set. To
uniformly configure the same fsck settings in different circumstances uniformly configure the same fsck settings in different circumstances
all three of them they must all set to the same values. all three of them they must all set to the same values.
+
Older versions of Git (before 2.20) documented that the object names
list should be sorted. This was never a requirement, the object names
could appear in any order, but when reading the list we tracked whether
the list was sorted for the purposes of an internal binary search
implementation, which could save itself some work with an already sorted
list. Unless you had a humongous list there was no reason to go out of
your way to pre-sort the list. After Git version 2.20 a hash implementation
is used instead, so there's now no reason to pre-sort the list.
gc.aggressiveDepth:: gc.aggressiveDepth::
The depth parameter used in the delta compression The depth parameter used in the delta compression

60
fsck.c
View File

@ -10,7 +10,6 @@
#include "fsck.h" #include "fsck.h"
#include "refs.h" #include "refs.h"
#include "utf8.h" #include "utf8.h"
#include "sha1-array.h"
#include "decorate.h" #include "decorate.h"
#include "oidset.h" #include "oidset.h"
#include "packfile.h" #include "packfile.h"
@ -184,40 +183,37 @@ static int fsck_msg_type(enum fsck_msg_id msg_id,
static void init_skiplist(struct fsck_options *options, const char *path) static void init_skiplist(struct fsck_options *options, const char *path)
{ {
static struct oid_array skiplist = OID_ARRAY_INIT; FILE *fp;
int sorted, fd; struct strbuf sb = STRBUF_INIT;
char buffer[GIT_MAX_HEXSZ + 1];
struct object_id oid; struct object_id oid;
if (options->skiplist) fp = fopen(path, "r");
sorted = options->skiplist->sorted; if (!fp)
else {
sorted = 1;
options->skiplist = &skiplist;
}
fd = open(path, O_RDONLY);
if (fd < 0)
die("Could not open skip list: %s", path); die("Could not open skip list: %s", path);
for (;;) { while (!strbuf_getline(&sb, fp)) {
const char *p; const char *p;
int result = read_in_full(fd, buffer, sizeof(buffer)); const char *hash;
if (result < 0)
die_errno("Could not read '%s'", path);
if (!result)
break;
if (parse_oid_hex(buffer, &oid, &p) || *p != '\n')
die("Invalid SHA-1: %s", buffer);
oid_array_append(&skiplist, &oid);
if (sorted && skiplist.nr > 1 &&
oidcmp(&skiplist.oid[skiplist.nr - 2],
&oid) > 0)
sorted = 0;
}
close(fd);
if (sorted) /*
skiplist.sorted = 1; * Allow trailing comments, leading whitespace
* (including before commits), and empty or whitespace
* only lines.
*/
hash = strchr(sb.buf, '#');
if (hash)
strbuf_setlen(&sb, hash - sb.buf);
strbuf_trim(&sb);
if (!sb.len)
continue;
if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
die("Invalid SHA-1: %s", sb.buf);
oidset_insert(&options->skiplist, &oid);
}
if (ferror(fp))
die_errno("Could not read '%s'", path);
fclose(fp);
strbuf_release(&sb);
} }
static int parse_msg_type(const char *str) static int parse_msg_type(const char *str)
@ -322,9 +318,7 @@ static void append_msg_id(struct strbuf *sb, const char *msg_id)
static int object_on_skiplist(struct fsck_options *opts, struct object *obj) static int object_on_skiplist(struct fsck_options *opts, struct object *obj)
{ {
if (opts && opts->skiplist && obj) return opts && obj && oidset_contains(&opts->skiplist, &obj->oid);
return oid_array_lookup(opts->skiplist, &obj->oid) >= 0;
return 0;
} }
__attribute__((format (printf, 4, 5))) __attribute__((format (printf, 4, 5)))

8
fsck.h
View File

@ -1,6 +1,8 @@
#ifndef GIT_FSCK_H #ifndef GIT_FSCK_H
#define GIT_FSCK_H #define GIT_FSCK_H
#include "oidset.h"
#define FSCK_ERROR 1 #define FSCK_ERROR 1
#define FSCK_WARN 2 #define FSCK_WARN 2
#define FSCK_IGNORE 3 #define FSCK_IGNORE 3
@ -35,12 +37,12 @@ struct fsck_options {
fsck_error error_func; fsck_error error_func;
unsigned strict:1; unsigned strict:1;
int *msg_type; int *msg_type;
struct oid_array *skiplist; struct oidset skiplist;
struct decoration *object_names; struct decoration *object_names;
}; };
#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL } #define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT }
#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL } #define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL, OIDSET_INIT }
/* descend in all linked child objects /* descend in all linked child objects
* the return value is: * the return value is:

13
t/perf/p1450-fsck.sh Executable file
View File

@ -0,0 +1,13 @@
#!/bin/sh
test_description='Test fsck performance'
. ./perf-lib.sh
test_perf_large_repo
test_perf 'fsck' '
git fsck
'
test_done

40
t/perf/p1451-fsck-skip-list.sh Executable file
View File

@ -0,0 +1,40 @@
#!/bin/sh
test_description='Test fsck skipList performance'
. ./perf-lib.sh
test_perf_fresh_repo
n=1000000
test_expect_success "setup $n bad commits" '
for i in $(test_seq 1 $n)
do
echo "commit refs/heads/master" &&
echo "committer C <c@example.com> 1234567890 +0000" &&
echo "data <<EOF" &&
echo "$i.Q." &&
echo "EOF"
done | q_to_nul | git fast-import
'
skip=0
while test $skip -le $n
do
test_expect_success "create skipList for $skip bad commits" '
git log --format=%H --max-count=$skip |
sort >skiplist
'
test_perf "fsck with $skip skipped bad commits" '
git -c fsck.skipList=skiplist fsck
'
case $skip in
0) skip=1 ;;
*) skip=${skip}0 ;;
esac
done
test_done

View File

@ -133,6 +133,34 @@ committer Bugs Bunny <bugs@bun.ni> 1234567890 +0000
This commit object intentionally broken This commit object intentionally broken
EOF EOF
test_expect_success 'setup bogus commit' '
commit="$(git hash-object -t commit -w --stdin <bogus-commit)"
'
test_expect_success 'fsck with no skipList input' '
test_must_fail git fsck 2>err &&
test_i18ngrep "missingEmail" err
'
test_expect_success 'setup sorted and unsorted skipLists' '
cat >SKIP.unsorted <<-EOF &&
0000000000000000000000000000000000000004
0000000000000000000000000000000000000002
$commit
0000000000000000000000000000000000000001
0000000000000000000000000000000000000003
EOF
sort SKIP.unsorted >SKIP.sorted
'
test_expect_success 'fsck with sorted skipList' '
git -c fsck.skipList=SKIP.sorted fsck
'
test_expect_success 'fsck with unsorted skipList' '
git -c fsck.skipList=SKIP.unsorted fsck
'
test_expect_success 'fsck with invalid or bogus skipList input' ' test_expect_success 'fsck with invalid or bogus skipList input' '
git -c fsck.skipList=/dev/null -c fsck.missingEmail=ignore fsck && git -c fsck.skipList=/dev/null -c fsck.missingEmail=ignore fsck &&
test_must_fail git -c fsck.skipList=does-not-exist -c fsck.missingEmail=ignore fsck 2>err && test_must_fail git -c fsck.skipList=does-not-exist -c fsck.missingEmail=ignore fsck 2>err &&
@ -141,8 +169,47 @@ test_expect_success 'fsck with invalid or bogus skipList input' '
test_i18ngrep "Invalid SHA-1: \[core\]" err test_i18ngrep "Invalid SHA-1: \[core\]" err
' '
test_expect_success 'fsck with other accepted skipList input (comments & empty lines)' '
cat >SKIP.with-comment <<-EOF &&
# Some bad commit
0000000000000000000000000000000000000001
EOF
test_must_fail git -c fsck.skipList=SKIP.with-comment fsck 2>err-with-comment &&
test_i18ngrep "missingEmail" err-with-comment &&
cat >SKIP.with-empty-line <<-EOF &&
0000000000000000000000000000000000000001
0000000000000000000000000000000000000002
EOF
test_must_fail git -c fsck.skipList=SKIP.with-empty-line fsck 2>err-with-empty-line &&
test_i18ngrep "missingEmail" err-with-empty-line
'
test_expect_success 'fsck no garbage output from comments & empty lines errors' '
test_line_count = 1 err-with-comment &&
test_line_count = 1 err-with-empty-line
'
test_expect_success 'fsck with invalid abbreviated skipList input' '
echo $commit | test_copy_bytes 20 >SKIP.abbreviated &&
test_must_fail git -c fsck.skipList=SKIP.abbreviated fsck 2>err-abbreviated &&
test_i18ngrep "^fatal: Invalid SHA-1: " err-abbreviated
'
test_expect_success 'fsck with exhaustive accepted skipList input (various types of comments etc.)' '
>SKIP.exhaustive &&
echo "# A commented line" >>SKIP.exhaustive &&
echo "" >>SKIP.exhaustive &&
echo " " >>SKIP.exhaustive &&
echo " # Comment after whitespace" >>SKIP.exhaustive &&
echo "$commit # Our bad commit (with leading whitespace and trailing comment)" >>SKIP.exhaustive &&
echo "# Some bad commit (leading whitespace)" >>SKIP.exhaustive &&
echo " 0000000000000000000000000000000000000001" >>SKIP.exhaustive &&
git -c fsck.skipList=SKIP.exhaustive fsck 2>err &&
test_must_be_empty err
'
test_expect_success 'push with receive.fsck.skipList' ' test_expect_success 'push with receive.fsck.skipList' '
commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
git push . $commit:refs/heads/bogus && git push . $commit:refs/heads/bogus &&
rm -rf dst && rm -rf dst &&
git init dst && git init dst &&
@ -169,7 +236,6 @@ test_expect_success 'push with receive.fsck.skipList' '
' '
test_expect_success 'fetch with fetch.fsck.skipList' ' test_expect_success 'fetch with fetch.fsck.skipList' '
commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
refspec=refs/heads/bogus:refs/heads/bogus && refspec=refs/heads/bogus:refs/heads/bogus &&
git push . $commit:refs/heads/bogus && git push . $commit:refs/heads/bogus &&
rm -rf dst && rm -rf dst &&
@ -204,7 +270,6 @@ test_expect_success 'fsck.<unknownmsg-id> dies' '
' '
test_expect_success 'push with receive.fsck.missingEmail=warn' ' test_expect_success 'push with receive.fsck.missingEmail=warn' '
commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
git push . $commit:refs/heads/bogus && git push . $commit:refs/heads/bogus &&
rm -rf dst && rm -rf dst &&
git init dst && git init dst &&
@ -232,7 +297,6 @@ test_expect_success 'push with receive.fsck.missingEmail=warn' '
' '
test_expect_success 'fetch with fetch.fsck.missingEmail=warn' ' test_expect_success 'fetch with fetch.fsck.missingEmail=warn' '
commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
refspec=refs/heads/bogus:refs/heads/bogus && refspec=refs/heads/bogus:refs/heads/bogus &&
git push . $commit:refs/heads/bogus && git push . $commit:refs/heads/bogus &&
rm -rf dst && rm -rf dst &&