b897bf5f37
Our anonymize_mem() function is careful to take a ptr/len pair to allow storing binary tokens like object ids, as well as partial strings (e.g., just "foo" of "foo/bar"). But it duplicates the hash key using xstrdup()! That means that:

  - for a partial string, we'd store all bytes up to the NUL, even though we'd never look at anything past "len". This didn't produce wrong behavior, but was wasteful.

  - for a binary oid that doesn't contain a zero byte, we'd copy garbage bytes off the end of the array (though as long as nothing complained about reading uninitialized bytes, further reads would be limited by "len", and we'd produce the correct results)

  - for a binary oid that does contain a zero byte, we'd copy _fewer_ bytes than intended into the hashmap struct. When we later try to look up a value, we'd access uninitialized memory and potentially falsely claim that a particular oid is not present.

The most common reason to store an oid is an anonymized gitlink, but our test case doesn't have any gitlinks at all. So let's add one whose oid contains a NUL and is present at two different paths. ASan catches the memory error, but even without it we can detect the bug because the oid is not anonymized the same way for both paths.

And of course the fix is to copy the correct number of bytes. We don't technically need the appended NUL from xmemdupz(), but it doesn't hurt as an extra protection against anybody treating it like a string (plus a future patch will push us more in that direction).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
124 lines
3.4 KiB
Bash
Executable File
124 lines
3.4 KiB
Bash
Executable File
#!/bin/sh
#
# Exercises "git fast-export --anonymize": the exported stream must not
# contain the original path names, refnames (other than master), identities,
# tag message or gitlink oids, and importing it must give a repository with
# the same shape as the original one.

test_description='basic tests for fast-export --anonymize'
. ./test-lib.sh

# Build the fixture history used by all later tests:
#   - two commits ("base", "foo") on the starting branch,
#   - a branch "other" forked from HEAD^ with two files under subdir/,
#   - one commit on "other" that records the same fake gitlink oid at two
#     different paths (link1 and link2),
#   - an annotated tag "mytag" on the tip of "other".
test_expect_success 'setup simple repo' '
	test_commit base &&
	test_commit foo &&
	git checkout -b other HEAD^ &&
	mkdir subdir &&
	test_commit subdir/bar &&
	test_commit subdir/xyzzy &&
	# sed without the g flag rewrites only the first "0" of ZERO_OID
	# (provided by test-lib.sh), so the fake oid starts with "a" and is
	# followed by zeros; as a binary oid it therefore contains NUL bytes
	fake_commit=$(echo $ZERO_OID | sed s/0/a/) &&
	git update-index --add --cacheinfo 160000,$fake_commit,link1 &&
	git update-index --add --cacheinfo 160000,$fake_commit,link2 &&
	git commit -m "add gitlink" &&
	git tag -m "annotated tag" mytag
'

# Export every ref in anonymized form into the file "stream", which the
# following tests inspect with grep and later feed to fast-import.
test_expect_success 'export anonymized stream' '
	git fast-export --anonymize --all >stream
'

# None of the names handed to test_commit/mkdir during setup may appear
# anywhere in the stream.
# this also covers commit messages
test_expect_success 'stream omits path names' '
	! grep base stream &&
	! grep foo stream &&
	! grep subdir stream &&
	! grep bar stream &&
	! grep xyzzy stream
'

# The fake gitlink oid recorded during setup ("a" followed by zeros) must
# not leak into the stream.
test_expect_success 'stream omits gitlink oids' '
	# avoid relying on the whole oid to remain hash-agnostic; this is
	# plenty to be unique within our test case
	! grep a000000000000000000 stream
'

# "master" is the one refname expected to survive anonymization verbatim.
test_expect_success 'stream allows master as refname' '
	grep master stream
'

# The branch and tag names created during setup must not appear in the
# stream.
test_expect_success 'stream omits other refnames' '
	! grep other stream &&
	! grep mytag stream
'

# Neither the author nor the committer name/email from the environment
# may appear in the stream.
test_expect_success 'stream omits identities' '
	! grep "$GIT_COMMITTER_NAME" stream &&
	! grep "$GIT_COMMITTER_EMAIL" stream &&
	! grep "$GIT_AUTHOR_NAME" stream &&
	! grep "$GIT_AUTHOR_EMAIL" stream
'

# The message given to "git tag -m" during setup must not appear in the
# stream.
test_expect_success 'stream omits tag message' '
	! grep "annotated tag" stream
'

# Import the anonymized stream into a fresh repository named "new".
#
# NOTE: we chdir to the new, anonymized repository
# after this. All further tests should assume this.
# The original repository is therefore reached as ".." from here on.
test_expect_success 'import stream to new repository' '
	git init new &&
	cd new &&
	git fast-import <../stream
'

# The import must yield exactly two branches. The one that is not master
# is the anonymized counterpart of "other"; its full refname is kept in
# $other_branch for the tests that follow.
test_expect_success 'result has two branches' '
	git for-each-ref --format="%(refname)" refs/heads >branches &&
	test_line_count = 2 branches &&
	other_branch=$(grep -v refs/heads/master branches)
'

# Compare the symmetric difference between the two branches in the original
# and in the anonymized repository. Only the left/right/boundary marker
# (%m) and the committer timestamp (%ct) are printed, so the comparison
# does not depend on any anonymized content.
test_expect_success 'repo has original shape and timestamps' '
	shape () {
		git log --format="%m %ct" --left-right --boundary "$@"
	} &&
	(cd .. && shape master...other) >expect &&
	shape master...$other_branch >actual &&
	test_cmp expect actual
'

# Compare the object types (second space-separated field of ls-tree
# output) at the root of the original HEAD and of the anonymized branch.
# The listing of the anonymized root is left in the file "root" for the
# tests that follow.
test_expect_success 'root tree has original shape' '
	# the output entries are not necessarily in the same
	# order, but we should at least have the same set of
	# object types.
	git -C .. ls-tree HEAD >orig-root &&
	cut -d" " -f2 <orig-root | sort >expect &&
	git ls-tree $other_branch >root &&
	cut -d" " -f2 <root | sort >actual &&
	test_cmp expect actual
'

# The entries that lived under subdir/ in the original must all sit in one
# subtree of the anonymized root. The anonymized subtree name is taken from
# the path column (after the tab) of the tree entry in the "root" listing
# written by the previous test.
test_expect_success 'paths in subdir ended up in one tree' '
	git -C .. ls-tree other:subdir >orig-subdir &&
	cut -d" " -f2 <orig-subdir | sort >expect &&
	tree=$(grep tree root | cut -f2) &&
	git ls-tree $other_branch:$tree >tree &&
	# sort both sides: entry order follows the path names, which differ
	# between the original and the anonymized tree
	cut -d" " -f2 <tree | sort >actual &&
	test_cmp expect actual
'

# Both gitlinks were created with the same oid, so after anonymization the
# commit entries in the "root" listing must still share a single oid
# (third field), i.e. exactly one line is left after sort -u.
test_expect_success 'identical gitlinks got identical oid' '
	awk "/commit/ { print \$3 }" <root | sort -u >commits &&
	test_line_count = 1 commits
'

# The only non-empty %(*objectname) value among all refs must equal the
# tip of the anonymized "other" branch.
test_expect_success 'tag points to branch tip' '
	git rev-parse $other_branch >expect &&
	# go through a file instead of a pipe so that a failing
	# for-each-ref is not hidden behind the exit status of grep
	git for-each-ref --format="%(*objectname)" >peeled &&
	# "grep ." keeps only the non-empty lines
	grep . peeled >actual &&
	test_cmp expect actual
'

# Across all commits there must be exactly one distinct author ident and
# exactly one distinct committer ident, and the author list must differ
# from the committer list.
test_expect_success 'idents are shared' '
	git log --all --format="%an <%ae>" >authors &&
	sort -u authors >unique &&
	test_line_count = 1 unique &&
	git log --all --format="%cn <%ce>" >committers &&
	sort -u committers >unique &&
	test_line_count = 1 unique &&
	! test_cmp authors committers
'

# End of the test script; test_done comes from test-lib.sh.
test_done