git-commit-vandalism/t/t7002-grep.sh

332 lines
6.7 KiB
Bash
Raw Normal View History

#!/bin/sh
#
# Copyright (c) 2006 Junio C Hamano
#
test_description='git grep various.
'
. ./test-lib.sh
cat >hello.c <<EOF
#include <stdio.h>
int main(int argc, const char **argv)
{
printf("Hello world.\n");
return 0;
}
EOF
test_expect_success setup '
{
echo foo mmap bar
echo foo_mmap bar
echo foo_mmap bar mmap
echo foo mmap bar_mmap
echo foo_mmap bar mmap baz
} >file &&
echo vvv >v &&
echo ww w >w &&
echo x x xx x >x &&
echo y yy >y &&
echo zzz > z &&
mkdir t &&
echo test >t/t &&
echo vvv >t/v &&
mkdir t/a &&
echo vvv >t/a/v &&
git add . &&
log --author/--committer: really match only with name part When we tried to find commits done by AUTHOR, the first implementation tried to pattern match a line with "^author .*AUTHOR", which later was enhanced to strip leading caret and look for "^author AUTHOR" when the search pattern was anchored at the left end (i.e. --author="^AUTHOR"). This had a few problems: * When looking for fixed strings (e.g. "git log -F --author=x --grep=y"), the regexp internally used "^author .*x" would never match anything; * To match at the end (e.g. "git log --author='google.com>$'"), the generated regexp has to also match the trailing timestamp part the commit header lines have. Also, in order to determine if the '$' at the end means "match at the end of the line" or just a literal dollar sign (probably backslash-quoted), we would need to parse the regexp ourselves. An earlier alternative tried to make sure that a line matches "^author " (to limit by field name) and the user supplied pattern at the same time. While it solved the -F problem by introducing a special override for matching the "^author ", it did not solve the trailing timestamp nor tail match problem. It also would have matched every commit if --author=author was asked for, not because the author's email part had this string, but because every commit header line that talks about the author begins with that field name, regardleses of who wrote it. Instead of piling more hacks on top of hacks, this rethinks the grep machinery that is used to look for strings in the commit header, and makes sure that (1) field name matches literally at the beginning of the line, followed by a SP, and (2) the user supplied pattern is matched against the remainder of the line, excluding the trailing timestamp data. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-05 07:15:02 +02:00
test_tick &&
git commit -m initial
'
test_expect_success 'grep should not segfault with a bad input' '
test_must_fail git grep "("
'
for H in HEAD ''
do
case "$H" in
HEAD) HC='HEAD:' L='HEAD' ;;
'') HC= L='in working tree' ;;
esac
test_expect_success "grep -w $L" '
{
echo ${HC}file:1:foo mmap bar
echo ${HC}file:3:foo_mmap bar mmap
echo ${HC}file:4:foo mmap bar_mmap
echo ${HC}file:5:foo_mmap bar mmap baz
} >expected &&
git grep -n -w -e mmap $H >actual &&
diff expected actual
'
test_expect_success "grep -w $L (w)" '
: >expected &&
! git grep -n -w -e "^w" >actual &&
test_cmp expected actual
'
test_expect_success "grep -w $L (x)" '
{
echo ${HC}x:1:x x xx x
} >expected &&
git grep -n -w -e "x xx* x" $H >actual &&
diff expected actual
'
test_expect_success "grep -w $L (y-1)" '
{
echo ${HC}y:1:y yy
} >expected &&
git grep -n -w -e "^y" $H >actual &&
diff expected actual
'
test_expect_success "grep -w $L (y-2)" '
: >expected &&
if git grep -n -w -e "^y y" $H >actual
then
echo should not have matched
cat actual
false
else
diff expected actual
fi
'
test_expect_success "grep -w $L (z)" '
: >expected &&
if git grep -n -w -e "^z" $H >actual
then
echo should not have matched
cat actual
false
else
diff expected actual
fi
'
test_expect_success "grep $L (t-1)" '
echo "${HC}t/t:1:test" >expected &&
git grep -n -e test $H >actual &&
diff expected actual
'
test_expect_success "grep $L (t-2)" '
echo "${HC}t:1:test" >expected &&
(
cd t &&
git grep -n -e test $H
) >actual &&
diff expected actual
'
test_expect_success "grep $L (t-3)" '
echo "${HC}t/t:1:test" >expected &&
(
cd t &&
git grep --full-name -n -e test $H
) >actual &&
diff expected actual
'
Sane use of test_expect_failure Originally, test_expect_failure was designed to be the opposite of test_expect_success, but this was a bad decision. Most tests run a series of commands that leads to the single command that needs to be tested, like this: test_expect_{success,failure} 'test title' ' setup1 && setup2 && setup3 && what is to be tested ' And expecting a failure exit from the whole sequence misses the point of writing tests. Your setup$N that are supposed to succeed may have failed without even reaching what you are trying to test. The only valid use of test_expect_failure is to check a trivial single command that is expected to fail, which is a minority in tests of Porcelain-ish commands. This large-ish patch rewrites all uses of test_expect_failure to use test_expect_success and rewrites the condition of what is tested, like this: test_expect_success 'test title' ' setup1 && setup2 && setup3 && ! this command should fail ' test_expect_failure is redefined to serve as a reminder that that test *should* succeed but due to a known breakage in git it currently does not pass. So if git-foo command should create a file 'bar' but you discovered a bug that it doesn't, you can write a test like this: test_expect_failure 'git-foo should create bar' ' rm -f bar && git foo && test -f bar ' This construct acts similar to test_expect_success, but instead of reporting "ok/FAIL" like test_expect_success does, the outcome is reported as "FIXED/still broken". Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-01 10:50:53 +01:00
test_expect_success "grep -c $L (no /dev/null)" '
! git grep -c test $H | grep /dev/null
'
test_expect_success "grep --max-depth -1 $L" '
{
echo ${HC}t/a/v:1:vvv
echo ${HC}t/v:1:vvv
echo ${HC}v:1:vvv
} >expected &&
git grep --max-depth -1 -n -e vvv $H >actual &&
test_cmp expected actual
'
test_expect_success "grep --max-depth 0 $L" '
{
echo ${HC}v:1:vvv
} >expected &&
git grep --max-depth 0 -n -e vvv $H >actual &&
test_cmp expected actual
'
test_expect_success "grep --max-depth 0 -- '*' $L" '
{
echo ${HC}t/a/v:1:vvv
echo ${HC}t/v:1:vvv
echo ${HC}v:1:vvv
} >expected &&
git grep --max-depth 0 -n -e vvv $H -- "*" >actual &&
test_cmp expected actual
'
test_expect_success "grep --max-depth 1 $L" '
{
echo ${HC}t/v:1:vvv
echo ${HC}v:1:vvv
} >expected &&
git grep --max-depth 1 -n -e vvv $H >actual &&
test_cmp expected actual
'
test_expect_success "grep --max-depth 0 -- t $L" '
{
echo ${HC}t/v:1:vvv
} >expected &&
git grep --max-depth 0 -n -e vvv $H -- t >actual &&
test_cmp expected actual
'
done
cat >expected <<EOF
file:foo mmap bar_mmap
EOF
test_expect_success 'grep -e A --and -e B' '
git grep -e "foo mmap" --and -e bar_mmap >actual &&
test_cmp expected actual
'
cat >expected <<EOF
file:foo_mmap bar mmap
file:foo_mmap bar mmap baz
EOF
test_expect_success 'grep ( -e A --or -e B ) --and -e B' '
git grep \( -e foo_ --or -e baz \) \
--and -e " mmap" >actual &&
test_cmp expected actual
'
cat >expected <<EOF
file:foo mmap bar
EOF
test_expect_success 'grep -e A --and --not -e B' '
git grep -e "foo mmap" --and --not -e bar_mmap >actual &&
test_cmp expected actual
'
cat >expected <<EOF
y:y yy
--
z:zzz
EOF
# Create 1024 file names that sort between "y" and "z" to make sure
# the two files are handled by different calls to an external grep.
# This depends on MAXARGS in builtin-grep.c being 1024 or less.
c32="0 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t u v"
test_expect_success 'grep -C1, hunk mark between files' '
for a in $c32; do for b in $c32; do : >y-$a$b; done; done &&
git add y-?? &&
git grep -C1 "^[yz]" >actual &&
test_cmp expected actual
'
test_expect_success 'grep -C1 --no-ext-grep, hunk mark between files' '
git grep -C1 --no-ext-grep "^[yz]" >actual &&
test_cmp expected actual
'
log --author/--committer: really match only with name part When we tried to find commits done by AUTHOR, the first implementation tried to pattern match a line with "^author .*AUTHOR", which later was enhanced to strip leading caret and look for "^author AUTHOR" when the search pattern was anchored at the left end (i.e. --author="^AUTHOR"). This had a few problems: * When looking for fixed strings (e.g. "git log -F --author=x --grep=y"), the regexp internally used "^author .*x" would never match anything; * To match at the end (e.g. "git log --author='google.com>$'"), the generated regexp has to also match the trailing timestamp part the commit header lines have. Also, in order to determine if the '$' at the end means "match at the end of the line" or just a literal dollar sign (probably backslash-quoted), we would need to parse the regexp ourselves. An earlier alternative tried to make sure that a line matches "^author " (to limit by field name) and the user supplied pattern at the same time. While it solved the -F problem by introducing a special override for matching the "^author ", it did not solve the trailing timestamp nor tail match problem. It also would have matched every commit if --author=author was asked for, not because the author's email part had this string, but because every commit header line that talks about the author begins with that field name, regardleses of who wrote it. Instead of piling more hacks on top of hacks, this rethinks the grep machinery that is used to look for strings in the commit header, and makes sure that (1) field name matches literally at the beginning of the line, followed by a SP, and (2) the user supplied pattern is matched against the remainder of the line, excluding the trailing timestamp data. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-05 07:15:02 +02:00
test_expect_success 'log grep setup' '
echo a >>file &&
test_tick &&
GIT_AUTHOR_NAME="With * Asterisk" \
GIT_AUTHOR_EMAIL="xyzzy@frotz.com" \
git commit -a -m "second" &&
echo a >>file &&
test_tick &&
git commit -a -m "third"
'
test_expect_success 'log grep (1)' '
git log --author=author --pretty=tformat:%s >actual &&
( echo third ; echo initial ) >expect &&
test_cmp expect actual
'
test_expect_success 'log grep (2)' '
git log --author=" * " -F --pretty=tformat:%s >actual &&
( echo second ) >expect &&
test_cmp expect actual
'
test_expect_success 'log grep (3)' '
git log --author="^A U" --pretty=tformat:%s >actual &&
( echo third ; echo initial ) >expect &&
test_cmp expect actual
'
test_expect_success 'log grep (4)' '
git log --author="frotz\.com>$" --pretty=tformat:%s >actual &&
( echo second ) >expect &&
test_cmp expect actual
'
test_expect_success 'log grep (5)' '
git log --author=Thor -F --grep=Thu --pretty=tformat:%s >actual &&
( echo third ; echo initial ) >expect &&
test_cmp expect actual
'
test_expect_success 'log grep (6)' '
git log --author=-0700 --pretty=tformat:%s >actual &&
>expect &&
test_cmp expect actual
'
log --author/--committer: really match only with name part When we tried to find commits done by AUTHOR, the first implementation tried to pattern match a line with "^author .*AUTHOR", which later was enhanced to strip leading caret and look for "^author AUTHOR" when the search pattern was anchored at the left end (i.e. --author="^AUTHOR"). This had a few problems: * When looking for fixed strings (e.g. "git log -F --author=x --grep=y"), the regexp internally used "^author .*x" would never match anything; * To match at the end (e.g. "git log --author='google.com>$'"), the generated regexp has to also match the trailing timestamp part the commit header lines have. Also, in order to determine if the '$' at the end means "match at the end of the line" or just a literal dollar sign (probably backslash-quoted), we would need to parse the regexp ourselves. An earlier alternative tried to make sure that a line matches "^author " (to limit by field name) and the user supplied pattern at the same time. While it solved the -F problem by introducing a special override for matching the "^author ", it did not solve the trailing timestamp nor tail match problem. It also would have matched every commit if --author=author was asked for, not because the author's email part had this string, but because every commit header line that talks about the author begins with that field name, regardleses of who wrote it. Instead of piling more hacks on top of hacks, this rethinks the grep machinery that is used to look for strings in the commit header, and makes sure that (1) field name matches literally at the beginning of the line, followed by a SP, and (2) the user supplied pattern is matched against the remainder of the line, excluding the trailing timestamp data. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-05 07:15:02 +02:00
test_expect_success 'grep with CE_VALID file' '
git update-index --assume-unchanged t/t &&
rm t/t &&
test "$(git grep --no-ext-grep test)" = "t/t:test" &&
git update-index --no-assume-unchanged t/t &&
git checkout t/t
log --author/--committer: really match only with name part When we tried to find commits done by AUTHOR, the first implementation tried to pattern match a line with "^author .*AUTHOR", which later was enhanced to strip leading caret and look for "^author AUTHOR" when the search pattern was anchored at the left end (i.e. --author="^AUTHOR"). This had a few problems: * When looking for fixed strings (e.g. "git log -F --author=x --grep=y"), the regexp internally used "^author .*x" would never match anything; * To match at the end (e.g. "git log --author='google.com>$'"), the generated regexp has to also match the trailing timestamp part the commit header lines have. Also, in order to determine if the '$' at the end means "match at the end of the line" or just a literal dollar sign (probably backslash-quoted), we would need to parse the regexp ourselves. An earlier alternative tried to make sure that a line matches "^author " (to limit by field name) and the user supplied pattern at the same time. While it solved the -F problem by introducing a special override for matching the "^author ", it did not solve the trailing timestamp nor tail match problem. It also would have matched every commit if --author=author was asked for, not because the author's email part had this string, but because every commit header line that talks about the author begins with that field name, regardleses of who wrote it. Instead of piling more hacks on top of hacks, this rethinks the grep machinery that is used to look for strings in the commit header, and makes sure that (1) field name matches literally at the beginning of the line, followed by a SP, and (2) the user supplied pattern is matched against the remainder of the line, excluding the trailing timestamp data. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-05 07:15:02 +02:00
'
cat >expected <<EOF
hello.c=#include <stdio.h>
hello.c: return 0;
EOF
test_expect_success 'grep -p with userdiff' '
git config diff.custom.funcname "^#" &&
echo "hello.c diff=custom" >.gitattributes &&
git grep -p return >actual &&
test_cmp expected actual
'
cat >expected <<EOF
hello.c=int main(int argc, const char **argv)
hello.c: return 0;
EOF
test_expect_success 'grep -p' '
rm -f .gitattributes &&
git grep -p return >actual &&
test_cmp expected actual
'
cat >expected <<EOF
hello.c-#include <stdio.h>
hello.c=int main(int argc, const char **argv)
hello.c-{
hello.c- printf("Hello world.\n");
hello.c: return 0;
EOF
test_expect_success 'grep -p -B5' '
git grep -p -B5 return >actual &&
test_cmp expected actual
'
test_done