2006-10-12 14:22:14 +02:00
|
|
|
#!/bin/sh
|
|
|
|
#
|
|
|
|
# Copyright (c) 2006 Johannes E. Schindelin
|
|
|
|
#
|
|
|
|
|
|
|
|
test_description='Test special whitespace in diff engine.
|
|
|
|
|
|
|
|
'
|
diff.c: fix a double-free regression in a18d66cefb
My a18d66cefb9 (diff.c: free "buf" in diff_words_flush(), 2022-03-04)
has what it retrospect is a rather obvious bug (I don't know what I
was thinking, if it all): We use the "emitted_symbols" allocation in
append_emitted_diff_symbol() N times, but starting with a18d66cefb9
we'd free it after its first use!
The correct way to free this data would have been to add the free() to
the existing free_diff_words_data() function, so let's do that. The
"ecbdata->diff_words->opt->emitted_symbols" might be NULL, so let's
add a trivial free_emitted_diff_symbols() helper next to the function
that appends to it.
This fixes the "no effect on show from" leak tested for in the
preceding commit. Perhaps confusingly this change will skip that test
under SANITIZE=leak, but otherwise opt-in the
"t4015-diff-whitespace.sh" test.
The reason is that a18d66cefb9 "fixed" the leak in the preceding "no
effect on diff" test, but for the first call to diff_words_flush() the
"wol->buf" would be NULL, so we wouldn't double-free (and
SANITIZE=address would see nothing amiss). With this change we'll
still pass that test, showing that we've also fixed leaks on this
codepath.
We then have to skip the new "no effect on show" test because it
happens to trip over an unrelated memory leak (in revision.c). The
same goes for "move detection with submodules". Both of them pass with
SANITIZE=address though, which would error on the "no effect on show"
test before this change.
Reported-by: Michael J Gruber <git@grubix.eu>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-03-17 15:55:35 +01:00
|
|
|
|
|
|
|
TEST_PASSES_SANITIZE_LEAK=true
|
2006-10-12 14:22:14 +02:00
|
|
|
. ./test-lib.sh
|
2021-02-12 14:29:40 +01:00
|
|
|
. "$TEST_DIRECTORY"/lib-diff.sh
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2015-05-26 20:19:52 +02:00
|
|
|
test_expect_success "Ray Lehtiniemi's example" '
|
|
|
|
cat <<-\EOF >x &&
|
|
|
|
do {
|
|
|
|
nothing;
|
|
|
|
} while (0);
|
|
|
|
EOF
|
|
|
|
git update-index --add x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
old_hash_x=$(git hash-object x) &&
|
|
|
|
before=$(git rev-parse --short "$old_hash_x") &&
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2015-05-26 20:19:52 +02:00
|
|
|
cat <<-\EOF >x &&
|
|
|
|
do
|
|
|
|
{
|
|
|
|
nothing;
|
|
|
|
}
|
|
|
|
while (0);
|
|
|
|
EOF
|
2019-11-27 20:53:24 +01:00
|
|
|
new_hash_x=$(git hash-object x) &&
|
|
|
|
after=$(git rev-parse --short "$new_hash_x") &&
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2019-10-28 01:59:00 +01:00
|
|
|
cat <<-EOF >expect &&
|
2015-05-26 20:19:52 +02:00
|
|
|
diff --git a/x b/x
|
2019-10-28 01:59:00 +01:00
|
|
|
index $before..$after 100644
|
2015-05-26 20:19:52 +02:00
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -1,3 +1,5 @@
|
|
|
|
-do {
|
|
|
|
+do
|
|
|
|
+{
|
|
|
|
nothing;
|
|
|
|
-} while (0);
|
|
|
|
+}
|
|
|
|
+while (0);
|
|
|
|
EOF
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2015-05-26 20:19:52 +02:00
|
|
|
git diff >out &&
|
|
|
|
test_cmp expect out &&
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2015-05-26 20:19:52 +02:00
|
|
|
git diff -w >out &&
|
|
|
|
test_cmp expect out &&
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2015-05-26 20:19:52 +02:00
|
|
|
git diff -b >out &&
|
|
|
|
test_cmp expect out
|
|
|
|
'
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2015-05-26 20:19:52 +02:00
|
|
|
test_expect_success 'another test, without options' '
|
|
|
|
tr Q "\015" <<-\EOF >x &&
|
|
|
|
whitespace at beginning
|
|
|
|
whitespace change
|
|
|
|
whitespace in the middle
|
|
|
|
whitespace at end
|
|
|
|
unchanged line
|
|
|
|
CR at endQ
|
|
|
|
EOF
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2015-05-26 20:19:52 +02:00
|
|
|
git update-index x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
old_hash_x=$(git hash-object x) &&
|
|
|
|
before=$(git rev-parse --short "$old_hash_x") &&
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2015-05-26 20:19:52 +02:00
|
|
|
tr "_" " " <<-\EOF >x &&
|
|
|
|
_ whitespace at beginning
|
|
|
|
whitespace change
|
|
|
|
white space in the middle
|
|
|
|
whitespace at end__
|
|
|
|
unchanged line
|
|
|
|
CR at end
|
|
|
|
EOF
|
2019-11-27 20:53:24 +01:00
|
|
|
new_hash_x=$(git hash-object x) &&
|
|
|
|
after=$(git rev-parse --short "$new_hash_x") &&
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2019-10-28 01:59:00 +01:00
|
|
|
tr "Q_" "\015 " <<-EOF >expect &&
|
2015-05-26 20:19:52 +02:00
|
|
|
diff --git a/x b/x
|
2019-10-28 01:59:00 +01:00
|
|
|
index $before..$after 100644
|
2015-05-26 20:19:52 +02:00
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -1,6 +1,6 @@
|
|
|
|
-whitespace at beginning
|
|
|
|
-whitespace change
|
|
|
|
-whitespace in the middle
|
|
|
|
-whitespace at end
|
|
|
|
+ whitespace at beginning
|
|
|
|
+whitespace change
|
|
|
|
+white space in the middle
|
|
|
|
+whitespace at end__
|
|
|
|
unchanged line
|
|
|
|
-CR at endQ
|
|
|
|
+CR at end
|
|
|
|
EOF
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2015-05-26 20:19:52 +02:00
|
|
|
git diff >out &&
|
|
|
|
test_cmp expect out &&
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2015-05-26 20:19:52 +02:00
|
|
|
git diff -w >out &&
|
2018-07-27 19:48:11 +02:00
|
|
|
test_must_be_empty out &&
|
2015-05-26 20:19:52 +02:00
|
|
|
|
|
|
|
git diff -w -b >out &&
|
2018-07-27 19:48:11 +02:00
|
|
|
test_must_be_empty out &&
|
2015-05-26 20:19:52 +02:00
|
|
|
|
|
|
|
git diff -w --ignore-space-at-eol >out &&
|
2018-07-27 19:48:11 +02:00
|
|
|
test_must_be_empty out &&
|
2015-05-26 20:19:52 +02:00
|
|
|
|
|
|
|
git diff -w -b --ignore-space-at-eol >out &&
|
2018-07-27 19:48:11 +02:00
|
|
|
test_must_be_empty out &&
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2017-10-26 08:32:27 +02:00
|
|
|
git diff -w --ignore-cr-at-eol >out &&
|
2018-07-27 19:48:11 +02:00
|
|
|
test_must_be_empty out &&
|
2015-05-26 20:19:52 +02:00
|
|
|
|
2019-10-28 01:59:00 +01:00
|
|
|
tr "Q_" "\015 " <<-EOF >expect &&
|
2015-05-26 20:19:52 +02:00
|
|
|
diff --git a/x b/x
|
2019-10-28 01:59:00 +01:00
|
|
|
index $before..$after 100644
|
2015-05-26 20:19:52 +02:00
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -1,6 +1,6 @@
|
|
|
|
-whitespace at beginning
|
|
|
|
+_ whitespace at beginning
|
|
|
|
whitespace change
|
|
|
|
-whitespace in the middle
|
|
|
|
+white space in the middle
|
|
|
|
whitespace at end__
|
|
|
|
unchanged line
|
|
|
|
CR at end
|
|
|
|
EOF
|
|
|
|
git diff -b >out &&
|
|
|
|
test_cmp expect out &&
|
|
|
|
|
|
|
|
git diff -b --ignore-space-at-eol >out &&
|
|
|
|
test_cmp expect out &&
|
|
|
|
|
2017-10-26 08:32:27 +02:00
|
|
|
git diff -b --ignore-cr-at-eol >out &&
|
|
|
|
test_cmp expect out &&
|
|
|
|
|
2019-10-28 01:59:00 +01:00
|
|
|
tr "Q_" "\015 " <<-EOF >expect &&
|
2015-05-26 20:19:52 +02:00
|
|
|
diff --git a/x b/x
|
2019-10-28 01:59:00 +01:00
|
|
|
index $before..$after 100644
|
2015-05-26 20:19:52 +02:00
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -1,6 +1,6 @@
|
|
|
|
-whitespace at beginning
|
|
|
|
-whitespace change
|
|
|
|
-whitespace in the middle
|
|
|
|
+_ whitespace at beginning
|
|
|
|
+whitespace change
|
|
|
|
+white space in the middle
|
|
|
|
whitespace at end__
|
|
|
|
unchanged line
|
|
|
|
CR at end
|
|
|
|
EOF
|
|
|
|
git diff --ignore-space-at-eol >out &&
|
2017-10-26 08:32:27 +02:00
|
|
|
test_cmp expect out &&
|
|
|
|
|
|
|
|
git diff --ignore-space-at-eol --ignore-cr-at-eol >out &&
|
|
|
|
test_cmp expect out &&
|
|
|
|
|
2019-10-28 01:59:00 +01:00
|
|
|
tr "Q_" "\015 " <<-EOF >expect &&
|
2017-10-26 08:32:27 +02:00
|
|
|
diff --git a/x b/x
|
2019-10-28 01:59:00 +01:00
|
|
|
index_$before..$after 100644
|
2017-10-26 08:32:27 +02:00
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -1,6 +1,6 @@
|
|
|
|
-whitespace at beginning
|
|
|
|
-whitespace change
|
|
|
|
-whitespace in the middle
|
|
|
|
-whitespace at end
|
|
|
|
+_ whitespace at beginning
|
|
|
|
+whitespace_ _change
|
|
|
|
+white space in the middle
|
|
|
|
+whitespace at end__
|
|
|
|
unchanged line
|
|
|
|
CR at end
|
|
|
|
EOF
|
|
|
|
git diff --ignore-cr-at-eol >out &&
|
2015-05-26 20:19:52 +02:00
|
|
|
test_cmp expect out
|
|
|
|
'
|
2006-10-12 14:22:14 +02:00
|
|
|
|
2013-06-19 20:46:07 +02:00
|
|
|
test_expect_success 'ignore-blank-lines: only new lines' '
|
|
|
|
test_seq 5 >x &&
|
|
|
|
git update-index x &&
|
2013-10-27 22:26:48 +01:00
|
|
|
test_seq 5 | sed "/3/i\\
|
2013-06-19 20:46:07 +02:00
|
|
|
" >x &&
|
|
|
|
git diff --ignore-blank-lines >out &&
|
2018-07-27 19:48:11 +02:00
|
|
|
test_must_be_empty out
|
2013-06-19 20:46:07 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'ignore-blank-lines: only new lines with space' '
|
|
|
|
test_seq 5 >x &&
|
|
|
|
git update-index x &&
|
2013-10-27 22:26:48 +01:00
|
|
|
test_seq 5 | sed "/3/i\\
|
|
|
|
" >x &&
|
2013-06-19 20:46:07 +02:00
|
|
|
git diff -w --ignore-blank-lines >out &&
|
2018-07-27 19:48:11 +02:00
|
|
|
test_must_be_empty out
|
2013-06-19 20:46:07 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'ignore-blank-lines: after change' '
|
|
|
|
cat <<-\EOF >x &&
|
|
|
|
1
|
|
|
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
7
|
|
|
|
EOF
|
|
|
|
git update-index x &&
|
|
|
|
cat <<-\EOF >x &&
|
|
|
|
change
|
|
|
|
|
|
|
|
1
|
|
|
|
2
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
EOF
|
|
|
|
git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
|
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
diff --git a/x b/x
|
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -1,6 +1,7 @@
|
|
|
|
+change
|
|
|
|
+
|
|
|
|
1
|
|
|
|
2
|
|
|
|
-
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
EOF
|
|
|
|
compare_diff_patch expected out.tmp
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'ignore-blank-lines: before change' '
|
|
|
|
cat <<-\EOF >x &&
|
|
|
|
1
|
|
|
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
6
|
|
|
|
7
|
|
|
|
EOF
|
|
|
|
git update-index x &&
|
|
|
|
cat <<-\EOF >x &&
|
|
|
|
|
|
|
|
1
|
|
|
|
2
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
7
|
|
|
|
change
|
|
|
|
EOF
|
|
|
|
git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
|
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
diff --git a/x b/x
|
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -4,5 +4,7 @@
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
+
|
|
|
|
6
|
|
|
|
7
|
|
|
|
+change
|
|
|
|
EOF
|
|
|
|
compare_diff_patch expected out.tmp
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'ignore-blank-lines: between changes' '
|
|
|
|
cat <<-\EOF >x &&
|
|
|
|
1
|
|
|
|
2
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
|
|
|
|
|
|
|
|
6
|
|
|
|
7
|
|
|
|
8
|
|
|
|
9
|
|
|
|
10
|
|
|
|
EOF
|
|
|
|
git update-index x &&
|
|
|
|
cat <<-\EOF >x &&
|
|
|
|
change
|
|
|
|
1
|
|
|
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
6
|
|
|
|
7
|
|
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
10
|
|
|
|
change
|
|
|
|
EOF
|
|
|
|
git diff --ignore-blank-lines >out.tmp &&
|
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
diff --git a/x b/x
|
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -1,5 +1,7 @@
|
|
|
|
+change
|
|
|
|
1
|
|
|
|
2
|
|
|
|
+
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
@@ -8,5 +8,7 @@
|
|
|
|
6
|
|
|
|
7
|
|
|
|
8
|
|
|
|
+
|
|
|
|
9
|
|
|
|
10
|
|
|
|
+change
|
|
|
|
EOF
|
|
|
|
compare_diff_patch expected out.tmp
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'ignore-blank-lines: between changes (with interhunkctx)' '
|
|
|
|
test_seq 10 >x &&
|
|
|
|
git update-index x &&
|
|
|
|
cat <<-\EOF >x &&
|
|
|
|
change
|
|
|
|
1
|
|
|
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
7
|
|
|
|
8
|
|
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
change
|
|
|
|
EOF
|
|
|
|
git diff --inter-hunk-context=2 --ignore-blank-lines >out.tmp &&
|
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
diff --git a/x b/x
|
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -1,10 +1,15 @@
|
|
|
|
+change
|
|
|
|
1
|
|
|
|
2
|
|
|
|
+
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
+
|
|
|
|
6
|
|
|
|
7
|
|
|
|
8
|
|
|
|
9
|
|
|
|
+
|
|
|
|
10
|
|
|
|
+change
|
|
|
|
EOF
|
|
|
|
compare_diff_patch expected out.tmp
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'ignore-blank-lines: scattered spaces' '
|
|
|
|
test_seq 10 >x &&
|
|
|
|
git update-index x &&
|
|
|
|
cat <<-\EOF >x &&
|
|
|
|
change
|
|
|
|
1
|
|
|
|
2
|
|
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
9
|
|
|
|
10
|
|
|
|
change
|
|
|
|
EOF
|
|
|
|
git diff --inter-hunk-context=4 --ignore-blank-lines >out.tmp &&
|
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
diff --git a/x b/x
|
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -1,3 +1,4 @@
|
|
|
|
+change
|
|
|
|
1
|
|
|
|
2
|
|
|
|
3
|
|
|
|
@@ -8,3 +15,4 @@
|
|
|
|
8
|
|
|
|
9
|
|
|
|
10
|
|
|
|
+change
|
|
|
|
EOF
|
|
|
|
compare_diff_patch expected out.tmp
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'ignore-blank-lines: spaces coalesce' '
|
|
|
|
test_seq 6 >x &&
|
|
|
|
git update-index x &&
|
|
|
|
cat <<-\EOF >x &&
|
|
|
|
change
|
|
|
|
1
|
|
|
|
2
|
|
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
change
|
|
|
|
EOF
|
|
|
|
git diff --inter-hunk-context=4 --ignore-blank-lines >out.tmp &&
|
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
diff --git a/x b/x
|
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -1,6 +1,11 @@
|
|
|
|
+change
|
|
|
|
1
|
|
|
|
2
|
|
|
|
3
|
|
|
|
+
|
|
|
|
4
|
|
|
|
+
|
|
|
|
5
|
|
|
|
+
|
|
|
|
6
|
|
|
|
+change
|
|
|
|
EOF
|
|
|
|
compare_diff_patch expected out.tmp
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'ignore-blank-lines: mix changes and blank lines' '
|
|
|
|
test_seq 16 >x &&
|
|
|
|
git update-index x &&
|
|
|
|
cat <<-\EOF >x &&
|
|
|
|
change
|
|
|
|
1
|
|
|
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
change
|
|
|
|
6
|
|
|
|
7
|
|
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
10
|
|
|
|
11
|
|
|
|
change
|
|
|
|
12
|
|
|
|
13
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
16
|
|
|
|
change
|
|
|
|
EOF
|
|
|
|
git diff --ignore-blank-lines >out.tmp &&
|
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
diff --git a/x b/x
|
|
|
|
--- a/x
|
|
|
|
+++ b/x
|
|
|
|
@@ -1,8 +1,11 @@
|
|
|
|
+change
|
|
|
|
1
|
|
|
|
2
|
|
|
|
+
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
+change
|
|
|
|
6
|
|
|
|
7
|
|
|
|
8
|
|
|
|
@@ -9,8 +13,11 @@
|
|
|
|
9
|
|
|
|
10
|
|
|
|
11
|
|
|
|
+change
|
|
|
|
12
|
|
|
|
13
|
|
|
|
14
|
|
|
|
+
|
|
|
|
15
|
|
|
|
16
|
|
|
|
+change
|
|
|
|
EOF
|
|
|
|
compare_diff_patch expected out.tmp
|
|
|
|
'
|
|
|
|
|
2007-12-12 17:22:59 +01:00
|
|
|
test_expect_success 'check mixed spaces and tabs in indent' '
|
|
|
|
# This is indented with SP HT SP.
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo();" >x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
test_must_fail git diff --check >check &&
|
|
|
|
grep "space before tab in indent" check
|
2007-12-12 17:22:59 +01:00
|
|
|
'
|
|
|
|
|
2007-12-16 17:31:40 +01:00
|
|
|
test_expect_success 'check mixed tabs and spaces in indent' '
|
|
|
|
# This is indented with HT SP HT.
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo();" >x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
test_must_fail git diff --check >check &&
|
|
|
|
grep "space before tab in indent" check
|
2007-12-16 17:31:40 +01:00
|
|
|
'
|
|
|
|
|
2007-12-13 21:24:52 +01:00
|
|
|
test_expect_success 'check with no whitespace errors' '
|
|
|
|
git commit -m "snapshot" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo();" >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git diff --check
|
|
|
|
'
|
|
|
|
|
2007-12-13 14:32:31 +01:00
|
|
|
test_expect_success 'check with trailing whitespace' '
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo(); " >x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff --check
|
2007-12-13 21:24:52 +01:00
|
|
|
'
|
|
|
|
|
2007-12-13 14:32:31 +01:00
|
|
|
test_expect_success 'check with space before tab in indent' '
|
2007-12-13 21:24:52 +01:00
|
|
|
# indent has space followed by hard tab
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo();" >x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff --check
|
2007-12-13 21:24:52 +01:00
|
|
|
'
|
|
|
|
|
2007-12-14 08:40:27 +01:00
|
|
|
test_expect_success '--check and --exit-code are not exclusive' '
|
2007-12-13 21:24:52 +01:00
|
|
|
git checkout x &&
|
|
|
|
git diff --check --exit-code
|
|
|
|
'
|
|
|
|
|
2007-12-14 08:40:27 +01:00
|
|
|
test_expect_success '--check and --quiet are not exclusive' '
|
2007-12-13 21:24:52 +01:00
|
|
|
git diff --check --quiet
|
|
|
|
'
|
|
|
|
|
2020-12-17 02:27:13 +01:00
|
|
|
test_expect_success '-w and --exit-code interact sensibly' '
|
|
|
|
test_when_finished "git checkout x" &&
|
|
|
|
{
|
|
|
|
test_seq 15 &&
|
|
|
|
echo " 16"
|
|
|
|
} >x &&
|
|
|
|
test_must_fail git diff --exit-code &&
|
|
|
|
git diff -w >actual &&
|
|
|
|
test_must_be_empty actual &&
|
|
|
|
git diff -w --exit-code
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success '-I and --exit-code interact sensibly' '
|
|
|
|
test_when_finished "git checkout x" &&
|
|
|
|
{
|
|
|
|
test_seq 15 &&
|
|
|
|
echo " 16"
|
|
|
|
} >x &&
|
|
|
|
test_must_fail git diff --exit-code &&
|
|
|
|
git diff -I. >actual &&
|
|
|
|
test_must_be_empty actual &&
|
|
|
|
git diff -I. --exit-code
|
|
|
|
'
|
|
|
|
|
2007-12-13 21:24:52 +01:00
|
|
|
test_expect_success 'check staged with no whitespace errors' '
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo();" >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git add x &&
|
|
|
|
git diff --cached --check
|
|
|
|
'
|
|
|
|
|
2007-12-13 14:32:31 +01:00
|
|
|
test_expect_success 'check staged with trailing whitespace' '
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo(); " >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git add x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff --cached --check
|
2007-12-13 21:24:52 +01:00
|
|
|
'
|
|
|
|
|
2007-12-13 14:32:31 +01:00
|
|
|
test_expect_success 'check staged with space before tab in indent' '
|
2007-12-13 21:24:52 +01:00
|
|
|
# indent has space followed by hard tab
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo();" >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git add x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff --cached --check
|
2007-12-13 21:24:52 +01:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'check with no whitespace errors (diff-index)' '
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo();" >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git add x &&
|
|
|
|
git diff-index --check HEAD
|
|
|
|
'
|
|
|
|
|
2007-12-13 14:32:31 +01:00
|
|
|
test_expect_success 'check with trailing whitespace (diff-index)' '
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo(); " >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git add x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff-index --check HEAD
|
2007-12-13 21:24:52 +01:00
|
|
|
'
|
|
|
|
|
2007-12-13 14:32:31 +01:00
|
|
|
test_expect_success 'check with space before tab in indent (diff-index)' '
|
2007-12-13 21:24:52 +01:00
|
|
|
# indent has space followed by hard tab
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo();" >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git add x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff-index --check HEAD
|
2007-12-13 21:24:52 +01:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'check staged with no whitespace errors (diff-index)' '
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo();" >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git add x &&
|
|
|
|
git diff-index --cached --check HEAD
|
|
|
|
'
|
|
|
|
|
2007-12-13 14:32:31 +01:00
|
|
|
test_expect_success 'check staged with trailing whitespace (diff-index)' '
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo(); " >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git add x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff-index --cached --check HEAD
|
2007-12-13 21:24:52 +01:00
|
|
|
'
|
|
|
|
|
2007-12-13 14:32:31 +01:00
|
|
|
test_expect_success 'check staged with space before tab in indent (diff-index)' '
|
2007-12-13 21:24:52 +01:00
|
|
|
# indent has space followed by hard tab
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo();" >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git add x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff-index --cached --check HEAD
|
2007-12-13 21:24:52 +01:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'check with no whitespace errors (diff-tree)' '
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo();" >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git commit -m "new commit" x &&
|
|
|
|
git diff-tree --check HEAD^ HEAD
|
|
|
|
'
|
|
|
|
|
2007-12-13 14:32:31 +01:00
|
|
|
test_expect_success 'check with trailing whitespace (diff-tree)' '
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo(); " >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git commit -m "another commit" x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff-tree --check HEAD^ HEAD
|
2007-12-13 21:24:52 +01:00
|
|
|
'
|
|
|
|
|
2007-12-13 14:32:31 +01:00
|
|
|
test_expect_success 'check with space before tab in indent (diff-tree)' '
|
2007-12-13 21:24:52 +01:00
|
|
|
# indent has space followed by hard tab
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo();" >x &&
|
2007-12-13 21:24:52 +01:00
|
|
|
git commit -m "yet another" x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff-tree --check HEAD^ HEAD
|
2007-12-13 14:32:31 +01:00
|
|
|
'
|
|
|
|
|
2017-12-06 23:02:56 +01:00
|
|
|
test_expect_success 'check with ignored trailing whitespace attr (diff-tree)' '
|
|
|
|
test_when_finished "git reset --hard HEAD^" &&
|
|
|
|
|
|
|
|
# create a whitespace error that should be ignored
|
|
|
|
echo "* -whitespace" >.gitattributes &&
|
|
|
|
git add .gitattributes &&
|
|
|
|
echo "foo(); " >x &&
|
|
|
|
git add x &&
|
|
|
|
git commit -m "add trailing space" &&
|
|
|
|
|
|
|
|
# with a worktree diff-tree ignores the whitespace error
|
|
|
|
git diff-tree --root --check HEAD &&
|
|
|
|
|
|
|
|
# without a worktree diff-tree still ignores the whitespace error
|
|
|
|
git -C .git diff-tree --root --check HEAD
|
|
|
|
'
|
|
|
|
|
2007-12-13 14:32:31 +01:00
|
|
|
test_expect_success 'check trailing whitespace (trailing-space: off)' '
|
|
|
|
git config core.whitespace "-trailing-space" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo (); " >x &&
|
2007-12-13 14:32:31 +01:00
|
|
|
git diff --check
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'check trailing whitespace (trailing-space: on)' '
|
|
|
|
git config core.whitespace "trailing-space" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo (); " >x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff --check
|
2007-12-13 14:32:31 +01:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'check space before tab in indent (space-before-tab: off)' '
|
|
|
|
# indent contains space followed by HT
|
|
|
|
git config core.whitespace "-space-before-tab" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo ();" >x &&
|
2007-12-13 14:32:31 +01:00
|
|
|
git diff --check
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'check space before tab in indent (space-before-tab: on)' '
|
|
|
|
# indent contains space followed by HT
|
|
|
|
git config core.whitespace "space-before-tab" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo (); " >x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff --check
|
2007-12-13 14:32:31 +01:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'check spaces as indentation (indent-with-non-tab: off)' '
|
2010-10-31 02:46:54 +01:00
|
|
|
git config core.whitespace "-indent-with-non-tab" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo ();" >x &&
|
2007-12-13 14:32:31 +01:00
|
|
|
git diff --check
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'check spaces as indentation (indent-with-non-tab: on)' '
|
|
|
|
git config core.whitespace "indent-with-non-tab" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo ();" >x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff --check
|
2007-12-13 21:24:52 +01:00
|
|
|
'
|
|
|
|
|
2010-11-30 09:29:11 +01:00
|
|
|
test_expect_success 'ditto, but tabwidth=9' '
|
|
|
|
git config core.whitespace "indent-with-non-tab,tabwidth=9" &&
|
|
|
|
git diff --check
|
|
|
|
'
|
|
|
|
|
2007-12-16 17:31:40 +01:00
|
|
|
test_expect_success 'check tabs and spaces as indentation (indent-with-non-tab: on)' '
|
|
|
|
git config core.whitespace "indent-with-non-tab" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo ();" >x &&
|
2008-07-12 17:47:52 +02:00
|
|
|
test_must_fail git diff --check
|
2007-12-16 17:31:40 +01:00
|
|
|
'
|
2008-02-16 05:30:05 +01:00
|
|
|
|
2010-11-30 09:29:11 +01:00
|
|
|
test_expect_success 'ditto, but tabwidth=10' '
|
|
|
|
git config core.whitespace "indent-with-non-tab,tabwidth=10" &&
|
|
|
|
test_must_fail git diff --check
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'ditto, but tabwidth=20' '
|
|
|
|
git config core.whitespace "indent-with-non-tab,tabwidth=20" &&
|
|
|
|
git diff --check
|
|
|
|
'
|
|
|
|
|
2010-04-03 01:37:15 +02:00
|
|
|
test_expect_success 'check tabs as indentation (tab-in-indent: off)' '
|
|
|
|
git config core.whitespace "-tab-in-indent" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo ();" >x &&
|
2010-04-03 01:37:15 +02:00
|
|
|
git diff --check
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'check tabs as indentation (tab-in-indent: on)' '
|
|
|
|
git config core.whitespace "tab-in-indent" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo ();" >x &&
|
2010-04-03 01:37:15 +02:00
|
|
|
test_must_fail git diff --check
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'check tabs and spaces as indentation (tab-in-indent: on)' '
|
|
|
|
git config core.whitespace "tab-in-indent" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo " foo ();" >x &&
|
2010-04-03 01:37:15 +02:00
|
|
|
test_must_fail git diff --check
|
|
|
|
'
|
|
|
|
|
2010-11-30 09:29:11 +01:00
|
|
|
test_expect_success 'ditto, but tabwidth=1 (must be irrelevant)' '
|
|
|
|
git config core.whitespace "tab-in-indent,tabwidth=1" &&
|
|
|
|
test_must_fail git diff --check
|
|
|
|
'
|
|
|
|
|
2010-04-03 01:37:15 +02:00
|
|
|
test_expect_success 'check tab-in-indent and indent-with-non-tab conflict' '
|
|
|
|
git config core.whitespace "tab-in-indent,indent-with-non-tab" &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "foo ();" >x &&
|
2010-04-03 01:37:15 +02:00
|
|
|
test_must_fail git diff --check
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'check tab-in-indent excluded from wildcard whitespace attribute' '
|
|
|
|
git config --unset core.whitespace &&
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "x whitespace" >.gitattributes &&
|
|
|
|
echo " foo ();" >x &&
|
2010-04-03 01:37:15 +02:00
|
|
|
git diff --check &&
|
|
|
|
rm -f .gitattributes
|
|
|
|
'
|
|
|
|
|
2008-02-16 05:30:05 +01:00
|
|
|
test_expect_success 'line numbers in --check output are correct' '
|
2015-05-26 20:19:52 +02:00
|
|
|
echo "" >x &&
|
|
|
|
echo "foo(); " >>x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
test_must_fail git diff --check >check &&
|
|
|
|
grep "x:2:" check
|
2008-02-16 05:30:05 +01:00
|
|
|
'
|
|
|
|
|
2009-09-04 07:30:27 +02:00
|
|
|
test_expect_success 'checkdiff detects new trailing blank lines (1)' '
|
2008-06-27 00:36:59 +02:00
|
|
|
echo "foo();" >x &&
|
|
|
|
echo "" >>x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
test_must_fail git diff --check >check &&
|
|
|
|
grep "new blank line" check
|
2008-06-27 00:36:59 +02:00
|
|
|
'
|
|
|
|
|
2009-09-04 08:39:43 +02:00
|
|
|
test_expect_success 'checkdiff detects new trailing blank lines (2)' '
|
2019-11-27 20:53:26 +01:00
|
|
|
test_write_lines a b "" "" >x &&
|
2009-09-04 08:39:43 +02:00
|
|
|
git add x &&
|
2019-11-27 20:53:26 +01:00
|
|
|
test_write_lines a "" "" "" "" >x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
test_must_fail git diff --check >check &&
|
|
|
|
grep "new blank line" check
|
2008-06-27 00:36:59 +02:00
|
|
|
'
|
|
|
|
|
2008-08-20 20:47:55 +02:00
|
|
|
test_expect_success 'checkdiff allows new blank lines' '
|
|
|
|
git checkout x &&
|
|
|
|
mv x y &&
|
|
|
|
(
|
|
|
|
echo "/* This is new */" &&
|
|
|
|
echo "" &&
|
|
|
|
cat y
|
|
|
|
) >x &&
|
|
|
|
git diff --check
|
|
|
|
'
|
|
|
|
|
2020-08-20 02:41:32 +02:00
|
|
|
test_expect_success 'whitespace-only changes not reported (diff)' '
|
2009-11-19 22:12:24 +01:00
|
|
|
git reset --hard &&
|
|
|
|
echo >x "hello world" &&
|
|
|
|
git add x &&
|
|
|
|
git commit -m "hello 1" &&
|
|
|
|
echo >x "hello world" &&
|
|
|
|
git diff -b >actual &&
|
tests: use 'test_must_be_empty' instead of 'test_cmp <empty> <out>'
Using 'test_must_be_empty' is shorter and more idiomatic than
>empty &&
test_cmp empty out
as it saves the creation of an empty file. Furthermore, sometimes the
expected empty file doesn't have such a descriptive name like 'empty',
and its creation is far away from the place where it's finally used
for comparison (e.g. in 't7600-merge.sh', where two expected empty
files are created in the 'setup' test, but are used only about 500
lines later).
These cases were found by instrumenting 'test_cmp' to error out the
test script when it's used to compare empty files, and then converted
manually.
Note that even after this patch there still remain a lot of cases
where we use 'test_cmp' to check empty files:
- Sometimes the expected output is not hard-coded in the test, but
'test_cmp' is used to ensure that two similar git commands produce
the same output, and that output happens to be empty, e.g. the
test 'submodule update --merge - ignores --merge for new
submodules' in 't7406-submodule-update.sh'.
- Repetitive common tasks, including preparing the expected results
and running 'test_cmp', are often extracted into a helper
function, and some of this helper's callsites expect no output.
- For the same reason as above, the whole 'test_expect_success'
block is within a helper function, e.g. in 't3070-wildmatch.sh'.
- Or 'test_cmp' is invoked in a loop, e.g. the test 'cvs update
(-p)' in 't9400-git-cvsserver-server.sh'.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-19 23:57:25 +02:00
|
|
|
test_must_be_empty actual
|
2009-11-19 22:12:24 +01:00
|
|
|
'
|
|
|
|
|
2020-08-20 02:41:32 +02:00
|
|
|
test_expect_success 'whitespace-only changes not reported (diffstat)' '
|
|
|
|
# reuse state from previous test
|
|
|
|
git diff --stat -b >actual &&
|
|
|
|
test_must_be_empty actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'whitespace changes with modification reported (diffstat)' '
|
|
|
|
git reset --hard &&
|
|
|
|
echo >x "hello world" &&
|
|
|
|
git update-index --chmod=+x x &&
|
|
|
|
git diff --stat --cached -b >actual &&
|
|
|
|
cat <<-EOF >expect &&
|
|
|
|
x | 0
|
|
|
|
1 file changed, 0 insertions(+), 0 deletions(-)
|
|
|
|
EOF
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'whitespace-only changes reported across renames (diffstat)' '
|
2010-05-26 04:50:12 +02:00
|
|
|
git reset --hard &&
|
2021-12-09 06:11:13 +01:00
|
|
|
for i in 1 2 3 4 5 6 7 8 9; do echo "$i$i$i$i$i$i" || return 1; done >x &&
|
2010-05-26 04:50:12 +02:00
|
|
|
git add x &&
|
2020-08-20 02:41:32 +02:00
|
|
|
git commit -m "base" &&
|
|
|
|
sed -e "5s/^/ /" x >z &&
|
|
|
|
git rm x &&
|
|
|
|
git add z &&
|
|
|
|
git diff -w -M --cached --stat >actual &&
|
|
|
|
cat <<-EOF >expect &&
|
|
|
|
x => z | 0
|
|
|
|
1 file changed, 0 insertions(+), 0 deletions(-)
|
|
|
|
EOF
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'whitespace-only changes reported across renames' '
|
|
|
|
git reset --hard HEAD~1 &&
|
2021-12-09 06:11:13 +01:00
|
|
|
for i in 1 2 3 4 5 6 7 8 9; do echo "$i$i$i$i$i$i" || return 1; done >x &&
|
2020-08-20 02:41:32 +02:00
|
|
|
git add x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
hash_x=$(git hash-object x) &&
|
|
|
|
before=$(git rev-parse --short "$hash_x") &&
|
2010-05-26 04:50:12 +02:00
|
|
|
git commit -m "base" &&
|
|
|
|
sed -e "5s/^/ /" x >z &&
|
|
|
|
git rm x &&
|
|
|
|
git add z &&
|
2019-11-27 20:53:24 +01:00
|
|
|
hash_z=$(git hash-object z) &&
|
|
|
|
after=$(git rev-parse --short "$hash_z") &&
|
|
|
|
git diff -w -M --cached >actual.raw &&
|
|
|
|
sed -e "/^similarity index /s/[0-9][0-9]*/NUM/" actual.raw >actual &&
|
2019-10-28 01:59:00 +01:00
|
|
|
cat <<-EOF >expect &&
|
|
|
|
diff --git a/x b/z
|
|
|
|
similarity index NUM%
|
|
|
|
rename from x
|
|
|
|
rename to z
|
|
|
|
index $before..$after 100644
|
|
|
|
EOF
|
2010-05-26 04:50:12 +02:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
cat >expected <<\EOF
|
|
|
|
diff --git a/empty b/void
|
|
|
|
similarity index 100%
|
|
|
|
rename from empty
|
|
|
|
rename to void
|
|
|
|
EOF
|
|
|
|
|
|
|
|
test_expect_success 'rename empty' '
|
|
|
|
git reset --hard &&
|
|
|
|
>empty &&
|
|
|
|
git add empty &&
|
|
|
|
git commit -m empty &&
|
|
|
|
git mv empty void &&
|
|
|
|
git diff -w --cached -M >current &&
|
|
|
|
test_cmp expected current
|
|
|
|
'
|
|
|
|
|
2008-08-23 21:21:21 +02:00
|
|
|
test_expect_success 'combined diff with autocrlf conversion' '
|
|
|
|
|
|
|
|
git reset --hard &&
|
2020-11-19 00:35:44 +01:00
|
|
|
test_commit "one side" x hello one-side &&
|
2008-08-23 21:21:21 +02:00
|
|
|
git checkout HEAD^ &&
|
|
|
|
echo >x goodbye &&
|
|
|
|
git commit -m "the other side" x &&
|
|
|
|
git config core.autocrlf true &&
|
2020-11-19 00:35:44 +01:00
|
|
|
test_must_fail git merge one-side >actual &&
|
|
|
|
test_i18ngrep "Automatic merge failed" actual &&
|
2008-08-23 21:21:21 +02:00
|
|
|
|
2019-11-27 20:53:24 +01:00
|
|
|
git diff >actual.raw &&
|
|
|
|
sed -e "1,/^@@@/d" actual.raw >actual &&
|
2008-08-23 21:21:21 +02:00
|
|
|
! grep "^-" actual
|
|
|
|
|
|
|
|
'
|
|
|
|
|
2010-10-21 00:17:26 +02:00
|
|
|
# Start testing the colored format for whitespace checks
|
|
|
|
|
|
|
|
test_expect_success 'setup diff colors' '
|
|
|
|
git config color.diff.plain normal &&
|
|
|
|
git config color.diff.meta bold &&
|
|
|
|
git config color.diff.frag cyan &&
|
|
|
|
git config color.diff.func normal &&
|
|
|
|
git config color.diff.old red &&
|
|
|
|
git config color.diff.new green &&
|
|
|
|
git config color.diff.commit yellow &&
|
2015-05-26 20:43:08 +02:00
|
|
|
git config color.diff.whitespace blue &&
|
2010-10-21 00:17:26 +02:00
|
|
|
|
2015-05-26 20:43:08 +02:00
|
|
|
git config core.autocrlf false
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'diff that introduces a line with only tabs' '
|
|
|
|
git config core.whitespace blank-at-eol &&
|
|
|
|
git reset --hard &&
|
|
|
|
echo "test" >x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
old_hash_x=$(git hash-object x) &&
|
|
|
|
before=$(git rev-parse --short "$old_hash_x") &&
|
2015-05-26 20:43:08 +02:00
|
|
|
git commit -m "initial" x &&
|
|
|
|
echo "{NTN}" | tr "NT" "\n\t" >>x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
new_hash_x=$(git hash-object x) &&
|
|
|
|
after=$(git rev-parse --short "$new_hash_x") &&
|
|
|
|
git diff --color >current.raw &&
|
|
|
|
test_decode_color <current.raw >current &&
|
2015-05-26 20:19:52 +02:00
|
|
|
|
2019-10-28 01:59:00 +01:00
|
|
|
cat >expected <<-EOF &&
|
2015-05-26 20:19:52 +02:00
|
|
|
<BOLD>diff --git a/x b/x<RESET>
|
2019-10-28 01:59:00 +01:00
|
|
|
<BOLD>index $before..$after 100644<RESET>
|
2015-05-26 20:19:52 +02:00
|
|
|
<BOLD>--- a/x<RESET>
|
|
|
|
<BOLD>+++ b/x<RESET>
|
|
|
|
<CYAN>@@ -1 +1,4 @@<RESET>
|
|
|
|
test<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>{<RESET>
|
2015-05-26 20:43:08 +02:00
|
|
|
<GREEN>+<RESET><BLUE> <RESET>
|
2015-05-26 20:19:52 +02:00
|
|
|
<GREEN>+<RESET><GREEN>}<RESET>
|
|
|
|
EOF
|
2010-10-21 00:17:26 +02:00
|
|
|
|
|
|
|
test_cmp expected current
|
|
|
|
'
|
|
|
|
|
2015-05-26 19:11:28 +02:00
|
|
|
test_expect_success 'diff that introduces and removes ws breakages' '
|
|
|
|
git reset --hard &&
|
|
|
|
{
|
|
|
|
echo "0. blank-at-eol " &&
|
|
|
|
echo "1. blank-at-eol "
|
|
|
|
} >x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
old_hash_x=$(git hash-object x) &&
|
|
|
|
before=$(git rev-parse --short "$old_hash_x") &&
|
2015-05-26 19:11:28 +02:00
|
|
|
git commit -a --allow-empty -m preimage &&
|
|
|
|
{
|
|
|
|
echo "0. blank-at-eol " &&
|
|
|
|
echo "1. still-blank-at-eol " &&
|
|
|
|
echo "2. and a new line "
|
|
|
|
} >x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
new_hash_x=$(git hash-object x) &&
|
|
|
|
after=$(git rev-parse --short "$new_hash_x") &&
|
2015-05-26 19:11:28 +02:00
|
|
|
|
2019-11-27 20:53:24 +01:00
|
|
|
git diff --color >current.raw &&
|
|
|
|
test_decode_color <current.raw >current &&
|
2015-05-26 19:11:28 +02:00
|
|
|
|
2019-10-28 01:59:00 +01:00
|
|
|
cat >expected <<-EOF &&
|
2015-05-26 19:11:28 +02:00
|
|
|
<BOLD>diff --git a/x b/x<RESET>
|
2019-10-28 01:59:00 +01:00
|
|
|
<BOLD>index $before..$after 100644<RESET>
|
2015-05-26 19:11:28 +02:00
|
|
|
<BOLD>--- a/x<RESET>
|
|
|
|
<BOLD>+++ b/x<RESET>
|
|
|
|
<CYAN>@@ -1,2 +1,3 @@<RESET>
|
|
|
|
0. blank-at-eol <RESET>
|
|
|
|
<RED>-1. blank-at-eol <RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>1. still-blank-at-eol<RESET><BLUE> <RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>2. and a new line<RESET><BLUE> <RESET>
|
|
|
|
EOF
|
|
|
|
|
|
|
|
test_cmp expected current
|
|
|
|
'
|
|
|
|
|
2016-10-05 00:21:38 +02:00
|
|
|
test_expect_success 'ws-error-highlight test setup' '
|
|
|
|
|
2015-05-26 19:11:28 +02:00
|
|
|
git reset --hard &&
|
|
|
|
{
|
|
|
|
echo "0. blank-at-eol " &&
|
|
|
|
echo "1. blank-at-eol "
|
|
|
|
} >x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
old_hash_x=$(git hash-object x) &&
|
|
|
|
before=$(git rev-parse --short "$old_hash_x") &&
|
2015-05-26 19:11:28 +02:00
|
|
|
git commit -a --allow-empty -m preimage &&
|
|
|
|
{
|
|
|
|
echo "0. blank-at-eol " &&
|
|
|
|
echo "1. still-blank-at-eol " &&
|
|
|
|
echo "2. and a new line "
|
|
|
|
} >x &&
|
2019-11-27 20:53:24 +01:00
|
|
|
new_hash_x=$(git hash-object x) &&
|
|
|
|
after=$(git rev-parse --short "$new_hash_x") &&
|
2015-05-26 19:11:28 +02:00
|
|
|
|
2019-10-28 01:59:00 +01:00
|
|
|
cat >expect.default-old <<-EOF &&
|
2015-05-26 19:11:28 +02:00
|
|
|
<BOLD>diff --git a/x b/x<RESET>
|
2019-10-28 01:59:00 +01:00
|
|
|
<BOLD>index $before..$after 100644<RESET>
|
2015-05-26 19:11:28 +02:00
|
|
|
<BOLD>--- a/x<RESET>
|
|
|
|
<BOLD>+++ b/x<RESET>
|
|
|
|
<CYAN>@@ -1,2 +1,3 @@<RESET>
|
|
|
|
0. blank-at-eol <RESET>
|
|
|
|
<RED>-<RESET><RED>1. blank-at-eol<RESET><BLUE> <RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>1. still-blank-at-eol<RESET><BLUE> <RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>2. and a new line<RESET><BLUE> <RESET>
|
|
|
|
EOF
|
|
|
|
|
2019-10-28 01:59:00 +01:00
|
|
|
cat >expect.all <<-EOF &&
|
2015-05-26 19:11:28 +02:00
|
|
|
<BOLD>diff --git a/x b/x<RESET>
|
2019-10-28 01:59:00 +01:00
|
|
|
<BOLD>index $before..$after 100644<RESET>
|
2015-05-26 19:11:28 +02:00
|
|
|
<BOLD>--- a/x<RESET>
|
|
|
|
<BOLD>+++ b/x<RESET>
|
|
|
|
<CYAN>@@ -1,2 +1,3 @@<RESET>
|
|
|
|
<RESET>0. blank-at-eol<RESET><BLUE> <RESET>
|
|
|
|
<RED>-<RESET><RED>1. blank-at-eol<RESET><BLUE> <RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>1. still-blank-at-eol<RESET><BLUE> <RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>2. and a new line<RESET><BLUE> <RESET>
|
|
|
|
EOF
|
|
|
|
|
2019-10-28 01:59:00 +01:00
|
|
|
cat >expect.none <<-EOF
|
2015-05-26 19:11:28 +02:00
|
|
|
<BOLD>diff --git a/x b/x<RESET>
|
2019-10-28 01:59:00 +01:00
|
|
|
<BOLD>index $before..$after 100644<RESET>
|
2015-05-26 19:11:28 +02:00
|
|
|
<BOLD>--- a/x<RESET>
|
|
|
|
<BOLD>+++ b/x<RESET>
|
|
|
|
<CYAN>@@ -1,2 +1,3 @@<RESET>
|
|
|
|
0. blank-at-eol <RESET>
|
|
|
|
<RED>-1. blank-at-eol <RESET>
|
|
|
|
<GREEN>+1. still-blank-at-eol <RESET>
|
|
|
|
<GREEN>+2. and a new line <RESET>
|
|
|
|
EOF
|
|
|
|
|
2016-10-05 00:21:38 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'test --ws-error-highlight option' '
|
|
|
|
|
2019-11-27 20:53:24 +01:00
|
|
|
git diff --color --ws-error-highlight=default,old >current.raw &&
|
|
|
|
test_decode_color <current.raw >current &&
|
2016-10-05 00:21:38 +02:00
|
|
|
test_cmp expect.default-old current &&
|
|
|
|
|
2019-11-27 20:53:24 +01:00
|
|
|
git diff --color --ws-error-highlight=all >current.raw &&
|
|
|
|
test_decode_color <current.raw >current &&
|
2016-10-05 00:21:38 +02:00
|
|
|
test_cmp expect.all current &&
|
|
|
|
|
2019-11-27 20:53:24 +01:00
|
|
|
git diff --color --ws-error-highlight=none >current.raw &&
|
|
|
|
test_decode_color <current.raw >current &&
|
2016-10-05 00:21:38 +02:00
|
|
|
test_cmp expect.none current
|
|
|
|
|
2015-05-26 19:11:28 +02:00
|
|
|
'
|
|
|
|
|
2016-10-05 00:26:27 +02:00
|
|
|
test_expect_success 'test diff.wsErrorHighlight config' '
|
|
|
|
|
2019-11-27 20:53:24 +01:00
|
|
|
git -c diff.wsErrorHighlight=default,old diff --color >current.raw &&
|
|
|
|
test_decode_color <current.raw >current &&
|
2016-10-05 00:26:27 +02:00
|
|
|
test_cmp expect.default-old current &&
|
|
|
|
|
2019-11-27 20:53:24 +01:00
|
|
|
git -c diff.wsErrorHighlight=all diff --color >current.raw &&
|
|
|
|
test_decode_color <current.raw >current &&
|
2016-10-05 00:26:27 +02:00
|
|
|
test_cmp expect.all current &&
|
|
|
|
|
2019-11-27 20:53:24 +01:00
|
|
|
git -c diff.wsErrorHighlight=none diff --color >current.raw &&
|
|
|
|
test_decode_color <current.raw >current &&
|
2016-10-05 00:26:27 +02:00
|
|
|
test_cmp expect.none current
|
|
|
|
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'option overrides diff.wsErrorHighlight' '
|
|
|
|
|
2017-10-03 15:40:19 +02:00
|
|
|
git -c diff.wsErrorHighlight=none \
|
2019-11-27 20:53:24 +01:00
|
|
|
diff --color --ws-error-highlight=default,old >current.raw &&
|
|
|
|
test_decode_color <current.raw >current &&
|
2016-10-05 00:26:27 +02:00
|
|
|
test_cmp expect.default-old current &&
|
|
|
|
|
2017-10-03 15:40:19 +02:00
|
|
|
git -c diff.wsErrorHighlight=default \
|
2019-11-27 20:53:24 +01:00
|
|
|
diff --color --ws-error-highlight=all >current.raw &&
|
|
|
|
test_decode_color <current.raw >current &&
|
2016-10-05 00:26:27 +02:00
|
|
|
test_cmp expect.all current &&
|
|
|
|
|
2017-10-03 15:40:19 +02:00
|
|
|
git -c diff.wsErrorHighlight=all \
|
2019-11-27 20:53:24 +01:00
|
|
|
diff --color --ws-error-highlight=none >current.raw &&
|
|
|
|
test_decode_color <current.raw >current &&
|
2016-10-05 00:26:27 +02:00
|
|
|
test_cmp expect.none current
|
|
|
|
|
|
|
|
'
|
|
|
|
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
test_expect_success 'detect moved code, complete file' '
|
|
|
|
git reset --hard &&
|
|
|
|
cat <<-\EOF >test.c &&
|
|
|
|
#include<stdio.h>
|
|
|
|
main()
|
|
|
|
{
|
|
|
|
printf("Hello World");
|
|
|
|
}
|
|
|
|
EOF
|
|
|
|
git add test.c &&
|
|
|
|
git commit -m "add main function" &&
|
2019-10-28 01:59:00 +01:00
|
|
|
file=$(git rev-parse --short HEAD:test.c) &&
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
git mv test.c main.c &&
|
|
|
|
test_config color.diff.oldMoved "normal red" &&
|
|
|
|
test_config color.diff.newMoved "normal green" &&
|
2019-11-27 20:53:24 +01:00
|
|
|
git diff HEAD --color-moved=zebra --color --no-renames >actual.raw &&
|
|
|
|
test_decode_color <actual.raw >actual &&
|
2019-10-28 01:59:00 +01:00
|
|
|
cat >expected <<-EOF &&
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
<BOLD>diff --git a/main.c b/main.c<RESET>
|
|
|
|
<BOLD>new file mode 100644<RESET>
|
2019-10-28 01:59:00 +01:00
|
|
|
<BOLD>index 0000000..$file<RESET>
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
<BOLD>--- /dev/null<RESET>
|
|
|
|
<BOLD>+++ b/main.c<RESET>
|
|
|
|
<CYAN>@@ -0,0 +1,5 @@<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>#include<stdio.h><RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>main()<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>{<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>printf("Hello World");<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>}<RESET>
|
|
|
|
<BOLD>diff --git a/test.c b/test.c<RESET>
|
|
|
|
<BOLD>deleted file mode 100644<RESET>
|
2019-10-28 01:59:00 +01:00
|
|
|
<BOLD>index $file..0000000<RESET>
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
<BOLD>--- a/test.c<RESET>
|
|
|
|
<BOLD>+++ /dev/null<RESET>
|
|
|
|
<CYAN>@@ -1,5 +0,0 @@<RESET>
|
|
|
|
<BRED>-#include<stdio.h><RESET>
|
|
|
|
<BRED>-main()<RESET>
|
|
|
|
<BRED>-{<RESET>
|
|
|
|
<BRED>-printf("Hello World");<RESET>
|
|
|
|
<BRED>-}<RESET>
|
|
|
|
EOF
|
|
|
|
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'detect malicious moved code, inside file' '
|
|
|
|
test_config color.diff.oldMoved "normal red" &&
|
|
|
|
test_config color.diff.newMoved "normal green" &&
|
|
|
|
test_config color.diff.oldMovedAlternative "blue" &&
|
|
|
|
test_config color.diff.newMovedAlternative "yellow" &&
|
|
|
|
git reset --hard &&
|
|
|
|
cat <<-\EOF >main.c &&
|
|
|
|
#include<stdio.h>
|
|
|
|
int stuff()
|
|
|
|
{
|
|
|
|
printf("Hello ");
|
|
|
|
printf("World\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
int secure_foo(struct user *u)
|
|
|
|
{
|
|
|
|
if (!u->is_allowed_foo)
|
|
|
|
return;
|
|
|
|
foo(u);
|
|
|
|
}
|
|
|
|
|
|
|
|
int main()
|
|
|
|
{
|
|
|
|
foo();
|
|
|
|
}
|
|
|
|
EOF
|
|
|
|
cat <<-\EOF >test.c &&
|
|
|
|
#include<stdio.h>
|
|
|
|
int bar()
|
|
|
|
{
|
|
|
|
printf("Hello World, but different\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
int another_function()
|
|
|
|
{
|
|
|
|
bar();
|
|
|
|
}
|
|
|
|
EOF
|
|
|
|
git add main.c test.c &&
|
|
|
|
git commit -m "add main and test file" &&
|
2019-10-28 01:59:00 +01:00
|
|
|
before_main=$(git rev-parse --short HEAD:main.c) &&
|
|
|
|
before_test=$(git rev-parse --short HEAD:test.c) &&
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
cat <<-\EOF >main.c &&
|
|
|
|
#include<stdio.h>
|
|
|
|
int stuff()
|
|
|
|
{
|
|
|
|
printf("Hello ");
|
|
|
|
printf("World\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
int main()
|
|
|
|
{
|
|
|
|
foo();
|
|
|
|
}
|
|
|
|
EOF
|
|
|
|
cat <<-\EOF >test.c &&
|
|
|
|
#include<stdio.h>
|
|
|
|
int bar()
|
|
|
|
{
|
|
|
|
printf("Hello World, but different\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
int secure_foo(struct user *u)
|
|
|
|
{
|
|
|
|
foo(u);
|
|
|
|
if (!u->is_allowed_foo)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
int another_function()
|
|
|
|
{
|
|
|
|
bar();
|
|
|
|
}
|
|
|
|
EOF
|
2019-11-27 20:53:24 +01:00
|
|
|
hash_main=$(git hash-object main.c) &&
|
|
|
|
after_main=$(git rev-parse --short "$hash_main") &&
|
|
|
|
hash_test=$(git hash-object test.c) &&
|
|
|
|
after_test=$(git rev-parse --short "$hash_test") &&
|
|
|
|
git diff HEAD --no-renames --color-moved=zebra --color >actual.raw &&
|
|
|
|
test_decode_color <actual.raw >actual &&
|
2019-10-28 01:59:00 +01:00
|
|
|
cat <<-EOF >expected &&
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
<BOLD>diff --git a/main.c b/main.c<RESET>
|
2019-10-28 01:59:00 +01:00
|
|
|
<BOLD>index $before_main..$after_main 100644<RESET>
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
<BOLD>--- a/main.c<RESET>
|
|
|
|
<BOLD>+++ b/main.c<RESET>
|
|
|
|
<CYAN>@@ -5,13 +5,6 @@<RESET> <RESET>printf("Hello ");<RESET>
|
|
|
|
printf("World\n");<RESET>
|
|
|
|
}<RESET>
|
|
|
|
<RESET>
|
|
|
|
<BRED>-int secure_foo(struct user *u)<RESET>
|
|
|
|
<BRED>-{<RESET>
|
|
|
|
<BLUE>-if (!u->is_allowed_foo)<RESET>
|
|
|
|
<BLUE>-return;<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<RED>-foo(u);<RESET>
|
|
|
|
<RED>-}<RESET>
|
|
|
|
<RED>-<RESET>
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
int main()<RESET>
|
|
|
|
{<RESET>
|
|
|
|
foo();<RESET>
|
|
|
|
<BOLD>diff --git a/test.c b/test.c<RESET>
|
2019-10-28 01:59:00 +01:00
|
|
|
<BOLD>index $before_test..$after_test 100644<RESET>
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
<BOLD>--- a/test.c<RESET>
|
|
|
|
<BOLD>+++ b/test.c<RESET>
|
|
|
|
<CYAN>@@ -4,6 +4,13 @@<RESET> <RESET>int bar()<RESET>
|
|
|
|
printf("Hello World, but different\n");<RESET>
|
|
|
|
}<RESET>
|
|
|
|
<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>int secure_foo(struct user *u)<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>{<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<GREEN>+<RESET><GREEN>foo(u);<RESET>
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
<BGREEN>+<RESET><BGREEN>if (!u->is_allowed_foo)<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>return;<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<GREEN>+<RESET><GREEN>}<RESET>
|
|
|
|
<GREEN>+<RESET>
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
int another_function()<RESET>
|
|
|
|
{<RESET>
|
|
|
|
bar();<RESET>
|
|
|
|
EOF
|
|
|
|
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2017-06-30 22:53:08 +02:00
|
|
|
test_expect_success 'plain moved code, inside file' '
|
|
|
|
test_config color.diff.oldMoved "normal red" &&
|
|
|
|
test_config color.diff.newMoved "normal green" &&
|
|
|
|
test_config color.diff.oldMovedAlternative "blue" &&
|
|
|
|
test_config color.diff.newMovedAlternative "yellow" &&
|
|
|
|
# needs previous test as setup
|
2019-11-27 20:53:24 +01:00
|
|
|
git diff HEAD --no-renames --color-moved=plain --color >actual.raw &&
|
|
|
|
test_decode_color <actual.raw >actual &&
|
2019-10-28 01:59:00 +01:00
|
|
|
cat <<-EOF >expected &&
|
2017-06-30 22:53:08 +02:00
|
|
|
<BOLD>diff --git a/main.c b/main.c<RESET>
|
2019-10-28 01:59:00 +01:00
|
|
|
<BOLD>index $before_main..$after_main 100644<RESET>
|
2017-06-30 22:53:08 +02:00
|
|
|
<BOLD>--- a/main.c<RESET>
|
|
|
|
<BOLD>+++ b/main.c<RESET>
|
|
|
|
<CYAN>@@ -5,13 +5,6 @@<RESET> <RESET>printf("Hello ");<RESET>
|
|
|
|
printf("World\n");<RESET>
|
|
|
|
}<RESET>
|
|
|
|
<RESET>
|
|
|
|
<BRED>-int secure_foo(struct user *u)<RESET>
|
|
|
|
<BRED>-{<RESET>
|
|
|
|
<BRED>-if (!u->is_allowed_foo)<RESET>
|
|
|
|
<BRED>-return;<RESET>
|
|
|
|
<BRED>-foo(u);<RESET>
|
|
|
|
<BRED>-}<RESET>
|
|
|
|
<BRED>-<RESET>
|
|
|
|
int main()<RESET>
|
|
|
|
{<RESET>
|
|
|
|
foo();<RESET>
|
|
|
|
<BOLD>diff --git a/test.c b/test.c<RESET>
|
2019-10-28 01:59:00 +01:00
|
|
|
<BOLD>index $before_test..$after_test 100644<RESET>
|
2017-06-30 22:53:08 +02:00
|
|
|
<BOLD>--- a/test.c<RESET>
|
|
|
|
<BOLD>+++ b/test.c<RESET>
|
|
|
|
<CYAN>@@ -4,6 +4,13 @@<RESET> <RESET>int bar()<RESET>
|
|
|
|
printf("Hello World, but different\n");<RESET>
|
|
|
|
}<RESET>
|
|
|
|
<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>int secure_foo(struct user *u)<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>{<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>foo(u);<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>if (!u->is_allowed_foo)<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>return;<RESET>
|
|
|
|
<BGREEN>+<RESET><BGREEN>}<RESET>
|
|
|
|
<BGREEN>+<RESET>
|
|
|
|
int another_function()<RESET>
|
|
|
|
{<RESET>
|
|
|
|
bar();<RESET>
|
|
|
|
EOF
|
|
|
|
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2018-07-17 01:05:39 +02:00
|
|
|
test_expect_success 'detect blocks of moved code' '
|
2017-06-30 22:53:09 +02:00
|
|
|
git reset --hard &&
|
|
|
|
cat <<-\EOF >lines.txt &&
|
2017-08-16 03:27:39 +02:00
|
|
|
long line 1
|
|
|
|
long line 2
|
|
|
|
long line 3
|
2017-06-30 22:53:09 +02:00
|
|
|
line 4
|
|
|
|
line 5
|
|
|
|
line 6
|
|
|
|
line 7
|
|
|
|
line 8
|
|
|
|
line 9
|
|
|
|
line 10
|
|
|
|
line 11
|
|
|
|
line 12
|
|
|
|
line 13
|
2017-08-16 03:27:39 +02:00
|
|
|
long line 14
|
|
|
|
long line 15
|
|
|
|
long line 16
|
2017-06-30 22:53:09 +02:00
|
|
|
EOF
|
|
|
|
git add lines.txt &&
|
|
|
|
git commit -m "add poetry" &&
|
|
|
|
cat <<-\EOF >lines.txt &&
|
|
|
|
line 4
|
|
|
|
line 5
|
|
|
|
line 6
|
|
|
|
line 7
|
|
|
|
line 8
|
|
|
|
line 9
|
2017-08-16 03:27:39 +02:00
|
|
|
long line 1
|
|
|
|
long line 2
|
|
|
|
long line 3
|
|
|
|
long line 14
|
|
|
|
long line 15
|
|
|
|
long line 16
|
2017-06-30 22:53:09 +02:00
|
|
|
line 10
|
|
|
|
line 11
|
|
|
|
line 12
|
|
|
|
line 13
|
|
|
|
EOF
|
|
|
|
test_config color.diff.oldMoved "magenta" &&
|
|
|
|
test_config color.diff.newMoved "cyan" &&
|
|
|
|
test_config color.diff.oldMovedAlternative "blue" &&
|
|
|
|
test_config color.diff.newMovedAlternative "yellow" &&
|
|
|
|
test_config color.diff.oldMovedDimmed "normal magenta" &&
|
|
|
|
test_config color.diff.newMovedDimmed "normal cyan" &&
|
|
|
|
test_config color.diff.oldMovedAlternativeDimmed "normal blue" &&
|
|
|
|
test_config color.diff.newMovedAlternativeDimmed "normal yellow" &&
|
2018-07-17 01:05:39 +02:00
|
|
|
git diff HEAD --no-renames --color-moved=blocks --color >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/lines.txt b/lines.txt<RESET>
|
|
|
|
<BOLD>--- a/lines.txt<RESET>
|
|
|
|
<BOLD>+++ b/lines.txt<RESET>
|
|
|
|
<CYAN>@@ -1,16 +1,16 @@<RESET>
|
|
|
|
<MAGENTA>-long line 1<RESET>
|
|
|
|
<MAGENTA>-long line 2<RESET>
|
|
|
|
<MAGENTA>-long line 3<RESET>
|
|
|
|
line 4<RESET>
|
|
|
|
line 5<RESET>
|
|
|
|
line 6<RESET>
|
|
|
|
line 7<RESET>
|
|
|
|
line 8<RESET>
|
|
|
|
line 9<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 1<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 2<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 3<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 14<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 15<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 16<RESET>
|
|
|
|
line 10<RESET>
|
|
|
|
line 11<RESET>
|
|
|
|
line 12<RESET>
|
|
|
|
line 13<RESET>
|
|
|
|
<MAGENTA>-long line 14<RESET>
|
|
|
|
<MAGENTA>-long line 15<RESET>
|
|
|
|
<MAGENTA>-long line 16<RESET>
|
|
|
|
EOF
|
|
|
|
test_cmp expected actual
|
|
|
|
|
|
|
|
'
|
|
|
|
|
2018-08-16 00:08:22 +02:00
|
|
|
test_expect_success 'detect permutations inside moved code -- dimmed-zebra' '
|
2018-07-17 01:05:39 +02:00
|
|
|
# reuse setup from test before!
|
|
|
|
test_config color.diff.oldMoved "magenta" &&
|
|
|
|
test_config color.diff.newMoved "cyan" &&
|
|
|
|
test_config color.diff.oldMovedAlternative "blue" &&
|
|
|
|
test_config color.diff.newMovedAlternative "yellow" &&
|
|
|
|
test_config color.diff.oldMovedDimmed "normal magenta" &&
|
|
|
|
test_config color.diff.newMovedDimmed "normal cyan" &&
|
|
|
|
test_config color.diff.oldMovedAlternativeDimmed "normal blue" &&
|
|
|
|
test_config color.diff.newMovedAlternativeDimmed "normal yellow" &&
|
2018-08-16 00:08:22 +02:00
|
|
|
git diff HEAD --no-renames --color-moved=dimmed-zebra --color >actual.raw &&
|
2018-07-17 01:05:36 +02:00
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
2017-06-30 22:53:09 +02:00
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/lines.txt b/lines.txt<RESET>
|
|
|
|
<BOLD>--- a/lines.txt<RESET>
|
|
|
|
<BOLD>+++ b/lines.txt<RESET>
|
|
|
|
<CYAN>@@ -1,16 +1,16 @@<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<BMAGENTA>-long line 1<RESET>
|
|
|
|
<BMAGENTA>-long line 2<RESET>
|
|
|
|
<BMAGENTA>-long line 3<RESET>
|
2017-06-30 22:53:09 +02:00
|
|
|
line 4<RESET>
|
|
|
|
line 5<RESET>
|
|
|
|
line 6<RESET>
|
|
|
|
line 7<RESET>
|
|
|
|
line 8<RESET>
|
|
|
|
line 9<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<BCYAN>+<RESET><BCYAN>long line 1<RESET>
|
|
|
|
<BCYAN>+<RESET><BCYAN>long line 2<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 3<RESET>
|
|
|
|
<YELLOW>+<RESET><YELLOW>long line 14<RESET>
|
|
|
|
<BYELLOW>+<RESET><BYELLOW>long line 15<RESET>
|
|
|
|
<BYELLOW>+<RESET><BYELLOW>long line 16<RESET>
|
2017-06-30 22:53:09 +02:00
|
|
|
line 10<RESET>
|
|
|
|
line 11<RESET>
|
|
|
|
line 12<RESET>
|
|
|
|
line 13<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<BMAGENTA>-long line 14<RESET>
|
|
|
|
<BMAGENTA>-long line 15<RESET>
|
|
|
|
<BMAGENTA>-long line 16<RESET>
|
2017-06-30 22:53:09 +02:00
|
|
|
EOF
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2021-12-09 11:29:59 +01:00
|
|
|
test_expect_success 'zebra alternate color is only used when necessary' '
|
|
|
|
cat >old.txt <<-\EOF &&
|
|
|
|
line 1A should be marked as oldMoved newMovedAlternate
|
|
|
|
line 1B should be marked as oldMoved newMovedAlternate
|
|
|
|
unchanged
|
|
|
|
line 2A should be marked as oldMoved newMovedAlternate
|
|
|
|
line 2B should be marked as oldMoved newMovedAlternate
|
|
|
|
line 3A should be marked as oldMovedAlternate newMoved
|
|
|
|
line 3B should be marked as oldMovedAlternate newMoved
|
|
|
|
unchanged
|
|
|
|
line 4A should be marked as oldMoved newMovedAlternate
|
|
|
|
line 4B should be marked as oldMoved newMovedAlternate
|
|
|
|
line 5A should be marked as oldMovedAlternate newMoved
|
|
|
|
line 5B should be marked as oldMovedAlternate newMoved
|
|
|
|
line 6A should be marked as oldMoved newMoved
|
|
|
|
line 6B should be marked as oldMoved newMoved
|
|
|
|
EOF
|
|
|
|
cat >new.txt <<-\EOF &&
|
|
|
|
line 1A should be marked as oldMoved newMovedAlternate
|
|
|
|
line 1B should be marked as oldMoved newMovedAlternate
|
|
|
|
unchanged
|
|
|
|
line 3A should be marked as oldMovedAlternate newMoved
|
|
|
|
line 3B should be marked as oldMovedAlternate newMoved
|
|
|
|
line 2A should be marked as oldMoved newMovedAlternate
|
|
|
|
line 2B should be marked as oldMoved newMovedAlternate
|
|
|
|
unchanged
|
|
|
|
line 6A should be marked as oldMoved newMoved
|
|
|
|
line 6B should be marked as oldMoved newMoved
|
|
|
|
line 4A should be marked as oldMoved newMovedAlternate
|
|
|
|
line 4B should be marked as oldMoved newMovedAlternate
|
|
|
|
line 5A should be marked as oldMovedAlternate newMoved
|
|
|
|
line 5B should be marked as oldMovedAlternate newMoved
|
|
|
|
EOF
|
|
|
|
test_expect_code 1 git diff --no-index --color --color-moved=zebra \
|
|
|
|
--color-moved-ws=allow-indentation-change \
|
|
|
|
old.txt new.txt >output &&
|
|
|
|
grep -v index output | test_decode_color >actual &&
|
|
|
|
cat >expected <<-\EOF &&
|
|
|
|
<BOLD>diff --git a/old.txt b/new.txt<RESET>
|
|
|
|
<BOLD>--- a/old.txt<RESET>
|
|
|
|
<BOLD>+++ b/new.txt<RESET>
|
|
|
|
<CYAN>@@ -1,14 +1,14 @@<RESET>
|
|
|
|
<BOLD;MAGENTA>-line 1A should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
<BOLD;MAGENTA>-line 1B should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN> line 1A should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN> line 1B should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
unchanged<RESET>
|
|
|
|
<BOLD;MAGENTA>-line 2A should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
<BOLD;MAGENTA>-line 2B should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
<BOLD;BLUE>-line 3A should be marked as oldMovedAlternate newMoved<RESET>
|
|
|
|
<BOLD;BLUE>-line 3B should be marked as oldMovedAlternate newMoved<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN> line 3A should be marked as oldMovedAlternate newMoved<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN> line 3B should be marked as oldMovedAlternate newMoved<RESET>
|
|
|
|
<BOLD;YELLOW>+<RESET><BOLD;YELLOW> line 2A should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
<BOLD;YELLOW>+<RESET><BOLD;YELLOW> line 2B should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
unchanged<RESET>
|
|
|
|
<BOLD;MAGENTA>-line 4A should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
<BOLD;MAGENTA>-line 4B should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
<BOLD;BLUE>-line 5A should be marked as oldMovedAlternate newMoved<RESET>
|
|
|
|
<BOLD;BLUE>-line 5B should be marked as oldMovedAlternate newMoved<RESET>
|
|
|
|
<BOLD;MAGENTA>-line 6A should be marked as oldMoved newMoved<RESET>
|
|
|
|
<BOLD;MAGENTA>-line 6B should be marked as oldMoved newMoved<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN> line 6A should be marked as oldMoved newMoved<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN> line 6B should be marked as oldMoved newMoved<RESET>
|
|
|
|
<BOLD;YELLOW>+<RESET><BOLD;YELLOW> line 4A should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
<BOLD;YELLOW>+<RESET><BOLD;YELLOW> line 4B should be marked as oldMoved newMovedAlternate<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN> line 5A should be marked as oldMovedAlternate newMoved<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN> line 5B should be marked as oldMovedAlternate newMoved<RESET>
|
|
|
|
EOF
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2021-12-09 11:30:00 +01:00
|
|
|
test_expect_success 'short lines of opposite sign do not get marked as moved' '
|
|
|
|
cat >old.txt <<-\EOF &&
|
|
|
|
this line should be marked as moved
|
|
|
|
unchanged
|
|
|
|
unchanged
|
|
|
|
unchanged
|
|
|
|
unchanged
|
|
|
|
too short
|
|
|
|
this line should be marked as oldMoved newMoved
|
|
|
|
this line should be marked as oldMovedAlternate newMoved
|
|
|
|
unchanged 1
|
|
|
|
unchanged 2
|
|
|
|
unchanged 3
|
|
|
|
unchanged 4
|
|
|
|
this line should be marked as oldMoved newMoved/newMovedAlternate
|
|
|
|
EOF
|
|
|
|
cat >new.txt <<-\EOF &&
|
|
|
|
too short
|
|
|
|
unchanged
|
|
|
|
unchanged
|
|
|
|
this line should be marked as moved
|
|
|
|
too short
|
|
|
|
unchanged
|
|
|
|
unchanged
|
|
|
|
this line should be marked as oldMoved newMoved/newMovedAlternate
|
|
|
|
unchanged 1
|
|
|
|
unchanged 2
|
|
|
|
this line should be marked as oldMovedAlternate newMoved
|
|
|
|
this line should be marked as oldMoved newMoved/newMovedAlternate
|
|
|
|
unchanged 3
|
|
|
|
this line should be marked as oldMoved newMoved
|
|
|
|
unchanged 4
|
|
|
|
EOF
|
|
|
|
test_expect_code 1 git diff --no-index --color --color-moved=zebra \
|
|
|
|
old.txt new.txt >output && cat output &&
|
|
|
|
grep -v index output | test_decode_color >actual &&
|
|
|
|
cat >expect <<-\EOF &&
|
|
|
|
<BOLD>diff --git a/old.txt b/new.txt<RESET>
|
|
|
|
<BOLD>--- a/old.txt<RESET>
|
|
|
|
<BOLD>+++ b/new.txt<RESET>
|
|
|
|
<CYAN>@@ -1,13 +1,15 @@<RESET>
|
|
|
|
<BOLD;MAGENTA>-this line should be marked as moved<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>too short<RESET>
|
|
|
|
unchanged<RESET>
|
|
|
|
unchanged<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>this line should be marked as moved<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>too short<RESET>
|
|
|
|
unchanged<RESET>
|
|
|
|
unchanged<RESET>
|
|
|
|
<RED>-too short<RESET>
|
|
|
|
<BOLD;MAGENTA>-this line should be marked as oldMoved newMoved<RESET>
|
|
|
|
<BOLD;BLUE>-this line should be marked as oldMovedAlternate newMoved<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>this line should be marked as oldMoved newMoved/newMovedAlternate<RESET>
|
|
|
|
unchanged 1<RESET>
|
|
|
|
unchanged 2<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>this line should be marked as oldMovedAlternate newMoved<RESET>
|
|
|
|
<BOLD;YELLOW>+<RESET><BOLD;YELLOW>this line should be marked as oldMoved newMoved/newMovedAlternate<RESET>
|
|
|
|
unchanged 3<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>this line should be marked as oldMoved newMoved<RESET>
|
|
|
|
unchanged 4<RESET>
|
|
|
|
<BOLD;MAGENTA>-this line should be marked as oldMoved newMoved/newMovedAlternate<RESET>
|
|
|
|
EOF
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2017-06-30 22:53:09 +02:00
|
|
|
test_expect_success 'cmd option assumes configured colored-moved' '
|
|
|
|
test_config color.diff.oldMoved "magenta" &&
|
|
|
|
test_config color.diff.newMoved "cyan" &&
|
|
|
|
test_config color.diff.oldMovedAlternative "blue" &&
|
|
|
|
test_config color.diff.newMovedAlternative "yellow" &&
|
|
|
|
test_config color.diff.oldMovedDimmed "normal magenta" &&
|
|
|
|
test_config color.diff.newMovedDimmed "normal cyan" &&
|
|
|
|
test_config color.diff.oldMovedAlternativeDimmed "normal blue" &&
|
|
|
|
test_config color.diff.newMovedAlternativeDimmed "normal yellow" &&
|
|
|
|
test_config diff.colorMoved zebra &&
|
2018-07-17 01:05:36 +02:00
|
|
|
git diff HEAD --no-renames --color-moved --color >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
2017-06-30 22:53:09 +02:00
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/lines.txt b/lines.txt<RESET>
|
|
|
|
<BOLD>--- a/lines.txt<RESET>
|
|
|
|
<BOLD>+++ b/lines.txt<RESET>
|
|
|
|
<CYAN>@@ -1,16 +1,16 @@<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<MAGENTA>-long line 1<RESET>
|
|
|
|
<MAGENTA>-long line 2<RESET>
|
|
|
|
<MAGENTA>-long line 3<RESET>
|
2017-06-30 22:53:09 +02:00
|
|
|
line 4<RESET>
|
|
|
|
line 5<RESET>
|
|
|
|
line 6<RESET>
|
|
|
|
line 7<RESET>
|
|
|
|
line 8<RESET>
|
|
|
|
line 9<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<CYAN>+<RESET><CYAN>long line 1<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 2<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 3<RESET>
|
|
|
|
<YELLOW>+<RESET><YELLOW>long line 14<RESET>
|
|
|
|
<YELLOW>+<RESET><YELLOW>long line 15<RESET>
|
|
|
|
<YELLOW>+<RESET><YELLOW>long line 16<RESET>
|
2017-06-30 22:53:09 +02:00
|
|
|
line 10<RESET>
|
|
|
|
line 11<RESET>
|
|
|
|
line 12<RESET>
|
|
|
|
line 13<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<MAGENTA>-long line 14<RESET>
|
|
|
|
<MAGENTA>-long line 15<RESET>
|
|
|
|
<MAGENTA>-long line 16<RESET>
|
2017-06-30 22:53:09 +02:00
|
|
|
EOF
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2022-03-17 15:55:34 +01:00
|
|
|
test_expect_success 'no effect on diff from --color-moved with --word-diff' '
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
cat <<-\EOF >text.txt &&
|
|
|
|
Lorem Ipsum is simply dummy text of the printing and typesetting industry.
|
|
|
|
EOF
|
|
|
|
git add text.txt &&
|
|
|
|
git commit -a -m "clean state" &&
|
|
|
|
cat <<-\EOF >text.txt &&
|
|
|
|
simply Lorem Ipsum dummy is text of the typesetting and printing industry.
|
|
|
|
EOF
|
|
|
|
git diff --color-moved --word-diff >actual &&
|
|
|
|
git diff --word-diff >expect &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
diff.c: fix a double-free regression in a18d66cefb
My a18d66cefb9 (diff.c: free "buf" in diff_words_flush(), 2022-03-04)
has what it retrospect is a rather obvious bug (I don't know what I
was thinking, if it all): We use the "emitted_symbols" allocation in
append_emitted_diff_symbol() N times, but starting with a18d66cefb9
we'd free it after its first use!
The correct way to free this data would have been to add the free() to
the existing free_diff_words_data() function, so let's do that. The
"ecbdata->diff_words->opt->emitted_symbols" might be NULL, so let's
add a trivial free_emitted_diff_symbols() helper next to the function
that appends to it.
This fixes the "no effect on show from" leak tested for in the
preceding commit. Perhaps confusingly this change will skip that test
under SANITIZE=leak, but otherwise opt-in the
"t4015-diff-whitespace.sh" test.
The reason is that a18d66cefb9 "fixed" the leak in the preceding "no
effect on diff" test, but for the first call to diff_words_flush() the
"wol->buf" would be NULL, so we wouldn't double-free (and
SANITIZE=address would see nothing amiss). With this change we'll
still pass that test, showing that we've also fixed leaks on this
codepath.
We then have to skip the new "no effect on show" test because it
happens to trip over an unrelated memory leak (in revision.c). The
same goes for "move detection with submodules". Both of them pass with
SANITIZE=address though, which would error on the "no effect on show"
test before this change.
Reported-by: Michael J Gruber <git@grubix.eu>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-03-17 15:55:35 +01:00
|
|
|
test_expect_success !SANITIZE_LEAK 'no effect on show from --color-moved with --word-diff' '
|
2022-03-17 15:55:34 +01:00
|
|
|
git show --color-moved --word-diff >actual &&
|
|
|
|
git show --word-diff >expect &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2017-10-19 22:24:03 +02:00
|
|
|
test_expect_success 'set up whitespace tests' '
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
git reset --hard &&
|
2017-10-19 22:24:03 +02:00
|
|
|
# Note that these lines have no leading or trailing whitespace.
|
|
|
|
cat <<-\EOF >lines.txt &&
|
|
|
|
line 1
|
|
|
|
line 2
|
|
|
|
line 3
|
|
|
|
line 4
|
2017-10-19 22:25:57 +02:00
|
|
|
line 5
|
2017-08-16 03:27:39 +02:00
|
|
|
long line 6
|
|
|
|
long line 7
|
2017-10-19 22:25:57 +02:00
|
|
|
long line 8
|
|
|
|
long line 9
|
2017-10-19 22:24:03 +02:00
|
|
|
EOF
|
|
|
|
git add lines.txt &&
|
|
|
|
git commit -m "add poetry" &&
|
|
|
|
git config color.diff.oldMoved "magenta" &&
|
|
|
|
git config color.diff.newMoved "cyan"
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'move detection ignoring whitespace ' '
|
|
|
|
q_to_tab <<-\EOF >lines.txt &&
|
|
|
|
Qlong line 6
|
|
|
|
Qlong line 7
|
2017-10-19 22:25:57 +02:00
|
|
|
Qlong line 8
|
|
|
|
Qchanged long line 9
|
2017-10-19 22:24:03 +02:00
|
|
|
line 1
|
|
|
|
line 2
|
|
|
|
line 3
|
|
|
|
line 4
|
2017-10-19 22:25:57 +02:00
|
|
|
line 5
|
2017-10-19 22:24:03 +02:00
|
|
|
EOF
|
2018-07-17 01:05:36 +02:00
|
|
|
git diff HEAD --no-renames --color-moved --color >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/lines.txt b/lines.txt<RESET>
|
|
|
|
<BOLD>--- a/lines.txt<RESET>
|
|
|
|
<BOLD>+++ b/lines.txt<RESET>
|
2017-10-19 22:25:57 +02:00
|
|
|
<CYAN>@@ -1,9 +1,9 @@<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<GREEN>+<RESET> <GREEN>long line 6<RESET>
|
|
|
|
<GREEN>+<RESET> <GREEN>long line 7<RESET>
|
2017-10-19 22:25:57 +02:00
|
|
|
<GREEN>+<RESET> <GREEN>long line 8<RESET>
|
|
|
|
<GREEN>+<RESET> <GREEN>changed long line 9<RESET>
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
line 1<RESET>
|
|
|
|
line 2<RESET>
|
|
|
|
line 3<RESET>
|
|
|
|
line 4<RESET>
|
2017-10-19 22:25:57 +02:00
|
|
|
line 5<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<RED>-long line 6<RESET>
|
|
|
|
<RED>-long line 7<RESET>
|
2017-10-19 22:25:57 +02:00
|
|
|
<RED>-long line 8<RESET>
|
|
|
|
<RED>-long line 9<RESET>
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
EOF
|
|
|
|
test_cmp expected actual &&
|
|
|
|
|
2018-07-17 01:05:40 +02:00
|
|
|
git diff HEAD --no-renames --color-moved --color \
|
|
|
|
--color-moved-ws=ignore-all-space >actual.raw &&
|
2018-07-17 01:05:36 +02:00
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/lines.txt b/lines.txt<RESET>
|
|
|
|
<BOLD>--- a/lines.txt<RESET>
|
|
|
|
<BOLD>+++ b/lines.txt<RESET>
|
2017-10-19 22:25:57 +02:00
|
|
|
<CYAN>@@ -1,9 +1,9 @@<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<CYAN>+<RESET> <CYAN>long line 6<RESET>
|
|
|
|
<CYAN>+<RESET> <CYAN>long line 7<RESET>
|
2017-10-19 22:25:57 +02:00
|
|
|
<CYAN>+<RESET> <CYAN>long line 8<RESET>
|
|
|
|
<GREEN>+<RESET> <GREEN>changed long line 9<RESET>
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
line 1<RESET>
|
|
|
|
line 2<RESET>
|
|
|
|
line 3<RESET>
|
|
|
|
line 4<RESET>
|
2017-10-19 22:25:57 +02:00
|
|
|
line 5<RESET>
|
2017-08-16 03:27:39 +02:00
|
|
|
<MAGENTA>-long line 6<RESET>
|
|
|
|
<MAGENTA>-long line 7<RESET>
|
2017-10-19 22:25:57 +02:00
|
|
|
<MAGENTA>-long line 8<RESET>
|
|
|
|
<RED>-long line 9<RESET>
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
EOF
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2017-10-19 22:26:31 +02:00
|
|
|
test_expect_success 'move detection ignoring whitespace changes' '
|
|
|
|
git reset --hard &&
|
|
|
|
# Lines 6-8 have a space change, but 9 is new whitespace
|
|
|
|
q_to_tab <<-\EOF >lines.txt &&
|
|
|
|
longQline 6
|
|
|
|
longQline 7
|
|
|
|
longQline 8
|
|
|
|
long liQne 9
|
|
|
|
line 1
|
|
|
|
line 2
|
|
|
|
line 3
|
|
|
|
line 4
|
|
|
|
line 5
|
|
|
|
EOF
|
|
|
|
|
2018-07-17 01:05:36 +02:00
|
|
|
git diff HEAD --no-renames --color-moved --color >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
2017-10-19 22:26:31 +02:00
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/lines.txt b/lines.txt<RESET>
|
|
|
|
<BOLD>--- a/lines.txt<RESET>
|
|
|
|
<BOLD>+++ b/lines.txt<RESET>
|
|
|
|
<CYAN>@@ -1,9 +1,9 @@<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>long line 6<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>long line 7<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>long line 8<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>long li ne 9<RESET>
|
|
|
|
line 1<RESET>
|
|
|
|
line 2<RESET>
|
|
|
|
line 3<RESET>
|
|
|
|
line 4<RESET>
|
|
|
|
line 5<RESET>
|
|
|
|
<RED>-long line 6<RESET>
|
|
|
|
<RED>-long line 7<RESET>
|
|
|
|
<RED>-long line 8<RESET>
|
|
|
|
<RED>-long line 9<RESET>
|
|
|
|
EOF
|
|
|
|
test_cmp expected actual &&
|
|
|
|
|
2018-07-17 01:05:40 +02:00
|
|
|
git diff HEAD --no-renames --color-moved --color \
|
|
|
|
--color-moved-ws=ignore-space-change >actual.raw &&
|
2018-07-17 01:05:36 +02:00
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
2017-10-19 22:26:31 +02:00
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/lines.txt b/lines.txt<RESET>
|
|
|
|
<BOLD>--- a/lines.txt<RESET>
|
|
|
|
<BOLD>+++ b/lines.txt<RESET>
|
|
|
|
<CYAN>@@ -1,9 +1,9 @@<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 6<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 7<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 8<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>long li ne 9<RESET>
|
|
|
|
line 1<RESET>
|
|
|
|
line 2<RESET>
|
|
|
|
line 3<RESET>
|
|
|
|
line 4<RESET>
|
|
|
|
line 5<RESET>
|
|
|
|
<MAGENTA>-long line 6<RESET>
|
|
|
|
<MAGENTA>-long line 7<RESET>
|
|
|
|
<MAGENTA>-long line 8<RESET>
|
|
|
|
<RED>-long line 9<RESET>
|
|
|
|
EOF
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
diff: fix whitespace-skipping with --color-moved
The code for handling whitespace with --color-moved
represents partial strings as a pair of pointers. There are
two possible conventions for the end pointer:
1. It points to the byte right after the end of the
string.
2. It points to the final byte of the string.
But we seem to use both conventions in the code:
a. we assign the initial pointers from the NUL-terminated
string using (1)
b. we eat trailing whitespace by checking the second
pointer for isspace(), which needs (2)
c. the next_byte() function checks for end-of-string with
"if (cp > endp)", which is (2)
d. in next_byte() we skip past internal whitespace with
"while (cp < end)", which is (1)
This creates fewer bugs than you might think, because there
are some subtle interactions. Because of (a) and (c), we
always return the NUL-terminator from next_byte(). But all
of the callers of next_byte() happen to handle that
gracefully.
Because of the mismatch between (d) and (c), next_byte()
could accidentally return a whitespace character right at
endp. But because of the interaction of (a) and (b), we fail
to actually chomp trailing whitespace, meaning our endp
_always_ points to a NUL, canceling out the problem.
But that does leave (b) as a real bug: when ignoring
whitespace only at the end-of-line, we don't correctly trim
it, and fail to match up lines.
We can fix the whole thing by moving consistently to one
convention. Since convention (1) is idiomatic in our code
base, we'll pick that one.
The existing "-w" and "-b" tests continue to pass, and a new
"--ignore-space-at-eol" shows off the breakage we're fixing.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-19 22:29:26 +02:00
|
|
|
test_expect_success 'move detection ignoring whitespace at eol' '
|
|
|
|
git reset --hard &&
|
|
|
|
# Lines 6-9 have new eol whitespace, but 9 also has it in the middle
|
|
|
|
q_to_tab <<-\EOF >lines.txt &&
|
|
|
|
long line 6Q
|
|
|
|
long line 7Q
|
|
|
|
long line 8Q
|
|
|
|
longQline 9Q
|
|
|
|
line 1
|
|
|
|
line 2
|
|
|
|
line 3
|
|
|
|
line 4
|
|
|
|
line 5
|
|
|
|
EOF
|
|
|
|
|
|
|
|
# avoid cluttering the output with complaints about our eol whitespace
|
|
|
|
test_config core.whitespace -blank-at-eol &&
|
|
|
|
|
2018-07-17 01:05:36 +02:00
|
|
|
git diff HEAD --no-renames --color-moved --color >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
diff: fix whitespace-skipping with --color-moved
The code for handling whitespace with --color-moved
represents partial strings as a pair of pointers. There are
two possible conventions for the end pointer:
1. It points to the byte right after the end of the
string.
2. It points to the final byte of the string.
But we seem to use both conventions in the code:
a. we assign the initial pointers from the NUL-terminated
string using (1)
b. we eat trailing whitespace by checking the second
pointer for isspace(), which needs (2)
c. the next_byte() function checks for end-of-string with
"if (cp > endp)", which is (2)
d. in next_byte() we skip past internal whitespace with
"while (cp < end)", which is (1)
This creates fewer bugs than you might think, because there
are some subtle interactions. Because of (a) and (c), we
always return the NUL-terminator from next_byte(). But all
of the callers of next_byte() happen to handle that
gracefully.
Because of the mismatch between (d) and (c), next_byte()
could accidentally return a whitespace character right at
endp. But because of the interaction of (a) and (b), we fail
to actually chomp trailing whitespace, meaning our endp
_always_ points to a NUL, canceling out the problem.
But that does leave (b) as a real bug: when ignoring
whitespace only at the end-of-line, we don't correctly trim
it, and fail to match up lines.
We can fix the whole thing by moving consistently to one
convention. Since convention (1) is idiomatic in our code
base, we'll pick that one.
The existing "-w" and "-b" tests continue to pass, and a new
"--ignore-space-at-eol" shows off the breakage we're fixing.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-19 22:29:26 +02:00
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/lines.txt b/lines.txt<RESET>
|
|
|
|
<BOLD>--- a/lines.txt<RESET>
|
|
|
|
<BOLD>+++ b/lines.txt<RESET>
|
|
|
|
<CYAN>@@ -1,9 +1,9 @@<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>long line 6 <RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>long line 7 <RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>long line 8 <RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>long line 9 <RESET>
|
|
|
|
line 1<RESET>
|
|
|
|
line 2<RESET>
|
|
|
|
line 3<RESET>
|
|
|
|
line 4<RESET>
|
|
|
|
line 5<RESET>
|
|
|
|
<RED>-long line 6<RESET>
|
|
|
|
<RED>-long line 7<RESET>
|
|
|
|
<RED>-long line 8<RESET>
|
|
|
|
<RED>-long line 9<RESET>
|
|
|
|
EOF
|
|
|
|
test_cmp expected actual &&
|
|
|
|
|
2018-07-17 01:05:40 +02:00
|
|
|
git diff HEAD --no-renames --color-moved --color \
|
|
|
|
--color-moved-ws=ignore-space-at-eol >actual.raw &&
|
2018-07-17 01:05:36 +02:00
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
diff: fix whitespace-skipping with --color-moved
The code for handling whitespace with --color-moved
represents partial strings as a pair of pointers. There are
two possible conventions for the end pointer:
1. It points to the byte right after the end of the
string.
2. It points to the final byte of the string.
But we seem to use both conventions in the code:
a. we assign the initial pointers from the NUL-terminated
string using (1)
b. we eat trailing whitespace by checking the second
pointer for isspace(), which needs (2)
c. the next_byte() function checks for end-of-string with
"if (cp > endp)", which is (2)
d. in next_byte() we skip past internal whitespace with
"while (cp < end)", which is (1)
This creates fewer bugs than you might think, because there
are some subtle interactions. Because of (a) and (c), we
always return the NUL-terminator from next_byte(). But all
of the callers of next_byte() happen to handle that
gracefully.
Because of the mismatch between (d) and (c), next_byte()
could accidentally return a whitespace character right at
endp. But because of the interaction of (a) and (b), we fail
to actually chomp trailing whitespace, meaning our endp
_always_ points to a NUL, canceling out the problem.
But that does leave (b) as a real bug: when ignoring
whitespace only at the end-of-line, we don't correctly trim
it, and fail to match up lines.
We can fix the whole thing by moving consistently to one
convention. Since convention (1) is idiomatic in our code
base, we'll pick that one.
The existing "-w" and "-b" tests continue to pass, and a new
"--ignore-space-at-eol" shows off the breakage we're fixing.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-19 22:29:26 +02:00
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/lines.txt b/lines.txt<RESET>
|
|
|
|
<BOLD>--- a/lines.txt<RESET>
|
|
|
|
<BOLD>+++ b/lines.txt<RESET>
|
|
|
|
<CYAN>@@ -1,9 +1,9 @@<RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 6 <RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 7 <RESET>
|
|
|
|
<CYAN>+<RESET><CYAN>long line 8 <RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>long line 9 <RESET>
|
|
|
|
line 1<RESET>
|
|
|
|
line 2<RESET>
|
|
|
|
line 3<RESET>
|
|
|
|
line 4<RESET>
|
|
|
|
line 5<RESET>
|
|
|
|
<MAGENTA>-long line 6<RESET>
|
|
|
|
<MAGENTA>-long line 7<RESET>
|
|
|
|
<MAGENTA>-long line 8<RESET>
|
|
|
|
<RED>-long line 9<RESET>
|
|
|
|
EOF
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2017-10-19 22:24:03 +02:00
|
|
|
test_expect_success 'clean up whitespace-test colors' '
|
|
|
|
git config --unset color.diff.oldMoved &&
|
|
|
|
git config --unset color.diff.newMoved
|
|
|
|
'
|
|
|
|
|
2017-08-16 03:27:39 +02:00
|
|
|
test_expect_success '--color-moved block at end of diff output respects MIN_ALNUM_COUNT' '
|
2017-08-16 03:27:38 +02:00
|
|
|
git reset --hard &&
|
|
|
|
>bar &&
|
|
|
|
cat <<-\EOF >foo &&
|
|
|
|
irrelevant_line
|
|
|
|
line1
|
|
|
|
EOF
|
|
|
|
git add foo bar &&
|
|
|
|
git commit -m x &&
|
|
|
|
|
|
|
|
cat <<-\EOF >bar &&
|
|
|
|
line1
|
|
|
|
EOF
|
|
|
|
cat <<-\EOF >foo &&
|
|
|
|
irrelevant_line
|
|
|
|
EOF
|
|
|
|
|
2018-07-17 01:05:36 +02:00
|
|
|
git diff HEAD --color-moved=zebra --color --no-renames >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
2017-08-16 03:27:38 +02:00
|
|
|
cat >expected <<-\EOF &&
|
|
|
|
<BOLD>diff --git a/bar b/bar<RESET>
|
|
|
|
<BOLD>--- a/bar<RESET>
|
|
|
|
<BOLD>+++ b/bar<RESET>
|
|
|
|
<CYAN>@@ -0,0 +1 @@<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>line1<RESET>
|
|
|
|
<BOLD>diff --git a/foo b/foo<RESET>
|
|
|
|
<BOLD>--- a/foo<RESET>
|
|
|
|
<BOLD>+++ b/foo<RESET>
|
|
|
|
<CYAN>@@ -1,2 +1 @@<RESET>
|
|
|
|
irrelevant_line<RESET>
|
|
|
|
<RED>-line1<RESET>
|
|
|
|
EOF
|
|
|
|
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2017-08-16 03:27:39 +02:00
|
|
|
test_expect_success '--color-moved respects MIN_ALNUM_COUNT' '
|
|
|
|
git reset --hard &&
|
|
|
|
cat <<-\EOF >foo &&
|
|
|
|
nineteen chars 456789
|
|
|
|
irrelevant_line
|
|
|
|
twenty chars 234567890
|
|
|
|
EOF
|
|
|
|
>bar &&
|
|
|
|
git add foo bar &&
|
|
|
|
git commit -m x &&
|
|
|
|
|
|
|
|
cat <<-\EOF >foo &&
|
|
|
|
irrelevant_line
|
|
|
|
EOF
|
|
|
|
cat <<-\EOF >bar &&
|
|
|
|
twenty chars 234567890
|
|
|
|
nineteen chars 456789
|
|
|
|
EOF
|
|
|
|
|
2018-07-17 01:05:36 +02:00
|
|
|
git diff HEAD --color-moved=zebra --color --no-renames >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
2017-08-16 03:27:39 +02:00
|
|
|
cat >expected <<-\EOF &&
|
|
|
|
<BOLD>diff --git a/bar b/bar<RESET>
|
|
|
|
<BOLD>--- a/bar<RESET>
|
|
|
|
<BOLD>+++ b/bar<RESET>
|
|
|
|
<CYAN>@@ -0,0 +1,2 @@<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>twenty chars 234567890<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>nineteen chars 456789<RESET>
|
|
|
|
<BOLD>diff --git a/foo b/foo<RESET>
|
|
|
|
<BOLD>--- a/foo<RESET>
|
|
|
|
<BOLD>+++ b/foo<RESET>
|
|
|
|
<CYAN>@@ -1,3 +1 @@<RESET>
|
|
|
|
<RED>-nineteen chars 456789<RESET>
|
|
|
|
irrelevant_line<RESET>
|
|
|
|
<BOLD;MAGENTA>-twenty chars 234567890<RESET>
|
|
|
|
EOF
|
|
|
|
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success '--color-moved treats adjacent blocks as separate for MIN_ALNUM_COUNT' '
|
|
|
|
git reset --hard &&
|
|
|
|
cat <<-\EOF >foo &&
|
|
|
|
7charsA
|
|
|
|
irrelevant_line
|
|
|
|
7charsB
|
|
|
|
7charsC
|
|
|
|
EOF
|
|
|
|
>bar &&
|
|
|
|
git add foo bar &&
|
|
|
|
git commit -m x &&
|
|
|
|
|
|
|
|
cat <<-\EOF >foo &&
|
|
|
|
irrelevant_line
|
|
|
|
EOF
|
|
|
|
cat <<-\EOF >bar &&
|
|
|
|
7charsB
|
|
|
|
7charsC
|
|
|
|
7charsA
|
|
|
|
EOF
|
|
|
|
|
2018-07-17 01:05:39 +02:00
|
|
|
git diff HEAD --color-moved=zebra --color --no-renames >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
2017-08-16 03:27:39 +02:00
|
|
|
cat >expected <<-\EOF &&
|
|
|
|
<BOLD>diff --git a/bar b/bar<RESET>
|
|
|
|
<BOLD>--- a/bar<RESET>
|
|
|
|
<BOLD>+++ b/bar<RESET>
|
|
|
|
<CYAN>@@ -0,0 +1,3 @@<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>7charsB<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>7charsC<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>7charsA<RESET>
|
|
|
|
<BOLD>diff --git a/foo b/foo<RESET>
|
|
|
|
<BOLD>--- a/foo<RESET>
|
|
|
|
<BOLD>+++ b/foo<RESET>
|
|
|
|
<CYAN>@@ -1,4 +1 @@<RESET>
|
|
|
|
<RED>-7charsA<RESET>
|
|
|
|
irrelevant_line<RESET>
|
|
|
|
<RED>-7charsB<RESET>
|
|
|
|
<RED>-7charsC<RESET>
|
|
|
|
EOF
|
|
|
|
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2021-12-09 11:29:58 +01:00
|
|
|
test_expect_success '--color-moved rewinds for MIN_ALNUM_COUNT' '
|
|
|
|
git reset --hard &&
|
|
|
|
test_write_lines >file \
|
|
|
|
A B C one two three four five six seven D E F G H I J &&
|
|
|
|
git add file &&
|
|
|
|
test_write_lines >file \
|
|
|
|
one two A B C D E F G H I J two three four five six seven &&
|
|
|
|
git diff --color-moved=zebra -- file &&
|
|
|
|
|
|
|
|
git diff --color-moved=zebra --color -- file >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
|
|
|
cat >expected <<-\EOF &&
|
|
|
|
<BOLD>diff --git a/file b/file<RESET>
|
|
|
|
<BOLD>--- a/file<RESET>
|
|
|
|
<BOLD>+++ b/file<RESET>
|
|
|
|
<CYAN>@@ -1,13 +1,8 @@<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>one<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>two<RESET>
|
|
|
|
A<RESET>
|
|
|
|
B<RESET>
|
|
|
|
C<RESET>
|
|
|
|
<RED>-one<RESET>
|
|
|
|
<BOLD;MAGENTA>-two<RESET>
|
|
|
|
<BOLD;MAGENTA>-three<RESET>
|
|
|
|
<BOLD;MAGENTA>-four<RESET>
|
|
|
|
<BOLD;MAGENTA>-five<RESET>
|
|
|
|
<BOLD;MAGENTA>-six<RESET>
|
|
|
|
<BOLD;MAGENTA>-seven<RESET>
|
|
|
|
D<RESET>
|
|
|
|
E<RESET>
|
|
|
|
F<RESET>
|
|
|
|
<CYAN>@@ -15,3 +10,9 @@<RESET> <RESET>G<RESET>
|
|
|
|
H<RESET>
|
|
|
|
I<RESET>
|
|
|
|
J<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>two<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>three<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>four<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>five<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>six<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>seven<RESET>
|
|
|
|
EOF
|
|
|
|
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
diff.c: fix a double-free regression in a18d66cefb
My a18d66cefb9 (diff.c: free "buf" in diff_words_flush(), 2022-03-04)
has what it retrospect is a rather obvious bug (I don't know what I
was thinking, if it all): We use the "emitted_symbols" allocation in
append_emitted_diff_symbol() N times, but starting with a18d66cefb9
we'd free it after its first use!
The correct way to free this data would have been to add the free() to
the existing free_diff_words_data() function, so let's do that. The
"ecbdata->diff_words->opt->emitted_symbols" might be NULL, so let's
add a trivial free_emitted_diff_symbols() helper next to the function
that appends to it.
This fixes the "no effect on show from" leak tested for in the
preceding commit. Perhaps confusingly this change will skip that test
under SANITIZE=leak, but otherwise opt-in the
"t4015-diff-whitespace.sh" test.
The reason is that a18d66cefb9 "fixed" the leak in the preceding "no
effect on diff" test, but for the first call to diff_words_flush() the
"wol->buf" would be NULL, so we wouldn't double-free (and
SANITIZE=address would see nothing amiss). With this change we'll
still pass that test, showing that we've also fixed leaks on this
codepath.
We then have to skip the new "no effect on show" test because it
happens to trip over an unrelated memory leak (in revision.c). The
same goes for "move detection with submodules". Both of them pass with
SANITIZE=address though, which would error on the "no effect on show"
test before this change.
Reported-by: Michael J Gruber <git@grubix.eu>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-03-17 15:55:35 +01:00
|
|
|
test_expect_success !SANITIZE_LEAK 'move detection with submodules' '
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
test_create_repo bananas &&
|
|
|
|
echo ripe >bananas/recipe &&
|
|
|
|
git -C bananas add recipe &&
|
|
|
|
test_commit fruit &&
|
|
|
|
test_commit -C bananas recipe &&
|
|
|
|
git submodule add ./bananas &&
|
|
|
|
git add bananas &&
|
|
|
|
git commit -a -m "bananas are like a heavy library?" &&
|
|
|
|
echo foul >bananas/recipe &&
|
|
|
|
echo ripe >fruit.t &&
|
|
|
|
|
2017-10-03 15:41:51 +02:00
|
|
|
git diff --submodule=diff --color-moved --color >actual &&
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
|
|
|
|
# no move detection as the moved line is across repository boundaries.
|
|
|
|
test_decode_color <actual >decoded_actual &&
|
|
|
|
! grep BGREEN decoded_actual &&
|
|
|
|
! grep BRED decoded_actual &&
|
|
|
|
|
|
|
|
# nor did we mess with it another way
|
2019-11-27 20:53:24 +01:00
|
|
|
git diff --submodule=diff --color >expect.raw &&
|
|
|
|
test_decode_color <expect.raw >expect &&
|
2018-07-17 01:05:40 +02:00
|
|
|
test_cmp expect decoded_actual &&
|
|
|
|
rm -rf bananas &&
|
|
|
|
git submodule deinit bananas
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'only move detection ignores white spaces' '
|
|
|
|
git reset --hard &&
|
|
|
|
q_to_tab <<-\EOF >text.txt &&
|
|
|
|
a long line to exceed per-line minimum
|
|
|
|
another long line to exceed per-line minimum
|
|
|
|
original file
|
|
|
|
EOF
|
|
|
|
git add text.txt &&
|
|
|
|
git commit -m "add text" &&
|
|
|
|
q_to_tab <<-\EOF >text.txt &&
|
|
|
|
Qa long line to exceed per-line minimum
|
|
|
|
Qanother long line to exceed per-line minimum
|
|
|
|
new file
|
|
|
|
EOF
|
|
|
|
|
|
|
|
# Make sure we get a different diff using -w
|
|
|
|
git diff --color --color-moved -w >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
|
|
|
q_to_tab <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/text.txt b/text.txt<RESET>
|
|
|
|
<BOLD>--- a/text.txt<RESET>
|
|
|
|
<BOLD>+++ b/text.txt<RESET>
|
|
|
|
<CYAN>@@ -1,3 +1,3 @@<RESET>
|
|
|
|
Qa long line to exceed per-line minimum<RESET>
|
|
|
|
Qanother long line to exceed per-line minimum<RESET>
|
|
|
|
<RED>-original file<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>new file<RESET>
|
|
|
|
EOF
|
|
|
|
test_cmp expected actual &&
|
|
|
|
|
|
|
|
# And now ignoring white space only in the move detection
|
|
|
|
git diff --color --color-moved \
|
|
|
|
--color-moved-ws=ignore-all-space,ignore-space-change,ignore-space-at-eol >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
|
|
|
q_to_tab <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/text.txt b/text.txt<RESET>
|
|
|
|
<BOLD>--- a/text.txt<RESET>
|
|
|
|
<BOLD>+++ b/text.txt<RESET>
|
|
|
|
<CYAN>@@ -1,3 +1,3 @@<RESET>
|
|
|
|
<BOLD;MAGENTA>-a long line to exceed per-line minimum<RESET>
|
|
|
|
<BOLD;MAGENTA>-another long line to exceed per-line minimum<RESET>
|
|
|
|
<RED>-original file<RESET>
|
2018-11-23 12:16:55 +01:00
|
|
|
<BOLD;CYAN>+<RESET>Q<BOLD;CYAN>a long line to exceed per-line minimum<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET>Q<BOLD;CYAN>another long line to exceed per-line minimum<RESET>
|
2018-07-17 01:05:40 +02:00
|
|
|
<GREEN>+<RESET><GREEN>new file<RESET>
|
|
|
|
EOF
|
|
|
|
test_cmp expected actual
|
diff.c: color moved lines differently
When a patch consists mostly of moving blocks of code around, it can
be quite tedious to ensure that the blocks are moved verbatim, and not
undesirably modified in the move. To that end, color blocks that are
moved within the same patch differently. For example (OM, del, add,
and NM are different colors):
[OM] -void sensitive_stuff(void)
[OM] -{
[OM] - if (!is_authorized_user())
[OM] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OM] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NM] + sensitive_stuff(spanning,
[NM] + multiple,
[NM] + lines);
[NM] +}
However adjacent blocks may be problematic. For example, in this
potentially malicious patch, the swapping of blocks can be spotted:
[OM] -void sensitive_stuff(void)
[OM] -{
[OMA] - if (!is_authorized_user())
[OMA] - die("unauthorized");
[OM] - sensitive_stuff(spanning,
[OM] - multiple,
[OM] - lines);
[OMA] -}
void another_function()
{
[del] - printf("foo");
[add] + printf("bar");
}
[NM] +void sensitive_stuff(void)
[NM] +{
[NMA] + sensitive_stuff(spanning,
[NMA] + multiple,
[NMA] + lines);
[NM] + if (!is_authorized_user())
[NM] + die("unauthorized");
[NMA] +}
If the moved code is larger, it is easier to hide some permutation in the
code, which is why some alternative coloring is needed.
This patch implements the first mode:
* basic alternating 'Zebra' mode
This conveys all information needed to the user. Defer customization to
later patches.
First I implemented an alternative design, which would try to fingerprint
a line by its neighbors to detect if we are in a block or at the boundary.
This idea iss error prone as it inspected each line and its neighboring
lines to determine if the line was (a) moved and (b) if was deep inside
a hunk by having matching neighboring lines. This is unreliable as the
we can construct hunks which have equal neighbors that just exceed the
number of lines inspected. (Think of 'AXYZBXYZCXYZD..' with each letter
as a line, that is permutated to AXYZCXYZBXYZD..').
Instead this provides a dynamic programming greedy algorithm that finds
the largest moved hunk and then has several modes on highlighting bounds.
A note on the options '--submodule=diff' and '--color-words/--word-diff':
In the conversion to use emit_line in the prior patches both submodules
as well as word diff output carefully chose to call emit_line with sign=0.
All output with sign=0 is ignored for move detection purposes in this
patch, such that no weird looking output will be generated for these
cases. This leads to another thought: We could pass on '--color-moved' to
submodules such that they color up moved lines for themselves. If we'd do
so only line moves within a repository boundary are marked up.
It is useful to have moved lines colored, but there are annoying corner
cases, such as a single line moved, that is very common. For example
in a typical patch of C code, we have closing braces that end statement
blocks or functions.
While it is technically true that these lines are moved as they show up
elsewhere, it is harmful for the review as the reviewers attention is
drawn to such a minor side annoyance.
For now let's have a simple solution of hardcoding the number of
moved lines to be at least 3 before coloring them. Note, that the
length is applied across all blocks to find the 'lonely' blocks
that pollute new code, but do not interfere with a permutated
block where each permutation has less lines than 3.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-30 22:53:07 +02:00
|
|
|
'
|
|
|
|
|
diff.c: add white space mode to move detection that allows indent changes
The option of --color-moved has proven to be useful as observed on the
mailing list. However when refactoring sometimes the indentation changes,
for example when partitioning a functions into smaller helper functions
the code usually mostly moved around except for a decrease in indentation.
To just review the moved code ignoring the change in indentation, a mode
to ignore spaces in the move detection as implemented in a previous patch
would be enough. However the whole move coloring as motivated in commit
2e2d5ac (diff.c: color moved lines differently, 2017-06-30), brought
up the notion of the reviewer being able to trust the move of a "block".
As there are languages such as python, which depend on proper relative
indentation for the control flow of the program, ignoring any white space
change in a block would not uphold the promises of 2e2d5ac that allows
reviewers to pay less attention to the inside of a block, as inside
the reviewer wants to assume the same program flow.
This new mode of white space ignorance will take this into account and will
only allow the same white space changes per line in each block. This patch
even allows only for the same change at the beginning of the lines.
As this is a white space mode, it is made exclusive to other white space
modes in the move detection.
This patch brings some challenges, related to the detection of blocks.
We need a wide net to catch the possible moved lines, but then need to
narrow down to check if the blocks are still intact. Consider this
example (ignoring block sizes):
- A
- B
- C
+ A
+ B
+ C
At the beginning of a block when checking if there is a counterpart
for A, we have to ignore all space changes. However at the following
lines we have to check if the indent change stayed the same.
Checking if the indentation change did stay the same, is done by computing
the indentation change by the difference in line length, and then assume
the change is only in the beginning of the longer line, the common tail
is the same. That is why the test contains lines like:
- <TAB> A
...
+ A <TAB>
...
As the first line starting a block is caught using a compare function that
ignores white spaces unlike the rest of the block, where the white space
delta is taken into account for the comparison, we also have to think about
the following situation:
- A
- B
- A
- B
+ A
+ B
+ A
+ B
When checking if the first A (both in the + and - lines) is a start of
a block, we have to check all 'A' and record all the white space deltas
such that we can find the example above to be just one block that is
indented.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-18 21:31:55 +02:00
|
|
|
test_expect_success 'compare whitespace delta across moved blocks' '
|
|
|
|
|
|
|
|
git reset --hard &&
|
|
|
|
q_to_tab <<-\EOF >text.txt &&
|
|
|
|
QIndented
|
|
|
|
QText across
|
|
|
|
Qsome lines
|
|
|
|
QBut! <- this stands out
|
|
|
|
QAdjusting with
|
|
|
|
QQdifferent starting
|
|
|
|
Qwhite spaces
|
|
|
|
QAnother outlier
|
|
|
|
QQQIndented
|
|
|
|
QQQText across
|
|
|
|
QQQfive lines
|
|
|
|
QQQthat has similar lines
|
|
|
|
QQQto previous blocks, but with different indent
|
|
|
|
QQQYetQAnotherQoutlierQ
|
2018-11-23 12:16:53 +01:00
|
|
|
QLine with internal w h i t e s p a c e change
|
diff.c: add white space mode to move detection that allows indent changes
The option of --color-moved has proven to be useful as observed on the
mailing list. However when refactoring sometimes the indentation changes,
for example when partitioning a functions into smaller helper functions
the code usually mostly moved around except for a decrease in indentation.
To just review the moved code ignoring the change in indentation, a mode
to ignore spaces in the move detection as implemented in a previous patch
would be enough. However the whole move coloring as motivated in commit
2e2d5ac (diff.c: color moved lines differently, 2017-06-30), brought
up the notion of the reviewer being able to trust the move of a "block".
As there are languages such as python, which depend on proper relative
indentation for the control flow of the program, ignoring any white space
change in a block would not uphold the promises of 2e2d5ac that allows
reviewers to pay less attention to the inside of a block, as inside
the reviewer wants to assume the same program flow.
This new mode of white space ignorance will take this into account and will
only allow the same white space changes per line in each block. This patch
even allows only for the same change at the beginning of the lines.
As this is a white space mode, it is made exclusive to other white space
modes in the move detection.
This patch brings some challenges, related to the detection of blocks.
We need a wide net to catch the possible moved lines, but then need to
narrow down to check if the blocks are still intact. Consider this
example (ignoring block sizes):
- A
- B
- C
+ A
+ B
+ C
At the beginning of a block when checking if there is a counterpart
for A, we have to ignore all space changes. However at the following
lines we have to check if the indent change stayed the same.
Checking if the indentation change did stay the same, is done by computing
the indentation change by the difference in line length, and then assume
the change is only in the beginning of the longer line, the common tail
is the same. That is why the test contains lines like:
- <TAB> A
...
+ A <TAB>
...
As the first line starting a block is caught using a compare function that
ignores white spaces unlike the rest of the block, where the white space
delta is taken into account for the comparison, we also have to think about
the following situation:
- A
- B
- A
- B
+ A
+ B
+ A
+ B
When checking if the first A (both in the + and - lines) is a start of
a block, we have to check all 'A' and record all the white space deltas
such that we can find the example above to be just one block that is
indented.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-18 21:31:55 +02:00
|
|
|
EOF
|
|
|
|
|
|
|
|
git add text.txt &&
|
|
|
|
git commit -m "add text.txt" &&
|
|
|
|
|
|
|
|
q_to_tab <<-\EOF >text.txt &&
|
|
|
|
QQIndented
|
|
|
|
QQText across
|
|
|
|
QQsome lines
|
|
|
|
QQQBut! <- this stands out
|
|
|
|
Adjusting with
|
|
|
|
Qdifferent starting
|
|
|
|
white spaces
|
|
|
|
AnotherQoutlier
|
|
|
|
QQIndented
|
|
|
|
QQText across
|
|
|
|
QQfive lines
|
|
|
|
QQthat has similar lines
|
|
|
|
QQto previous blocks, but with different indent
|
|
|
|
QQYetQAnotherQoutlier
|
2018-11-23 12:16:53 +01:00
|
|
|
QLine with internal whitespace change
|
diff.c: add white space mode to move detection that allows indent changes
The option of --color-moved has proven to be useful as observed on the
mailing list. However when refactoring sometimes the indentation changes,
for example when partitioning a functions into smaller helper functions
the code usually mostly moved around except for a decrease in indentation.
To just review the moved code ignoring the change in indentation, a mode
to ignore spaces in the move detection as implemented in a previous patch
would be enough. However the whole move coloring as motivated in commit
2e2d5ac (diff.c: color moved lines differently, 2017-06-30), brought
up the notion of the reviewer being able to trust the move of a "block".
As there are languages such as python, which depend on proper relative
indentation for the control flow of the program, ignoring any white space
change in a block would not uphold the promises of 2e2d5ac that allows
reviewers to pay less attention to the inside of a block, as inside
the reviewer wants to assume the same program flow.
This new mode of white space ignorance will take this into account and will
only allow the same white space changes per line in each block. This patch
even allows only for the same change at the beginning of the lines.
As this is a white space mode, it is made exclusive to other white space
modes in the move detection.
This patch brings some challenges, related to the detection of blocks.
We need a wide net to catch the possible moved lines, but then need to
narrow down to check if the blocks are still intact. Consider this
example (ignoring block sizes):
- A
- B
- C
+ A
+ B
+ C
At the beginning of a block when checking if there is a counterpart
for A, we have to ignore all space changes. However at the following
lines we have to check if the indent change stayed the same.
Checking if the indentation change did stay the same, is done by computing
the indentation change by the difference in line length, and then assume
the change is only in the beginning of the longer line, the common tail
is the same. That is why the test contains lines like:
- <TAB> A
...
+ A <TAB>
...
As the first line starting a block is caught using a compare function that
ignores white spaces unlike the rest of the block, where the white space
delta is taken into account for the comparison, we also have to think about
the following situation:
- A
- B
- A
- B
+ A
+ B
+ A
+ B
When checking if the first A (both in the + and - lines) is a start of
a block, we have to check all 'A' and record all the white space deltas
such that we can find the example above to be just one block that is
indented.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-18 21:31:55 +02:00
|
|
|
EOF
|
|
|
|
|
|
|
|
git diff --color --color-moved --color-moved-ws=allow-indentation-change >actual.raw &&
|
|
|
|
grep -v "index" actual.raw | test_decode_color >actual &&
|
|
|
|
|
|
|
|
q_to_tab <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/text.txt b/text.txt<RESET>
|
|
|
|
<BOLD>--- a/text.txt<RESET>
|
|
|
|
<BOLD>+++ b/text.txt<RESET>
|
2018-11-23 12:16:53 +01:00
|
|
|
<CYAN>@@ -1,15 +1,15 @@<RESET>
|
diff.c: add white space mode to move detection that allows indent changes
The option of --color-moved has proven to be useful as observed on the
mailing list. However when refactoring sometimes the indentation changes,
for example when partitioning a functions into smaller helper functions
the code usually mostly moved around except for a decrease in indentation.
To just review the moved code ignoring the change in indentation, a mode
to ignore spaces in the move detection as implemented in a previous patch
would be enough. However the whole move coloring as motivated in commit
2e2d5ac (diff.c: color moved lines differently, 2017-06-30), brought
up the notion of the reviewer being able to trust the move of a "block".
As there are languages such as python, which depend on proper relative
indentation for the control flow of the program, ignoring any white space
change in a block would not uphold the promises of 2e2d5ac that allows
reviewers to pay less attention to the inside of a block, as inside
the reviewer wants to assume the same program flow.
This new mode of white space ignorance will take this into account and will
only allow the same white space changes per line in each block. This patch
even allows only for the same change at the beginning of the lines.
As this is a white space mode, it is made exclusive to other white space
modes in the move detection.
This patch brings some challenges, related to the detection of blocks.
We need a wide net to catch the possible moved lines, but then need to
narrow down to check if the blocks are still intact. Consider this
example (ignoring block sizes):
- A
- B
- C
+ A
+ B
+ C
At the beginning of a block when checking if there is a counterpart
for A, we have to ignore all space changes. However at the following
lines we have to check if the indent change stayed the same.
Checking if the indentation change did stay the same, is done by computing
the indentation change by the difference in line length, and then assume
the change is only in the beginning of the longer line, the common tail
is the same. That is why the test contains lines like:
- <TAB> A
...
+ A <TAB>
...
As the first line starting a block is caught using a compare function that
ignores white spaces unlike the rest of the block, where the white space
delta is taken into account for the comparison, we also have to think about
the following situation:
- A
- B
- A
- B
+ A
+ B
+ A
+ B
When checking if the first A (both in the + and - lines) is a start of
a block, we have to check all 'A' and record all the white space deltas
such that we can find the example above to be just one block that is
indented.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-18 21:31:55 +02:00
|
|
|
<BOLD;MAGENTA>-QIndented<RESET>
|
|
|
|
<BOLD;MAGENTA>-QText across<RESET>
|
|
|
|
<BOLD;MAGENTA>-Qsome lines<RESET>
|
|
|
|
<RED>-QBut! <- this stands out<RESET>
|
|
|
|
<BOLD;MAGENTA>-QAdjusting with<RESET>
|
|
|
|
<BOLD;MAGENTA>-QQdifferent starting<RESET>
|
|
|
|
<BOLD;MAGENTA>-Qwhite spaces<RESET>
|
|
|
|
<RED>-QAnother outlier<RESET>
|
|
|
|
<BOLD;MAGENTA>-QQQIndented<RESET>
|
|
|
|
<BOLD;MAGENTA>-QQQText across<RESET>
|
|
|
|
<BOLD;MAGENTA>-QQQfive lines<RESET>
|
|
|
|
<BOLD;MAGENTA>-QQQthat has similar lines<RESET>
|
|
|
|
<BOLD;MAGENTA>-QQQto previous blocks, but with different indent<RESET>
|
|
|
|
<RED>-QQQYetQAnotherQoutlierQ<RESET>
|
2018-11-23 12:16:53 +01:00
|
|
|
<RED>-QLine with internal w h i t e s p a c e change<RESET>
|
diff.c: add white space mode to move detection that allows indent changes
The option of --color-moved has proven to be useful as observed on the
mailing list. However when refactoring sometimes the indentation changes,
for example when partitioning a functions into smaller helper functions
the code usually mostly moved around except for a decrease in indentation.
To just review the moved code ignoring the change in indentation, a mode
to ignore spaces in the move detection as implemented in a previous patch
would be enough. However the whole move coloring as motivated in commit
2e2d5ac (diff.c: color moved lines differently, 2017-06-30), brought
up the notion of the reviewer being able to trust the move of a "block".
As there are languages such as python, which depend on proper relative
indentation for the control flow of the program, ignoring any white space
change in a block would not uphold the promises of 2e2d5ac that allows
reviewers to pay less attention to the inside of a block, as inside
the reviewer wants to assume the same program flow.
This new mode of white space ignorance will take this into account and will
only allow the same white space changes per line in each block. This patch
even allows only for the same change at the beginning of the lines.
As this is a white space mode, it is made exclusive to other white space
modes in the move detection.
This patch brings some challenges, related to the detection of blocks.
We need a wide net to catch the possible moved lines, but then need to
narrow down to check if the blocks are still intact. Consider this
example (ignoring block sizes):
- A
- B
- C
+ A
+ B
+ C
At the beginning of a block when checking if there is a counterpart
for A, we have to ignore all space changes. However at the following
lines we have to check if the indent change stayed the same.
Checking if the indentation change did stay the same, is done by computing
the indentation change by the difference in line length, and then assume
the change is only in the beginning of the longer line, the common tail
is the same. That is why the test contains lines like:
- <TAB> A
...
+ A <TAB>
...
As the first line starting a block is caught using a compare function that
ignores white spaces unlike the rest of the block, where the white space
delta is taken into account for the comparison, we also have to think about
the following situation:
- A
- B
- A
- B
+ A
+ B
+ A
+ B
When checking if the first A (both in the + and - lines) is a start of
a block, we have to check all 'A' and record all the white space deltas
such that we can find the example above to be just one block that is
indented.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-18 21:31:55 +02:00
|
|
|
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>Indented<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>Text across<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>some lines<RESET>
|
|
|
|
<GREEN>+<RESET>QQQ<GREEN>But! <- this stands out<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>Adjusting with<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET>Q<BOLD;CYAN>different starting<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>white spaces<RESET>
|
|
|
|
<GREEN>+<RESET><GREEN>AnotherQoutlier<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>Indented<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>Text across<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>five lines<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>that has similar lines<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET>QQ<BOLD;CYAN>to previous blocks, but with different indent<RESET>
|
|
|
|
<GREEN>+<RESET>QQ<GREEN>YetQAnotherQoutlier<RESET>
|
2018-11-23 12:16:53 +01:00
|
|
|
<GREEN>+<RESET>Q<GREEN>Line with internal whitespace change<RESET>
|
diff.c: add white space mode to move detection that allows indent changes
The option of --color-moved has proven to be useful as observed on the
mailing list. However when refactoring sometimes the indentation changes,
for example when partitioning a functions into smaller helper functions
the code usually mostly moved around except for a decrease in indentation.
To just review the moved code ignoring the change in indentation, a mode
to ignore spaces in the move detection as implemented in a previous patch
would be enough. However the whole move coloring as motivated in commit
2e2d5ac (diff.c: color moved lines differently, 2017-06-30), brought
up the notion of the reviewer being able to trust the move of a "block".
As there are languages such as python, which depend on proper relative
indentation for the control flow of the program, ignoring any white space
change in a block would not uphold the promises of 2e2d5ac that allows
reviewers to pay less attention to the inside of a block, as inside
the reviewer wants to assume the same program flow.
This new mode of white space ignorance will take this into account and will
only allow the same white space changes per line in each block. This patch
even allows only for the same change at the beginning of the lines.
As this is a white space mode, it is made exclusive to other white space
modes in the move detection.
This patch brings some challenges, related to the detection of blocks.
We need a wide net to catch the possible moved lines, but then need to
narrow down to check if the blocks are still intact. Consider this
example (ignoring block sizes):
- A
- B
- C
+ A
+ B
+ C
At the beginning of a block when checking if there is a counterpart
for A, we have to ignore all space changes. However at the following
lines we have to check if the indent change stayed the same.
Checking if the indentation change did stay the same, is done by computing
the indentation change by the difference in line length, and then assume
the change is only in the beginning of the longer line, the common tail
is the same. That is why the test contains lines like:
- <TAB> A
...
+ A <TAB>
...
As the first line starting a block is caught using a compare function that
ignores white spaces unlike the rest of the block, where the white space
delta is taken into account for the comparison, we also have to think about
the following situation:
- A
- B
- A
- B
+ A
+ B
+ A
+ B
When checking if the first A (both in the + and - lines) is a start of
a block, we have to check all 'A' and record all the white space deltas
such that we can find the example above to be just one block that is
indented.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-18 21:31:55 +02:00
|
|
|
EOF
|
|
|
|
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2018-11-13 22:33:57 +01:00
|
|
|
test_expect_success 'bogus settings in move detection erroring out' '
|
|
|
|
test_must_fail git diff --color-moved=bogus 2>err &&
|
|
|
|
test_i18ngrep "must be one of" err &&
|
|
|
|
test_i18ngrep bogus err &&
|
|
|
|
|
|
|
|
test_must_fail git -c diff.colormoved=bogus diff 2>err &&
|
|
|
|
test_i18ngrep "must be one of" err &&
|
|
|
|
test_i18ngrep "from command-line config" err &&
|
|
|
|
|
|
|
|
test_must_fail git diff --color-moved-ws=bogus 2>err &&
|
|
|
|
test_i18ngrep "possible values" err &&
|
|
|
|
test_i18ngrep bogus err &&
|
|
|
|
|
|
|
|
test_must_fail git -c diff.colormovedws=bogus diff 2>err &&
|
|
|
|
test_i18ngrep "possible values" err &&
|
|
|
|
test_i18ngrep "from command-line config" err
|
|
|
|
'
|
|
|
|
|
diff.c: add white space mode to move detection that allows indent changes
The option of --color-moved has proven to be useful as observed on the
mailing list. However when refactoring sometimes the indentation changes,
for example when partitioning a functions into smaller helper functions
the code usually mostly moved around except for a decrease in indentation.
To just review the moved code ignoring the change in indentation, a mode
to ignore spaces in the move detection as implemented in a previous patch
would be enough. However the whole move coloring as motivated in commit
2e2d5ac (diff.c: color moved lines differently, 2017-06-30), brought
up the notion of the reviewer being able to trust the move of a "block".
As there are languages such as python, which depend on proper relative
indentation for the control flow of the program, ignoring any white space
change in a block would not uphold the promises of 2e2d5ac that allows
reviewers to pay less attention to the inside of a block, as inside
the reviewer wants to assume the same program flow.
This new mode of white space ignorance will take this into account and will
only allow the same white space changes per line in each block. This patch
even allows only for the same change at the beginning of the lines.
As this is a white space mode, it is made exclusive to other white space
modes in the move detection.
This patch brings some challenges, related to the detection of blocks.
We need a wide net to catch the possible moved lines, but then need to
narrow down to check if the blocks are still intact. Consider this
example (ignoring block sizes):
- A
- B
- C
+ A
+ B
+ C
At the beginning of a block when checking if there is a counterpart
for A, we have to ignore all space changes. However at the following
lines we have to check if the indent change stayed the same.
Checking if the indentation change did stay the same, is done by computing
the indentation change by the difference in line length, and then assume
the change is only in the beginning of the longer line, the common tail
is the same. That is why the test contains lines like:
- <TAB> A
...
+ A <TAB>
...
As the first line starting a block is caught using a compare function that
ignores white spaces unlike the rest of the block, where the white space
delta is taken into account for the comparison, we also have to think about
the following situation:
- A
- B
- A
- B
+ A
+ B
+ A
+ B
When checking if the first A (both in the + and - lines) is a start of
a block, we have to check all 'A' and record all the white space deltas
such that we can find the example above to be just one block that is
indented.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-18 21:31:55 +02:00
|
|
|
test_expect_success 'compare whitespace delta incompatible with other space options' '
|
|
|
|
test_must_fail git diff \
|
|
|
|
--color-moved-ws=allow-indentation-change,ignore-all-space \
|
|
|
|
2>err &&
|
|
|
|
test_i18ngrep allow-indentation-change err
|
|
|
|
'
|
|
|
|
|
2018-11-23 12:16:58 +01:00
|
|
|
EMPTY=''
|
diff --color-moved-ws: modify allow-indentation-change
Currently diff --color-moved-ws=allow-indentation-change does not
support indentation that contains a mix of tabs and spaces. For
example in commit 546f70f377 ("convert.h: drop 'extern' from function
declaration", 2018-06-30) the function parameters in the following
lines are not colored as moved [1].
-extern int stream_filter(struct stream_filter *,
- const char *input, size_t *isize_p,
- char *output, size_t *osize_p);
+int stream_filter(struct stream_filter *,
+ const char *input, size_t *isize_p,
+ char *output, size_t *osize_p);
This commit changes the way the indentation is handled to track the
visual size of the indentation rather than the characters in the
indentation. This has the benefit that any whitespace errors do not
interfer with the move detection (the whitespace errors will still be
highlighted according to --ws-error-highlight). During the discussion
of this feature there were concerns about the correct detection of
indentation for python. However those concerns apply whether or not
we're detecting moved lines so no attempt is made to determine if the
indentation is 'pythonic'.
[1] Note that before the commit to fix the erroneous coloring of moved
lines each line was colored as a different block, since that commit
they are uncolored.
Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Reviewed-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-23 12:16:57 +01:00
|
|
|
test_expect_success 'compare mixed whitespace delta across moved blocks' '
|
|
|
|
|
|
|
|
git reset --hard &&
|
2021-12-09 11:30:07 +01:00
|
|
|
tr "^|Q_" "\f\v\t " <<-EOF >text.txt &&
|
|
|
|
^__
|
|
|
|
|____too short without
|
|
|
|
^
|
2018-11-23 12:16:58 +01:00
|
|
|
___being grouped across blank line
|
|
|
|
${EMPTY}
|
|
|
|
context
|
|
|
|
lines
|
|
|
|
to
|
|
|
|
anchor
|
diff --color-moved-ws: modify allow-indentation-change
Currently diff --color-moved-ws=allow-indentation-change does not
support indentation that contains a mix of tabs and spaces. For
example in commit 546f70f377 ("convert.h: drop 'extern' from function
declaration", 2018-06-30) the function parameters in the following
lines are not colored as moved [1].
-extern int stream_filter(struct stream_filter *,
- const char *input, size_t *isize_p,
- char *output, size_t *osize_p);
+int stream_filter(struct stream_filter *,
+ const char *input, size_t *isize_p,
+ char *output, size_t *osize_p);
This commit changes the way the indentation is handled to track the
visual size of the indentation rather than the characters in the
indentation. This has the benefit that any whitespace errors do not
interfer with the move detection (the whitespace errors will still be
highlighted according to --ws-error-highlight). During the discussion
of this feature there were concerns about the correct detection of
indentation for python. However those concerns apply whether or not
we're detecting moved lines so no attempt is made to determine if the
indentation is 'pythonic'.
[1] Note that before the commit to fix the erroneous coloring of moved
lines each line was colored as a different block, since that commit
they are uncolored.
Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Reviewed-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-23 12:16:57 +01:00
|
|
|
____Indented text to
|
|
|
|
_Q____be further indented by four spaces across
|
|
|
|
____Qseveral lines
|
|
|
|
QQ____These two lines have had their
|
|
|
|
____indentation reduced by four spaces
|
|
|
|
Qdifferent indentation change
|
|
|
|
____too short
|
|
|
|
EOF
|
|
|
|
|
|
|
|
git add text.txt &&
|
|
|
|
git commit -m "add text.txt" &&
|
|
|
|
|
2021-12-09 11:30:07 +01:00
|
|
|
tr "^|Q_" "\f\v\t " <<-EOF >text.txt &&
|
2018-11-23 12:16:58 +01:00
|
|
|
context
|
|
|
|
lines
|
|
|
|
to
|
|
|
|
anchor
|
diff --color-moved-ws: modify allow-indentation-change
Currently diff --color-moved-ws=allow-indentation-change does not
support indentation that contains a mix of tabs and spaces. For
example in commit 546f70f377 ("convert.h: drop 'extern' from function
declaration", 2018-06-30) the function parameters in the following
lines are not colored as moved [1].
-extern int stream_filter(struct stream_filter *,
- const char *input, size_t *isize_p,
- char *output, size_t *osize_p);
+int stream_filter(struct stream_filter *,
+ const char *input, size_t *isize_p,
+ char *output, size_t *osize_p);
This commit changes the way the indentation is handled to track the
visual size of the indentation rather than the characters in the
indentation. This has the benefit that any whitespace errors do not
interfer with the move detection (the whitespace errors will still be
highlighted according to --ws-error-highlight). During the discussion
of this feature there were concerns about the correct detection of
indentation for python. However those concerns apply whether or not
we're detecting moved lines so no attempt is made to determine if the
indentation is 'pythonic'.
[1] Note that before the commit to fix the erroneous coloring of moved
lines each line was colored as a different block, since that commit
they are uncolored.
Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Reviewed-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-23 12:16:57 +01:00
|
|
|
QIndented text to
|
|
|
|
QQbe further indented by four spaces across
|
|
|
|
Q____several lines
|
2018-11-23 12:16:58 +01:00
|
|
|
${EMPTY}
|
|
|
|
QQtoo short without
|
|
|
|
${EMPTY}
|
2021-12-09 11:30:07 +01:00
|
|
|
^Q_______being grouped across blank line
|
2018-11-23 12:16:58 +01:00
|
|
|
${EMPTY}
|
diff --color-moved-ws: modify allow-indentation-change
Currently diff --color-moved-ws=allow-indentation-change does not
support indentation that contains a mix of tabs and spaces. For
example in commit 546f70f377 ("convert.h: drop 'extern' from function
declaration", 2018-06-30) the function parameters in the following
lines are not colored as moved [1].
-extern int stream_filter(struct stream_filter *,
- const char *input, size_t *isize_p,
- char *output, size_t *osize_p);
+int stream_filter(struct stream_filter *,
+ const char *input, size_t *isize_p,
+ char *output, size_t *osize_p);
This commit changes the way the indentation is handled to track the
visual size of the indentation rather than the characters in the
indentation. This has the benefit that any whitespace errors do not
interfer with the move detection (the whitespace errors will still be
highlighted according to --ws-error-highlight). During the discussion
of this feature there were concerns about the correct detection of
indentation for python. However those concerns apply whether or not
we're detecting moved lines so no attempt is made to determine if the
indentation is 'pythonic'.
[1] Note that before the commit to fix the erroneous coloring of moved
lines each line was colored as a different block, since that commit
they are uncolored.
Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Reviewed-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-23 12:16:57 +01:00
|
|
|
Q_QThese two lines have had their
|
|
|
|
indentation reduced by four spaces
|
|
|
|
QQdifferent indentation change
|
|
|
|
__Qtoo short
|
|
|
|
EOF
|
|
|
|
|
|
|
|
git -c color.diff.whitespace="normal red" \
|
|
|
|
-c core.whitespace=space-before-tab \
|
|
|
|
diff --color --color-moved --ws-error-highlight=all \
|
|
|
|
--color-moved-ws=allow-indentation-change >actual.raw &&
|
2021-12-09 11:30:07 +01:00
|
|
|
grep -v "index" actual.raw | tr "\f\v" "^|" | test_decode_color >actual &&
|
diff --color-moved-ws: modify allow-indentation-change
Currently diff --color-moved-ws=allow-indentation-change does not
support indentation that contains a mix of tabs and spaces. For
example in commit 546f70f377 ("convert.h: drop 'extern' from function
declaration", 2018-06-30) the function parameters in the following
lines are not colored as moved [1].
-extern int stream_filter(struct stream_filter *,
- const char *input, size_t *isize_p,
- char *output, size_t *osize_p);
+int stream_filter(struct stream_filter *,
+ const char *input, size_t *isize_p,
+ char *output, size_t *osize_p);
This commit changes the way the indentation is handled to track the
visual size of the indentation rather than the characters in the
indentation. This has the benefit that any whitespace errors do not
interfer with the move detection (the whitespace errors will still be
highlighted according to --ws-error-highlight). During the discussion
of this feature there were concerns about the correct detection of
indentation for python. However those concerns apply whether or not
we're detecting moved lines so no attempt is made to determine if the
indentation is 'pythonic'.
[1] Note that before the commit to fix the erroneous coloring of moved
lines each line was colored as a different block, since that commit
they are uncolored.
Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Reviewed-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-23 12:16:57 +01:00
|
|
|
|
|
|
|
cat <<-\EOF >expected &&
|
|
|
|
<BOLD>diff --git a/text.txt b/text.txt<RESET>
|
|
|
|
<BOLD>--- a/text.txt<RESET>
|
|
|
|
<BOLD>+++ b/text.txt<RESET>
|
2018-11-23 12:16:58 +01:00
|
|
|
<CYAN>@@ -1,16 +1,16 @@<RESET>
|
2021-12-09 11:30:07 +01:00
|
|
|
<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA>^<RESET><BRED> <RESET>
|
|
|
|
<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA>| too short without<RESET>
|
|
|
|
<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA>^<RESET>
|
2018-11-23 12:16:58 +01:00
|
|
|
<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA> being grouped across blank line<RESET>
|
|
|
|
<BOLD;MAGENTA>-<RESET>
|
|
|
|
<RESET>context<RESET>
|
|
|
|
<RESET>lines<RESET>
|
|
|
|
<RESET>to<RESET>
|
|
|
|
<RESET>anchor<RESET>
|
diff --color-moved-ws: modify allow-indentation-change
Currently diff --color-moved-ws=allow-indentation-change does not
support indentation that contains a mix of tabs and spaces. For
example in commit 546f70f377 ("convert.h: drop 'extern' from function
declaration", 2018-06-30) the function parameters in the following
lines are not colored as moved [1].
-extern int stream_filter(struct stream_filter *,
- const char *input, size_t *isize_p,
- char *output, size_t *osize_p);
+int stream_filter(struct stream_filter *,
+ const char *input, size_t *isize_p,
+ char *output, size_t *osize_p);
This commit changes the way the indentation is handled to track the
visual size of the indentation rather than the characters in the
indentation. This has the benefit that any whitespace errors do not
interfer with the move detection (the whitespace errors will still be
highlighted according to --ws-error-highlight). During the discussion
of this feature there were concerns about the correct detection of
indentation for python. However those concerns apply whether or not
we're detecting moved lines so no attempt is made to determine if the
indentation is 'pythonic'.
[1] Note that before the commit to fix the erroneous coloring of moved
lines each line was colored as a different block, since that commit
they are uncolored.
Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Reviewed-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-23 12:16:57 +01:00
|
|
|
<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA> Indented text to<RESET>
|
|
|
|
<BOLD;MAGENTA>-<RESET><BRED> <RESET> <BOLD;MAGENTA> be further indented by four spaces across<RESET>
|
|
|
|
<BOLD;MAGENTA>-<RESET><BRED> <RESET> <BOLD;MAGENTA>several lines<RESET>
|
|
|
|
<BOLD;BLUE>-<RESET> <BOLD;BLUE> These two lines have had their<RESET>
|
|
|
|
<BOLD;BLUE>-<RESET><BOLD;BLUE> indentation reduced by four spaces<RESET>
|
|
|
|
<BOLD;MAGENTA>-<RESET> <BOLD;MAGENTA>different indentation change<RESET>
|
|
|
|
<RED>-<RESET><RED> too short<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET> <BOLD;CYAN>Indented text to<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET> <BOLD;CYAN>be further indented by four spaces across<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET> <BOLD;CYAN> several lines<RESET>
|
2018-11-23 12:16:58 +01:00
|
|
|
<BOLD;YELLOW>+<RESET>
|
|
|
|
<BOLD;YELLOW>+<RESET> <BOLD;YELLOW>too short without<RESET>
|
|
|
|
<BOLD;YELLOW>+<RESET>
|
2021-12-09 11:30:07 +01:00
|
|
|
<BOLD;YELLOW>+<RESET><BOLD;YELLOW>^ being grouped across blank line<RESET>
|
2018-11-23 12:16:58 +01:00
|
|
|
<BOLD;YELLOW>+<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET> <BRED> <RESET> <BOLD;CYAN>These two lines have had their<RESET>
|
|
|
|
<BOLD;CYAN>+<RESET><BOLD;CYAN>indentation reduced by four spaces<RESET>
|
|
|
|
<BOLD;YELLOW>+<RESET> <BOLD;YELLOW>different indentation change<RESET>
|
diff --color-moved-ws: modify allow-indentation-change
Currently diff --color-moved-ws=allow-indentation-change does not
support indentation that contains a mix of tabs and spaces. For
example in commit 546f70f377 ("convert.h: drop 'extern' from function
declaration", 2018-06-30) the function parameters in the following
lines are not colored as moved [1].
-extern int stream_filter(struct stream_filter *,
- const char *input, size_t *isize_p,
- char *output, size_t *osize_p);
+int stream_filter(struct stream_filter *,
+ const char *input, size_t *isize_p,
+ char *output, size_t *osize_p);
This commit changes the way the indentation is handled to track the
visual size of the indentation rather than the characters in the
indentation. This has the benefit that any whitespace errors do not
interfer with the move detection (the whitespace errors will still be
highlighted according to --ws-error-highlight). During the discussion
of this feature there were concerns about the correct detection of
indentation for python. However those concerns apply whether or not
we're detecting moved lines so no attempt is made to determine if the
indentation is 'pythonic'.
[1] Note that before the commit to fix the erroneous coloring of moved
lines each line was colored as a different block, since that commit
they are uncolored.
Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Reviewed-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-23 12:16:57 +01:00
|
|
|
<GREEN>+<RESET><BRED> <RESET> <GREEN>too short<RESET>
|
|
|
|
EOF
|
|
|
|
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2019-07-23 21:27:05 +02:00
|
|
|
test_expect_success 'combine --ignore-blank-lines with --function-context' '
|
|
|
|
test_write_lines 1 "" 2 3 4 5 >a &&
|
|
|
|
test_write_lines 1 2 3 4 >b &&
|
|
|
|
test_must_fail git diff --no-index \
|
|
|
|
--ignore-blank-lines --function-context a b >actual.raw &&
|
|
|
|
sed -n "/@@/,\$p" <actual.raw >actual &&
|
|
|
|
cat <<-\EOF >expect &&
|
|
|
|
@@ -1,6 +1,4 @@
|
|
|
|
1
|
2019-12-05 17:15:31 +01:00
|
|
|
-
|
2019-07-23 21:27:05 +02:00
|
|
|
2
|
|
|
|
3
|
|
|
|
4
|
|
|
|
-5
|
|
|
|
EOF
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2019-12-19 18:35:43 +01:00
|
|
|
test_expect_success 'combine --ignore-blank-lines with --function-context 2' '
|
|
|
|
test_write_lines a b c "" function 1 2 3 4 5 "" 6 7 8 9 >a &&
|
|
|
|
test_write_lines "" a b c "" function 1 2 3 4 5 6 7 8 >b &&
|
|
|
|
test_must_fail git diff --no-index \
|
|
|
|
--ignore-blank-lines --function-context a b >actual.raw &&
|
|
|
|
sed -n "/@@/,\$p" <actual.raw >actual &&
|
|
|
|
cat <<-\EOF >expect &&
|
|
|
|
@@ -5,11 +6,9 @@ c
|
|
|
|
function
|
|
|
|
1
|
|
|
|
2
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
-
|
|
|
|
6
|
|
|
|
7
|
|
|
|
8
|
|
|
|
-9
|
|
|
|
EOF
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2006-10-12 14:22:14 +02:00
|
|
|
test_done
|