2005-05-19 12:32:35 +02:00
|
|
|
#!/bin/sh
|
|
|
|
#
|
|
|
|
# Copyright (c) 2005 Junio C Hamano
|
|
|
|
#
|
|
|
|
|
|
|
|
# Human-readable summary of this script; the blank line before the closing
# quote is part of the string.
test_description='Test rename detection in diff engine.

'

# Pull in the test harness first, then the diff helpers
# (presumably the source of compare_diff_patch used below -- confirm).
. ./test-lib.sh
. "$TEST_DIRECTORY"/lib-diff.sh
|
2005-05-19 12:32:35 +02:00
|
|
|
|
2016-02-25 09:59:18 +01:00
|
|
|
# Create the fixtures shared by the tests below:
#   path0     - fifteen lines, of which only "line 11" starts in lower case
#   expected  - the patch for path0 renamed to path1 with that line fixed
#   no-rename - the same change shown as a deletion plus an addition
# Context lines in a unified diff start with a single space, so the
# unchanged lines in "expected" carry one after the heredoc tab.
test_expect_success 'setup' '
	cat >path0 <<-\EOF &&
	Line 1
	Line 2
	Line 3
	Line 4
	Line 5
	Line 6
	Line 7
	Line 8
	Line 9
	Line 10
	line 11
	Line 12
	Line 13
	Line 14
	Line 15
	EOF
	cat >expected <<-\EOF &&
	diff --git a/path0 b/path1
	rename from path0
	rename to path1
	--- a/path0
	+++ b/path1
	@@ -8,7 +8,7 @@ Line 7
	 Line 8
	 Line 9
	 Line 10
	-line 11
	+Line 11
	 Line 12
	 Line 13
	 Line 14
	EOF
	cat >no-rename <<-\EOF
	diff --git a/path0 b/path0
	deleted file mode 100644
	index fdbec44..0000000
	--- a/path0
	+++ /dev/null
	@@ -1,15 +0,0 @@
	-Line 1
	-Line 2
	-Line 3
	-Line 4
	-Line 5
	-Line 6
	-Line 7
	-Line 8
	-Line 9
	-Line 10
	-line 11
	-Line 12
	-Line 13
	-Line 14
	-Line 15
	diff --git a/path1 b/path1
	new file mode 100644
	index 0000000..752c50e
	--- /dev/null
	+++ b/path1
	@@ -0,0 +1,15 @@
	+Line 1
	+Line 2
	+Line 3
	+Line 4
	+Line 5
	+Line 6
	+Line 7
	+Line 8
	+Line 9
	+Line 10
	+Line 11
	+Line 12
	+Line 13
	+Line 14
	+Line 15
	EOF
'
|
|
|
|
|
|
|
|
# Stage the freshly created path0 in the index.
test_expect_success 'update-index --add a file.' '
	git update-index --add path0
'
|
2005-05-19 12:32:35 +02:00
|
|
|
|
|
|
|
# Record the index as a tree object; $tree is the "before" side of the
# diffs run by the later tests.
test_expect_success 'write that tree.' '
	tree=$(git write-tree) &&
	echo $tree
'
|
2005-05-19 12:32:35 +02:00
|
|
|
|
|
|
|
# Turn path0 into path1 while capitalising "line 11", then record both the
# removal and the addition in the index.  The file manipulation lives inside
# the test body so that a failing sed or rm is reported by the harness
# instead of being silently ignored at the top level of the script.
test_expect_success 'renamed and edited the file.' '
	sed -e "s/line/Line/" <path0 >path1 &&
	rm -f path0 &&
	git update-index --add --remove path0 path1
'
|
2005-05-19 12:32:35 +02:00
|
|
|
|
|
|
|
# Produce a patch against the saved tree with rename detection (-M) on;
# the result is checked by the next test.
test_expect_success 'git diff-index -p -M after rename and editing.' '
	git diff-index -p -M $tree >current
'
|
2016-02-25 09:59:18 +01:00
|
|
|
|
2005-05-20 04:00:36 +02:00
|
|
|
|
|
|
|
# The patch captured in "current" must match the rename patch prepared
# in the setup test.
test_expect_success 'validate the output.' '
	compare_diff_patch current expected
'
|
2005-05-20 04:00:36 +02:00
|
|
|
|
2016-02-25 09:59:19 +01:00
|
|
|
# With diff.renames forced on, "git diff --cached" must report the rename.
test_expect_success 'test diff.renames=true' '
	git -c diff.renames=true diff --cached $tree >current &&
	compare_diff_patch current expected
'
|
|
|
|
|
|
|
|
# With diff.renames forced off, the same change must show up as a
# deletion of path0 plus an addition of path1.
test_expect_success 'test diff.renames=false' '
	git -c diff.renames=false diff --cached $tree >current &&
	compare_diff_patch current no-rename
'
|
|
|
|
|
|
|
|
# Without any diff.renames setting the output is expected to equal the
# rename patch, i.e. rename detection is what happens by default here.
test_expect_success 'test diff.renames unset' '
	git diff --cached $tree >current &&
	compare_diff_patch current expected
'
|
|
|
|
|
2011-04-13 01:27:11 +02:00
|
|
|
# Commit two identical files (path1 and another-path), then delete path1
# and move another-path to subdir/path1.  Both deleted files are equally
# good rename sources, so status must pick the one sharing the basename.
test_expect_success 'favour same basenames over different ones' '
	cp path1 another-path &&
	git add another-path &&
	git commit -m 1 &&

	git rm path1 &&
	mkdir subdir &&
	git mv another-path subdir/path1 &&

	git status >out &&
	test_i18ngrep "renamed: .*path1 -> subdir/path1" out
'
|
2007-06-21 13:52:11 +02:00
|
|
|
|
2018-05-04 13:12:15 +02:00
|
|
|
# git status must report the rename when diff.renames is forced on.
test_expect_success 'test diff.renames=true for git status' '
	git -c diff.renames=true status >out &&
	test_i18ngrep "renamed: .*path1 -> subdir/path1" out
'
|
|
|
|
|
|
|
|
# With diff.renames forced off, git status must list the two sides
# separately and must not mention a rename.
test_expect_success 'test diff.renames=false for git status' '
	git -c diff.renames=false status >out &&
	test_i18ngrep ! "renamed: .*path1 -> subdir/path1" out &&
	test_i18ngrep "new file: .*subdir/path1" out &&
	# [^/] keeps this pattern from matching the subdir/path1 entry
	test_i18ngrep "deleted: .*[^/]path1" out
'
|
|
|
|
|
2011-04-13 01:27:11 +02:00
|
|
|
# Make the working-tree copy of subdir/path1 differ slightly from the
# committed path1 ("15" becomes "16") and check that status still pairs
# the two files by basename.
# The blob is written to a scratch file first: with git on the left-hand
# side of a pipe its exit status would be discarded and a failing
# "git show" would go unnoticed.
test_expect_success 'favour same basenames even with minor differences' '
	git show HEAD:path1 >path1.head &&
	sed "s/15/16/" <path1.head >subdir/path1 &&
	rm path1.head &&
	git status >out &&
	test_i18ngrep "renamed: .*path1 -> subdir/path1" out
'
|
2007-06-21 13:52:11 +02:00
|
|
|
|
diffcore: fix iteration order of identical files during rename detection
If the two paths 'dir/A/file' and 'dir/B/file' have identical content
and the parent directory is renamed, e.g. 'git mv dir other-dir', then
diffcore reports the following exact renames:
renamed: dir/B/file -> other-dir/A/file
renamed: dir/A/file -> other-dir/B/file
While technically not wrong, this is confusing not only for the user,
but also for git commands that make decisions based on rename
information, e.g. 'git log --follow other-dir/A/file' follows
'dir/B/file' past the rename.
This behavior is a side effect of commit v2.0.0-rc4~8^2~14
(diffcore-rename.c: simplify finding exact renames, 2013-11-14): the
hashmap storing sources returns entries from the same bucket, i.e.
sources matching the current destination, in LIFO order. Thus the
iteration first examines 'other-dir/A/file' and 'dir/B/file' and, upon
finding identical content and basename, reports an exact rename.
Other hashmap users are apparently happy with the current iteration
order over the entries of a bucket. Changing the iteration order
would risk upsetting other hashmap users and would increase the memory
footprint of each bucket by a pointer to the tail element.
Fill the hashmap with source entries in reverse order to restore the
original exact rename detection behavior.
Reported-by: Bill Okara <billokara@gmail.com>
Signed-off-by: SZEDER Gábor <szeder@ira.uka.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-03-30 10:35:07 +02:00
|
|
|
# dir/A/file and dir/B/file have identical content.  After renaming the
# parent directory, each file must be paired with its own counterpart
# (A with A) rather than with its identical sibling.
test_expect_success 'two files with same basename and same content' '
	git reset --hard &&

	mkdir -p dir/A dir/B &&
	cp path1 dir/A/file &&
	cp path1 dir/B/file &&
	git add dir &&
	git commit -m 2 &&

	git mv dir other-dir &&
	git status >out &&
	test_i18ngrep "renamed: .*dir/A/file -> other-dir/A/file" out
'
|
|
|
|
|
2011-01-06 22:50:06 +01:00
|
|
|
# Commit one hundred small files (path00 .. path99), then add new-path as
# a near-copy of path1 and modify path1 itself.  Running copy detection
# with a limit of 4 (-l 4 -C -C) must still report new-path as a copy of
# path1 and must print a warning on stderr.
test_expect_success 'setup for many rename source candidates' '
	git reset --hard &&
	for i in 0 1 2 3 4 5 6 7 8 9
	do
		for j in 0 1 2 3 4 5 6 7 8 9
		do
			# a && chain cannot see inside a loop; bail out explicitly
			echo "$i$j" >"path$i$j" || return 1
		done
	done &&
	git add "path??" &&
	test_tick &&
	git commit -m "hundred" &&
	(cat path1 && echo new) >new-path &&
	echo old >>path1 &&
	git add new-path path1 &&
	git diff -l 4 -C -C --cached --name-status >actual 2>actual.err &&
	# Drop the similarity score that follows the status letter.  The
	# fields of --name-status are separated by tabs, so only the digits
	# are removed and the separator is left untouched.
	sed -e "s/^\([CM]\)[0-9]*/\1/" actual >actual.munged &&
	printf "C\tpath1\tnew-path\nM\tpath1\n" >expect &&
	test_cmp expect actual.munged &&
	grep warning actual.err
'
|
|
|
|
|
2013-03-06 22:36:12 +01:00
|
|
|
# a/b/c -> c/b/a: the test expects the rename to be printed in full,
# without any {old => new} braces, in both --summary and --stat output.
test_expect_success 'rename pretty print with nothing in common' '
	mkdir -p a/b/ &&
	: >a/b/c &&
	git add a/b/c &&
	git commit -m "create a/b/c" &&
	mkdir -p c/b/ &&
	git mv a/b/c c/b/a &&
	git commit -m "a/b/c -> c/b/a" &&
	for format in --summary --stat
	do
		git diff -M "$format" HEAD^ HEAD >output &&
		test_i18ngrep " a/b/c => c/b/a " output || return 1
	done
'
|
|
|
|
|
|
|
|
# c/b/a -> c/d/e: the shared leading directory "c/" is expected outside
# the braces.
test_expect_success 'rename pretty print with common prefix' '
	mkdir -p c/d &&
	git mv c/b/a c/d/e &&
	git commit -m "c/b/a -> c/d/e" &&
	for format in --summary --stat
	do
		git diff -M "$format" HEAD^ HEAD >output &&
		test_i18ngrep " c/{b/a => d/e} " output || return 1
	done
'
|
|
|
|
|
|
|
|
# c/d/e -> d/e: the shared trailing "/e" is expected outside the braces.
test_expect_success 'rename pretty print with common suffix' '
	mkdir d &&
	git mv c/d/e d/e &&
	git commit -m "c/d/e -> d/e" &&
	for format in --summary --stat
	do
		git diff -M "$format" HEAD^ HEAD >output &&
		test_i18ngrep " {c/d => d}/e " output || return 1
	done
'
|
|
|
|
|
|
|
|
# d/e -> d/f/e: prefix "d/" and suffix "/e" are both shared, so the old
# side inside the braces is expected to be empty.
test_expect_success 'rename pretty print with common prefix and suffix' '
	mkdir d/f &&
	git mv d/e d/f/e &&
	git commit -m "d/e -> d/f/e" &&
	for format in --summary --stat
	do
		git diff -M "$format" HEAD^ HEAD >output &&
		test_i18ngrep " d/{ => f}/e " output || return 1
	done
'
|
|
|
|
|
|
|
|
# d/f/e -> d/f/f/e: the "f" component could belong to either the prefix or
# the suffix; the test pins the d/f/{ => f}/e form.
test_expect_success 'rename pretty print common prefix and suffix overlap' '
	mkdir d/f/f &&
	git mv d/f/e d/f/f/e &&
	git commit -m "d/f/e d/f/f/e" &&
	for format in --summary --stat
	do
		git diff -M "$format" HEAD^ HEAD >output &&
		test_i18ngrep " d/f/{ => f}/e " output || return 1
	done
'
|
|
|
|
|
2017-11-29 21:11:54 +01:00
|
|
|
# Replace myfile with the similar myotherfile in two commits, then diff
# them with -l0.  If a limit of 0 meant "consider no candidates", no
# rename would be reported at all.
test_expect_success 'diff-tree -l0 defaults to a big rename limit, not zero' '
	test_write_lines line1 line2 line3 >myfile &&
	git add myfile &&
	git commit -m x &&

	test_write_lines line1 line2 line4 >myotherfile &&
	git rm myfile &&
	git add myotherfile &&
	git commit -m x &&

	# The diff runs from HEAD back to HEAD^, so the rename it reports
	# goes from myotherfile to myfile.
	git diff-tree -M -l0 HEAD HEAD^ >actual &&
	grep "myotherfile.*myfile" actual
'
|
|
|
|
|
2021-02-14 08:51:46 +01:00
|
|
|
# subdir/file.txt is deleted while file.txt and file.md are added.  The
# test pins that the deleted file is paired with the same-basename
# candidate even though the other candidate is more similar.
test_expect_success 'basename similarity vs best similarity' '
	mkdir subdir &&
	test_write_lines line1 line2 line3 line4 line5 \
			 line6 line7 line8 line9 line10 >subdir/file.txt &&
	git add subdir/file.txt &&
	git commit -m "base txt" &&

	git rm subdir/file.txt &&
	test_write_lines line1 line2 line3 line4 line5 \
			 line6 line7 line8 >file.txt &&
	test_write_lines line1 line2 line3 line4 line5 \
			 line6 line7 line8 line9 >file.md &&
	git add file.txt file.md &&
	git commit -a -m "rename" &&
	git diff-tree -r -M --name-status HEAD^ HEAD >actual &&
	# subdir/file.txt is 88% similar to file.md, 78% similar to file.txt,
	# but since same basenames are checked first...
	# (--name-status separates its fields with tabs, hence printf "\t")
	printf "A\tfile.md\nR078\tsubdir/file.txt\tfile.txt\n" >expected &&
	test_cmp expected actual
'
|
|
|
|
|
2005-05-20 04:00:36 +02:00
|
|
|
# End of the script: hand control back to the test harness
# (test_done is not defined in this file; presumably it comes from
# test-lib.sh sourced at the top -- confirm).
test_done
|