2006-05-06 09:15:54 +02:00
|
|
|
#!/bin/sh
|
|
|
|
#
|
|
|
|
# Copyright (c) 2006 Junio C Hamano
|
|
|
|
#
|
|
|
|
|
|
|
|
test_description='Binary diff and apply
|
|
|
|
'
|
|
|
|
|
2021-10-31 00:24:19 +02:00
|
|
|
TEST_PASSES_SANITIZE_LEAK=true
|
2006-05-06 09:15:54 +02:00
|
|
|
. ./test-lib.sh
|
|
|
|
|
2012-03-13 06:02:19 +01:00
|
|
|
# Expected "git apply --numstat" output for the working-tree changes made
# by the "prepare repository" test: text files (a, c) list their added and
# deleted line counts, files changed to binary content (b, d) list "-".
# The three fields of every record are separated by tabs, so the tabs are
# spelled out through printf instead of relying on literal whitespace in a
# here-document.  printf reuses the format for each group of three operands.
printf "%s\t%s\t%s\n" \
	1 1 a \
	- - b \
	1 1 c \
	- - d >expect.binary-numstat
|
|
|
|
|
2012-07-12 00:12:03 +02:00
|
|
|
# Stage four one-line text files, then change every one of them in the
# working tree only: a and c get different text, b receives the contents
# of test-binary-1.png and d receives those contents twice over.
test_expect_success 'prepare repository' '
	echo AIT >a &&
	echo BIT >b &&
	echo CIT >c &&
	echo DIT >d &&
	git update-index --add a b c d &&
	echo git >a &&
	echo git >c &&
	cat "$TEST_DIRECTORY/test-binary-1.png" >b &&
	cat b b >d
'
|
2006-05-06 09:15:54 +02:00
|
|
|
|
2006-05-26 04:06:16 +02:00
|
|
|
# Expected "git apply --stat --summary" output for the working-tree changes:
# the file is compared against the apply output by the next test, and its
# last line is reused as the expected "git diff --shortstat" output.
# NOTE(review): the lone "|" and timestamp lines inside this here-document
# look like blame-view residue and end up in the file as data; the column
# spacing of the stat lines also looks collapsed.  Confirm against real
# "git apply --stat" output before relying on this fixture.
cat > expected <<\EOF
|
2006-05-06 09:15:54 +02:00
|
|
|
a | 2 +-
|
|
|
|
b | Bin
|
|
|
|
c | 2 +-
|
|
|
|
d | Bin
|
|
|
|
4 files changed, 2 insertions(+), 2 deletions(-)
|
2006-05-26 04:06:16 +02:00
|
|
|
EOF
|
2012-07-12 00:12:21 +02:00
|
|
|
# Feed the working-tree diff to "git apply --stat --summary" and compare
# the resulting diffstat with the prepared "expected" file.
test_expect_success 'apply --stat output for binary file change' '
	git diff >diff &&
	git apply --stat --summary <diff >current &&
	test_cmp expected current
'
|
2006-05-06 09:15:54 +02:00
|
|
|
|
2012-06-15 23:50:30 +02:00
|
|
|
# --shortstat must print exactly the summary line that closes the full
# diffstat, so the expectation is the last line of "expected".
test_expect_success 'diff --shortstat output for binary file change' '
	tail -n 1 expected >expect &&
	git diff --shortstat >current &&
	test_cmp expect current
'
|
|
|
|
|
|
|
|
# Restricting the diff to the binary file b must still count one changed
# file, with zero inserted and zero deleted lines.
test_expect_success 'diff --shortstat output for binary file change only' '
	echo " 1 file changed, 0 insertions(+), 0 deletions(-)" >expected &&
	git diff --shortstat -- b >current &&
	test_cmp expected current
'
|
|
|
|
|
2012-03-13 06:02:19 +01:00
|
|
|
# A diff produced without --binary is run through "git apply --numstat";
# the result must match expect.binary-numstat, which lists b and d with
# "-" in place of line counts.
test_expect_success 'apply --numstat notices binary file change' '
	git diff >diff &&
	git apply --numstat <diff >current &&
	test_cmp expect.binary-numstat current
'
|
|
|
|
|
|
|
|
# Same expectation as for the plain diff, but this time the patch is
# generated with --binary before being handed to "git apply --numstat".
test_expect_success 'apply --numstat understands diff --binary format' '
	git diff --binary >diff &&
	git apply --numstat <diff >current &&
	test_cmp expect.binary-numstat current
'
|
2006-05-06 09:15:54 +02:00
|
|
|
|
|
|
|
# apply needs to be able to skip the binary material correctly
|
|
|
|
# in order to report the line number of a corrupt patch.
|
2021-02-11 02:53:51 +01:00
|
|
|
# Corrupt the "-CIT" line of the plain diff, let git apply fail on it,
# pull the line number out of the error message and make sure that this
# line of the broken patch is the corrupted one.
test_expect_success 'apply detecting corrupt patch correctly' '
	git diff >output &&
	sed -e "s/-CIT/xCIT/" <output >broken &&
	test_must_fail git apply --stat --summary broken 2>detected &&
	err_text=$(cat detected) &&
	bad_lineno=$(expr "$err_text" : "error.*at line \\([0-9]*\\)\$") &&
	bad_line=$(sed -ne "${bad_lineno}p" broken) &&
	test "$bad_line" = xCIT
'
|
2006-05-06 09:15:54 +02:00
|
|
|
|
2021-02-11 02:53:51 +01:00
|
|
|
# Variant of the corrupt-patch check that uses a patch made with --binary:
# git apply has to skip the binary material correctly in order to report
# the line number of the corrupted line.
# The diff is written to a file before sed edits it; with git on the
# upstream side of a pipe its exit status would be discarded and a failing
# or crashing "git diff" could go unnoticed.  The title carries a
# "(--binary)" suffix so that it no longer collides with the plain-diff
# test of the same name in failure reports.
test_expect_success 'apply detecting corrupt patch correctly (--binary)' '
	git diff --binary >output &&
	sed -e "s/-CIT/xCIT/" <output >broken &&
	test_must_fail git apply --stat --summary broken 2>detected &&
	detected=$(cat detected) &&
	detected=$(expr "$detected" : "error.*at line \\([0-9]*\\)\$") &&
	detected=$(sed -ne "${detected}p" broken) &&
	test "$detected" = xCIT
'
|
2006-05-06 09:15:54 +02:00
|
|
|
|
2008-09-03 10:59:29 +02:00
|
|
|
# Commit the current state of all tracked files under the message "initial".
test_expect_success 'initial commit' '
	git commit -a -m initial
'
|
2006-05-06 09:15:54 +02:00
|
|
|
|
|
|
|
# Try removal (b), modification (d), and creation (e).
|
2012-07-12 00:12:03 +02:00
|
|
|
# Rename b to e, overwrite d with the contents of e, put the original text
# back into a and c, and stage all of it.  The resulting tree id is kept in
# tree0 and the cached --binary diff in "current" for the test that applies
# the patch; here the patch only has to be accepted by "git apply --stat".
test_expect_success 'diff-index with --binary' '
	echo AIT >a &&
	mv b e &&
	echo CIT >c &&
	cat e >d &&
	git update-index --add --remove a b c d e &&
	tree0=$(git write-tree) &&
	git diff --cached --binary >current &&
	git apply --stat --summary current
'
|
2006-05-06 09:15:54 +02:00
|
|
|
|
2012-07-12 00:12:03 +02:00
|
|
|
# Starting from a hard reset, apply the saved binary patch to both index
# and working tree; the tree written afterwards must equal tree0, the tree
# recorded when the patch was generated.
test_expect_success 'apply binary patch' '
	git reset --hard &&
	git apply --binary --index <current &&
	tree1=$(git write-tree) &&
	test "$tree0" = "$tree1"
'
|
2006-05-06 09:15:54 +02:00
|
|
|
|
fix bogus "diff --git" header from "diff --no-index"
When "git diff --no-index" is given an absolute pathname, it
would generate a diff header with the absolute path
prepended by the prefix, like:
diff --git a/dev/null b/foo
Not only is this nonsensical, and not only does it violate
the description of diffs given in git-diff(1), but it would
produce broken binary diffs. Unlike text diffs, the binary
diffs don't contain the filenames anywhere else, and so "git
apply" relies on this header to figure out the filename.
This patch just refuses to use an invalid name for anything
visible in the diff.
Now, this fixes the "git diff --no-index --binary a
/dev/null" kind of case (and we'll end up using "a" as the
basename), but some other insane cases are impossible to
handle. If you do
git diff --no-index --binary a /bin/echo
you'll still get a patch like
diff --git a/a b/bin/echo
old mode 100644
new mode 100755
index ...
and "git apply" will refuse to apply it for a couple of
reasons, and the diff is simply bogus.
And that, btw, is no longer a bug, I think. It's impossible
to know whether the user meant for the patch to be a rename
or not. And as such, refusing to apply it because you don't
know what name you should use is probably _exactly_ the
right thing to do!
Original problem reported by Imre Deak. Test script and problem
description by Jeff King.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2008-10-05 21:35:15 +02:00
|
|
|
test_expect_success 'diff --no-index with binary creation' '
|
|
|
|
echo Q | q_to_nul >binary &&
|
2018-07-02 02:23:41 +02:00
|
|
|
# hide error code from diff, which just indicates differences
|
|
|
|
test_might_fail git diff --binary --no-index /dev/null binary >current &&
|
fix bogus "diff --git" header from "diff --no-index"
When "git diff --no-index" is given an absolute pathname, it
would generate a diff header with the absolute path
prepended by the prefix, like:
diff --git a/dev/null b/foo
Not only is this nonsensical, and not only does it violate
the description of diffs given in git-diff(1), but it would
produce broken binary diffs. Unlike text diffs, the binary
diffs don't contain the filenames anywhere else, and so "git
apply" relies on this header to figure out the filename.
This patch just refuses to use an invalid name for anything
visible in the diff.
Now, this fixes the "git diff --no-index --binary a
/dev/null" kind of case (and we'll end up using "a" as the
basename), but some other insane cases are impossible to
handle. If you do
git diff --no-index --binary a /bin/echo
you'll still get a patch like
diff --git a/a b/bin/echo
old mode 100644
new mode 100755
index ...
and "git apply" will refuse to apply it for a couple of
reasons, and the diff is simply bogus.
And that, btw, is no longer a bug, I think. It's impossible
to know whether the user meant for the patch to be a rename
or not. And as such, refusing to apply it because you don't
know what name you should use is probably _exactly_ the
right thing to do!
Original problem reported by Imre Deak. Test script and problem
description by Jeff King.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2008-10-05 21:35:15 +02:00
|
|
|
rm binary &&
|
|
|
|
git apply --binary <current &&
|
|
|
|
echo Q >expected &&
|
|
|
|
nul_to_q <binary >actual &&
|
|
|
|
test_cmp expected actual
|
|
|
|
'
|
|
|
|
|
2012-04-30 22:38:58 +02:00
|
|
|
cat >expect <<EOF
|
diff.c: use utf8_strwidth() to count display width
When unicode filenames (encoded in UTF-8) are used, the visible width
on the screen is not the same as strlen().
For example, `git log --stat` may produce an output like this:
[snip the header]
Arger.txt | 1 +
Ärger.txt | 1 +
2 files changed, 2 insertions(+)
A side note: the original report was about cyrillic filenames.
After some investigations it turned out that
a) This is not a problem with "ambiguous characters" in unicode
b) The same problem exists for all unicode code points (so we
can use Latin based Umlauts for demonstrations below)
The 'Ä' takes the same space on the screen as the 'A'.
But needs one more byte in memory, so the `git log --stat` output
for "Arger.txt" (!) gets mis-aligned:
The maximum length is derived from "Ärger.txt", 10 bytes in memory,
9 positions on the screen. That is why "Arger.txt" gets one extra ' '
for alignment, it needs 9 bytes in memory.
If there was a file "Ö", it would be correctly aligned by chance,
but "Öhö" would not.
The solution is of course, to use utf8_strwidth() instead of strlen()
when dealing with the width on screen.
And then there is another problem, code like this:
strbuf_addf(&out, "%-*s", len, name);
(or using the underlying snprintf() function) does not align the
buffer to a minimum of len measured in screen-width, but uses the
memory count.
One could be tempted to wish that snprintf() was UTF-8 aware.
That doesn't seem to be the case anywhere (tested on Linux and Mac),
probably snprintf() uses the "bytes in memory"/strlen() approach to be
compatible with older versions and this will never change.
The basic idea is to change code in diff.c like this
strbuf_addf(&out, "%-*s", len, name);
into something like this:
int padding = len - utf8_strwidth(name);
if (padding < 0)
padding = 0;
strbuf_addf(&out, " %s%*s", name, padding, "");
The real change is slightly bigger, as it, as well, integrates two calls
of strbuf_addf() into one.
Tests:
Two things need to be tested:
- The calculation of the maximum width
- The calculation of padding
The name "textfile" is changed into "tëxtfilë", both have a width of 8.
If strlen() was used, to get the maximum width, the shorter "binfile" would
have been mis-aligned:
binfile | [snip]
tëxtfilë | [snip]
If only "binfile" would be renamed into "binfilë":
binfilë | [snip]
textfile | [snip]
In order to verify that the width is calculated correctly everywhere,
"binfile" is renamed into "binfilë", giving 1 byte more in strlen()
"textfile" is renamed into "tëxtfilë", 2 bytes more in strlen().
The updated t4012-diff-binary.sh checks the correct alignment:
binfilë | [snip]
tëxtfilë | [snip]
Reported-by: Alexander Meshcheryakov <alexander.s.m@gmail.com>
Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-14 17:13:33 +02:00
|
|
|
binfilë | Bin 0 -> 1026 bytes
|
|
|
|
tëxtfilë | 10000 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
2012-04-30 22:38:58 +02:00
|
|
|
EOF
|
|
|
|
|
|
|
|
test_expect_success 'diff --stat with binary files and big change count' '
|
diff.c: use utf8_strwidth() to count display width
When unicode filenames (encoded in UTF-8) are used, the visible width
on the screen is not the same as strlen().
For example, `git log --stat` may produce an output like this:
[snip the header]
Arger.txt | 1 +
Ärger.txt | 1 +
2 files changed, 2 insertions(+)
A side note: the original report was about cyrillic filenames.
After some investigations it turned out that
a) This is not a problem with "ambiguous characters" in unicode
b) The same problem exists for all unicode code points (so we
can use Latin based Umlauts for demonstrations below)
The 'Ä' takes the same space on the screen as the 'A'.
But needs one more byte in memory, so the `git log --stat` output
for "Arger.txt" (!) gets mis-aligned:
The maximum length is derived from "Ärger.txt", 10 bytes in memory,
9 positions on the screen. That is why "Arger.txt" gets one extra ' '
for alignment, it needs 9 bytes in memory.
If there was a file "Ö", it would be correctly aligned by chance,
but "Öhö" would not.
The solution is of course, to use utf8_strwidth() instead of strlen()
when dealing with the width on screen.
And then there is another problem, code like this:
strbuf_addf(&out, "%-*s", len, name);
(or using the underlying snprintf() function) does not align the
buffer to a minimum of len measured in screen-width, but uses the
memory count.
One could be tempted to wish that snprintf() was UTF-8 aware.
That doesn't seem to be the case anywhere (tested on Linux and Mac),
probably snprintf() uses the "bytes in memory"/strlen() approach to be
compatible with older versions and this will never change.
The basic idea is to change code in diff.c like this
strbuf_addf(&out, "%-*s", len, name);
into something like this:
int padding = len - utf8_strwidth(name);
if (padding < 0)
padding = 0;
strbuf_addf(&out, " %s%*s", name, padding, "");
The real change is slightly bigger, as it, as well, integrates two calls
of strbuf_addf() into one.
Tests:
Two things need to be tested:
- The calculation of the maximum width
- The calculation of padding
The name "textfile" is changed into "tëxtfilë", both have a width of 8.
If strlen() was used, to get the maximum width, the shorter "binfile" would
have been mis-aligned:
binfile | [snip]
tëxtfilë | [snip]
If only "binfile" would be renamed into "binfilë":
binfilë | [snip]
textfile | [snip]
In order to verify that the width is calculated correctly everywhere,
"binfile" is renamed into "binfilë", giving 1 byte more in strlen()
"textfile" is renamed into "tëxtfilë", 2 bytes more in strlen().
The updated t4012-diff-binary.sh checks the correct alignment:
binfilë | [snip]
tëxtfilë | [snip]
Reported-by: Alexander Meshcheryakov <alexander.s.m@gmail.com>
Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-14 17:13:33 +02:00
|
|
|
printf "\01\00%1024d" 1 >binfilë &&
|
|
|
|
git add binfilë &&
|
2012-04-30 22:38:58 +02:00
|
|
|
i=0 &&
|
|
|
|
while test $i -lt 10000; do
|
|
|
|
echo $i &&
|
2021-12-09 06:11:13 +01:00
|
|
|
i=$(($i + 1)) || return 1
|
diff.c: use utf8_strwidth() to count display width
When unicode filenames (encoded in UTF-8) are used, the visible width
on the screen is not the same as strlen().
For example, `git log --stat` may produce an output like this:
[snip the header]
Arger.txt | 1 +
Ärger.txt | 1 +
2 files changed, 2 insertions(+)
A side note: the original report was about cyrillic filenames.
After some investigations it turned out that
a) This is not a problem with "ambiguous characters" in unicode
b) The same problem exists for all unicode code points (so we
can use Latin based Umlauts for demonstrations below)
The 'Ä' takes the same space on the screen as the 'A'.
But needs one more byte in memory, so the `git log --stat` output
for "Arger.txt" (!) gets mis-aligned:
The maximum length is derived from "Ärger.txt", 10 bytes in memory,
9 positions on the screen. That is why "Arger.txt" gets one extra ' '
for alignment, it needs 9 bytes in memory.
If there was a file "Ö", it would be correctly aligned by chance,
but "Öhö" would not.
The solution is of course, to use utf8_strwidth() instead of strlen()
when dealing with the width on screen.
And then there is another problem, code like this:
strbuf_addf(&out, "%-*s", len, name);
(or using the underlying snprintf() function) does not align the
buffer to a minimum of len measured in screen-width, but uses the
memory count.
One could be tempted to wish that snprintf() was UTF-8 aware.
That doesn't seem to be the case anywhere (tested on Linux and Mac),
probably snprintf() uses the "bytes in memory"/strlen() approach to be
compatible with older versions and this will never change.
The basic idea is to change code in diff.c like this
strbuf_addf(&out, "%-*s", len, name);
into something like this:
int padding = len - utf8_strwidth(name);
if (padding < 0)
padding = 0;
strbuf_addf(&out, " %s%*s", name, padding, "");
The real change is slightly bigger, as it, as well, integrates two calls
of strbuf_addf() into one.
Tests:
Two things need to be tested:
- The calculation of the maximum width
- The calculation of padding
The name "textfile" is changed into "tëxtfilë", both have a width of 8.
If strlen() was used, to get the maximum width, the shorter "binfile" would
have been mis-aligned:
binfile | [snip]
tëxtfilë | [snip]
If only "binfile" would be renamed into "binfilë":
binfilë | [snip]
textfile | [snip]
In order to verify that the width is calculated correctly everywhere,
"binfile" is renamed into "binfilë", giving 1 byte more in strlen()
"textfile" is renamed into "tëxtfilë", 2 bytes more in strlen().
The updated t4012-diff-binary.sh checks the correct alignment:
binfilë | [snip]
tëxtfilë | [snip]
Reported-by: Alexander Meshcheryakov <alexander.s.m@gmail.com>
Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-14 17:13:33 +02:00
|
|
|
done >tëxtfilë &&
|
|
|
|
git add tëxtfilë &&
|
|
|
|
git -c core.quotepath=false diff --cached --stat binfilë tëxtfilë >output &&
|
2012-04-30 22:38:58 +02:00
|
|
|
grep " | " output >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2006-05-06 09:15:54 +02:00
|
|
|
test_done
|