From 5ff10dd602f5926f0f5a73ae7de5866713428aa7 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 26 Jun 2008 15:34:54 -0700 Subject: [PATCH 1/7] diff --check: explain why we do not care whether old side is binary All other codepaths refrain from running textual diff when either the old or the new side is binary, but this function only checks the new side. I was almost going to change it to check both, but that would be a bad change. Explain why to prevent future mistakes. Signed-off-by: Junio C Hamano --- diff.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/diff.c b/diff.c index 893942359b..c00d633c12 100644 --- a/diff.c +++ b/diff.c @@ -1544,8 +1544,9 @@ static void builtin_diffstat(const char *name_a, const char *name_b, static void builtin_checkdiff(const char *name_a, const char *name_b, const char *attr_path, - struct diff_filespec *one, - struct diff_filespec *two, struct diff_options *o) + struct diff_filespec *one, + struct diff_filespec *two, + struct diff_options *o) { mmfile_t mf1, mf2; struct checkdiff_t data; @@ -1564,6 +1565,12 @@ static void builtin_checkdiff(const char *name_a, const char *name_b, if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) die("unable to read files to diff"); + /* + * All the other codepaths check both sides, but not checking + * the "old" side here is deliberate. We are checking the newly + * introduced changes, and as long as the "new" side is text, we + * can and should check what it introduces. + */ if (diff_filespec_is_binary(two)) goto free_and_return; else { From 8f8841e9c8e6a26153b0cf9417c7540cf65ef09f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 26 Jun 2008 15:35:21 -0700 Subject: [PATCH 2/7] check_and_emit_line(): rename and refactor The function name was too bland and not explicit enough as to what it is checking. 
Split it into two, and call the one that checks if there is a whitespace breakage "ws_check()", and call the other one that checks and emits the line after color coding "ws_check_emit()". Signed-off-by: Junio C Hamano --- builtin-apply.c | 5 ++--- cache.h | 5 ++--- diff.c | 13 ++++++------- ws.c | 18 +++++++++++++++--- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/builtin-apply.c b/builtin-apply.c index c497889312..92f00471bb 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -979,8 +979,7 @@ static int find_header(char *line, unsigned long size, int *hdrsize, struct patc static void check_whitespace(const char *line, int len, unsigned ws_rule) { char *err; - unsigned result = check_and_emit_line(line + 1, len - 1, ws_rule, - NULL, NULL, NULL, NULL); + unsigned result = ws_check(line + 1, len - 1, ws_rule); if (!result) return; @@ -991,7 +990,7 @@ static void check_whitespace(const char *line, int len, unsigned ws_rule) else { err = whitespace_error_string(result); fprintf(stderr, "%s:%d: %s.\n%.*s\n", - patch_input_file, linenr, err, len - 2, line + 1); + patch_input_file, linenr, err, len - 2, line + 1); free(err); } } diff --git a/cache.h b/cache.h index 64ef86e129..3dfa53c566 100644 --- a/cache.h +++ b/cache.h @@ -819,9 +819,8 @@ void shift_tree(const unsigned char *, const unsigned char *, unsigned char *, i extern unsigned whitespace_rule_cfg; extern unsigned whitespace_rule(const char *); extern unsigned parse_whitespace_rule(const char *); -extern unsigned check_and_emit_line(const char *line, int len, unsigned ws_rule, - FILE *stream, const char *set, - const char *reset, const char *ws); +extern unsigned ws_check(const char *line, int len, unsigned ws_rule); +extern void ws_check_emit(const char *line, int len, unsigned ws_rule, FILE *stream, const char *set, const char *reset, const char *ws); extern char *whitespace_error_string(unsigned ws); extern int ws_fix_copy(char *, const char *, int, unsigned, int *); diff --git a/diff.c 
b/diff.c index c00d633c12..52a34eec91 100644 --- a/diff.c +++ b/diff.c @@ -535,9 +535,9 @@ static void emit_add_line(const char *reset, struct emit_callback *ecbdata, cons else { /* Emit just the prefix, then the rest. */ emit_line(ecbdata->file, set, reset, line, ecbdata->nparents); - (void)check_and_emit_line(line + ecbdata->nparents, - len - ecbdata->nparents, ecbdata->ws_rule, - ecbdata->file, set, reset, ws); + ws_check_emit(line + ecbdata->nparents, + len - ecbdata->nparents, ecbdata->ws_rule, + ecbdata->file, set, reset, ws); } } @@ -1153,8 +1153,7 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len) if (line[0] == '+') { unsigned bad; data->lineno++; - bad = check_and_emit_line(line + 1, len - 1, - data->ws_rule, NULL, NULL, NULL, NULL); + bad = ws_check(line + 1, len - 1, data->ws_rule); if (!bad) return; data->status |= bad; @@ -1162,8 +1161,8 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len) fprintf(data->file, "%s:%d: %s.\n", data->filename, data->lineno, err); free(err); emit_line(data->file, set, reset, line, 1); - (void)check_and_emit_line(line + 1, len - 1, data->ws_rule, - data->file, set, reset, ws); + ws_check_emit(line + 1, len - 1, data->ws_rule, + data->file, set, reset, ws); } else if (line[0] == ' ') data->lineno++; else if (line[0] == '@') { diff --git a/ws.c b/ws.c index ba7e834ca8..24d3e3de07 100644 --- a/ws.c +++ b/ws.c @@ -117,9 +117,9 @@ char *whitespace_error_string(unsigned ws) } /* If stream is non-NULL, emits the line after checking. 
*/ -unsigned check_and_emit_line(const char *line, int len, unsigned ws_rule, - FILE *stream, const char *set, - const char *reset, const char *ws) +static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule, + FILE *stream, const char *set, + const char *reset, const char *ws) { unsigned result = 0; int written = 0; @@ -213,6 +213,18 @@ unsigned check_and_emit_line(const char *line, int len, unsigned ws_rule, return result; } +void ws_check_emit(const char *line, int len, unsigned ws_rule, + FILE *stream, const char *set, + const char *reset, const char *ws) +{ + (void)ws_check_emit_1(line, len, ws_rule, stream, set, reset, ws); +} + +unsigned ws_check(const char *line, int len, unsigned ws_rule) +{ + return ws_check_emit_1(line, len, ws_rule, NULL, NULL, NULL, NULL); +} + /* Copy the line to the buffer while fixing whitespaces */ int ws_fix_copy(char *dst, const char *src, int len, unsigned ws_rule, int *error_count) { From 1ba111d1d6bd90b2c120ceb05418e01ee304cc46 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 26 Jun 2008 15:36:34 -0700 Subject: [PATCH 3/7] checkdiff: pass diff_options to the callback This way, we could later use more information from the diff_options. 
Signed-off-by: Junio C Hamano --- diff.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/diff.c b/diff.c index 52a34eec91..6bcbe20828 100644 --- a/diff.c +++ b/diff.c @@ -1136,18 +1136,19 @@ static void free_diffstat_info(struct diffstat_t *diffstat) struct checkdiff_t { struct xdiff_emit_state xm; const char *filename; - int lineno, color_diff; + int lineno; + struct diff_options *o; unsigned ws_rule; unsigned status; - FILE *file; }; static void checkdiff_consume(void *priv, char *line, unsigned long len) { struct checkdiff_t *data = priv; - const char *ws = diff_get_color(data->color_diff, DIFF_WHITESPACE); - const char *reset = diff_get_color(data->color_diff, DIFF_RESET); - const char *set = diff_get_color(data->color_diff, DIFF_FILE_NEW); + int color_diff = DIFF_OPT_TST(data->o, COLOR_DIFF); + const char *ws = diff_get_color(color_diff, DIFF_WHITESPACE); + const char *reset = diff_get_color(color_diff, DIFF_RESET); + const char *set = diff_get_color(color_diff, DIFF_FILE_NEW); char *err; if (line[0] == '+') { @@ -1158,11 +1159,12 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len) return; data->status |= bad; err = whitespace_error_string(bad); - fprintf(data->file, "%s:%d: %s.\n", data->filename, data->lineno, err); + fprintf(data->o->file, "%s:%d: %s.\n", + data->filename, data->lineno, err); free(err); - emit_line(data->file, set, reset, line, 1); + emit_line(data->o->file, set, reset, line, 1); ws_check_emit(line + 1, len - 1, data->ws_rule, - data->file, set, reset, ws); + data->o->file, set, reset, ws); } else if (line[0] == ' ') data->lineno++; else if (line[0] == '@') { @@ -1557,9 +1559,8 @@ static void builtin_checkdiff(const char *name_a, const char *name_b, data.xm.consume = checkdiff_consume; data.filename = name_b ? 
name_b : name_a; data.lineno = 0; - data.color_diff = DIFF_OPT_TST(o, COLOR_DIFF); + data.o = o; data.ws_rule = whitespace_rule(attr_path); - data.file = o->file; if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) die("unable to read files to diff"); From 877f23ccb88227203f2576abdfb5d1c15925fcb3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 26 Jun 2008 15:36:59 -0700 Subject: [PATCH 4/7] Teach "diff --check" about new blank lines at end When a patch adds new blank lines at the end, "git apply --whitespace" warns. This teaches "diff --check" to do the same. Signed-off-by: Junio C Hamano --- cache.h | 1 + diff.c | 17 +++++++++++++++-- t/t4015-diff-whitespace.sh | 6 ++++++ ws.c | 15 +++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/cache.h b/cache.h index 3dfa53c566..188428dd26 100644 --- a/cache.h +++ b/cache.h @@ -823,6 +823,7 @@ extern unsigned ws_check(const char *line, int len, unsigned ws_rule); extern void ws_check_emit(const char *line, int len, unsigned ws_rule, FILE *stream, const char *set, const char *reset, const char *ws); extern char *whitespace_error_string(unsigned ws); extern int ws_fix_copy(char *, const char *, int, unsigned, int *); +extern int ws_blank_line(const char *line, int len, unsigned ws_rule); /* ls-files */ int pathspec_match(const char **spec, char *matched, const char *filename, int skiplen); diff --git a/diff.c b/diff.c index 6bcbe20828..f31c721168 100644 --- a/diff.c +++ b/diff.c @@ -1140,6 +1140,7 @@ struct checkdiff_t { struct diff_options *o; unsigned ws_rule; unsigned status; + int trailing_blanks_start; }; static void checkdiff_consume(void *priv, char *line, unsigned long len) @@ -1154,6 +1155,10 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len) if (line[0] == '+') { unsigned bad; data->lineno++; + if (!ws_blank_line(line + 1, len - 1, data->ws_rule)) + data->trailing_blanks_start = 0; + else if (!data->trailing_blanks_start) + 
data->trailing_blanks_start = data->lineno; bad = ws_check(line + 1, len - 1, data->ws_rule); if (!bad) return; @@ -1165,14 +1170,16 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len) emit_line(data->o->file, set, reset, line, 1); ws_check_emit(line + 1, len - 1, data->ws_rule, data->o->file, set, reset, ws); - } else if (line[0] == ' ') + } else if (line[0] == ' ') { data->lineno++; - else if (line[0] == '@') { + data->trailing_blanks_start = 0; + } else if (line[0] == '@') { char *plus = strchr(line, '+'); if (plus) data->lineno = strtol(plus, NULL, 10) - 1; else die("invalid diff"); + data->trailing_blanks_start = 0; } } @@ -1584,6 +1591,12 @@ static void builtin_checkdiff(const char *name_a, const char *name_b, ecb.outf = xdiff_outf; ecb.priv = &data; xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); + + if (data.trailing_blanks_start) { + fprintf(o->file, "%s:%d: ends with blank lines.\n", + data.filename, data.trailing_blanks_start); + data.status = 1; /* report errors */ + } } free_and_return: diff_free_filespec_data(one); diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index b7cc6b28e6..0922c708f1 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -335,4 +335,10 @@ test_expect_success 'line numbers in --check output are correct' ' ' +test_expect_success 'checkdiff detects trailing blank lines' ' + echo "foo();" >x && + echo "" >>x && + git diff --check | grep "ends with blank" +' + test_done diff --git a/ws.c b/ws.c index 24d3e3de07..7a7ff130a3 100644 --- a/ws.c +++ b/ws.c @@ -225,6 +225,21 @@ unsigned ws_check(const char *line, int len, unsigned ws_rule) return ws_check_emit_1(line, len, ws_rule, NULL, NULL, NULL, NULL); } +int ws_blank_line(const char *line, int len, unsigned ws_rule) +{ + /* + * We _might_ want to treat CR differently from other + * whitespace characters when ws_rule has WS_CR_AT_EOL, but + * for now we just use this stupid definition. 
+ */ + while (len-- > 0) { + if (!isspace(*line)) + return 0; + line++; + } + return 1; +} + /* Copy the line to the buffer while fixing whitespaces */ int ws_fix_copy(char *dst, const char *src, int len, unsigned ws_rule, int *error_count) { From 049540435fa5f7f583b8f5af257322b17eac7375 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 26 Jun 2008 15:37:21 -0700 Subject: [PATCH 5/7] diff --check: detect leftover conflict markers This teaches "diff --check" to detect and complain if the change adds lines that look like leftover conflict markers. We should be able to remove the old Perl script used in the sample pre-commit hook and modernize the script with this facility. Signed-off-by: Junio C Hamano --- diff.c | 35 +++++++++++++++++++++++++++++++++++ t/t4017-diff-retval.sh | 14 ++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/diff.c b/diff.c index f31c721168..d515b06ea3 100644 --- a/diff.c +++ b/diff.c @@ -1143,6 +1143,35 @@ struct checkdiff_t { int trailing_blanks_start; }; +static int is_conflict_marker(const char *line, unsigned long len) +{ + char firstchar; + int cnt; + + if (len < 8) + return 0; + firstchar = line[0]; + switch (firstchar) { + case '=': case '>': case '<': + break; + default: + return 0; + } + for (cnt = 1; cnt < 7; cnt++) + if (line[cnt] != firstchar) + return 0; + /* line[0] thru line[6] are same as firstchar */ + if (firstchar == '=') { + /* divider between ours and theirs? 
*/ + if (len != 8 || line[7] != '\n') + return 0; + } else if (len < 8 || !isspace(line[7])) { + /* not divider before ours nor after theirs */ + return 0; + } + return 1; +} + static void checkdiff_consume(void *priv, char *line, unsigned long len) { struct checkdiff_t *data = priv; @@ -1159,6 +1188,12 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len) data->trailing_blanks_start = 0; else if (!data->trailing_blanks_start) data->trailing_blanks_start = data->lineno; + if (is_conflict_marker(line + 1, len - 1)) { + data->status |= 1; + fprintf(data->o->file, + "%s:%d: leftover conflict marker\n", + data->filename, data->lineno); + } bad = ws_check(line + 1, len - 1, data->ws_rule); if (!bad) return; diff --git a/t/t4017-diff-retval.sh b/t/t4017-diff-retval.sh index 0d0fb87f57..d748d45dae 100755 --- a/t/t4017-diff-retval.sh +++ b/t/t4017-diff-retval.sh @@ -113,4 +113,18 @@ test_expect_success 'check should test not just the last line' ' ' +test_expect_success 'check detects leftover conflict markers' ' + git reset --hard && + git checkout HEAD^ && + echo binary >>b && + git commit -m "side" b && + test_must_fail git merge master && + git add b && ( + git --no-pager diff --cached --check >test.out + test $? = 2 + ) && + test "$(grep "conflict marker" test.out | wc -l)" = 3 && + git reset --hard +' + test_done From 03e2b630f05b88da5ff43f194fed25755de44e8b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 26 Jun 2008 16:08:05 -0700 Subject: [PATCH 6/7] Update sample pre-commit hook to use "diff --check" Now that "diff --check" can detect not just whitespace errors but also leftover conflict marker lines, we can use it in the sample pre-commit hook script. These days the object layer knows about the empty tree object without actually having one in the repository, so we can run the test even for the initial commit. 
Signed-off-by: Junio C Hamano --- templates/hooks--pre-commit.sample | 64 +++--------------------------- 1 file changed, 6 insertions(+), 58 deletions(-) diff --git a/templates/hooks--pre-commit.sample b/templates/hooks--pre-commit.sample index 71c10f25f4..0e49279c7f 100755 --- a/templates/hooks--pre-commit.sample +++ b/templates/hooks--pre-commit.sample @@ -7,64 +7,12 @@ # # To enable this hook, rename this file to "pre-commit". -# This is slightly modified from Andrew Morton's Perfect Patch. -# Lines you introduce should not have trailing whitespace. -# Also check for an indentation that has SP before a TAB. - if git-rev-parse --verify HEAD 2>/dev/null then - git-diff-index -p -M --cached HEAD -- + against=HEAD else - # NEEDSWORK: we should produce a diff with an empty tree here - # if we want to do the same verification for the initial import. - : -fi | -perl -e ' - my $found_bad = 0; - my $filename; - my $reported_filename = ""; - my $lineno; - sub bad_line { - my ($why, $line) = @_; - if (!$found_bad) { - print STDERR "*\n"; - print STDERR "* You have some suspicious patch lines:\n"; - print STDERR "*\n"; - $found_bad = 1; - } - if ($reported_filename ne $filename) { - print STDERR "* In $filename\n"; - $reported_filename = $filename; - } - print STDERR "* $why (line $lineno)\n"; - print STDERR "$filename:$lineno:$line\n"; - } - while (<>) { - if (m|^diff --git a/(.*) b/\1$|) { - $filename = $1; - next; - } - if (/^@@ -\S+ \+(\d+)/) { - $lineno = $1 - 1; - next; - } - if (/^ /) { - $lineno++; - next; - } - if (s/^\+//) { - $lineno++; - chomp; - if (/\s$/) { - bad_line("trailing whitespace", $_); - } - if (/^\s* \t/) { - bad_line("indent SP followed by a TAB", $_); - } - if (/^([<>])\1{6} |^={7}$/) { - bad_line("unresolved merge conflict", $_); - } - } - } - exit($found_bad); -' + # Initial commit: diff against an empty tree object + against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 +fi + +exec git diff-index --check --cached $against -- From 
ab20fda99236e38edf5d63f964b6b920b494aacb Mon Sep 17 00:00:00 2001 From: Brian Gernhardt Date: Sun, 29 Jun 2008 16:49:06 -0400 Subject: [PATCH 7/7] Fix t4017-diff-retval for white-space from wc Signed-off-by: Brian Gernhardt Signed-off-by: Junio C Hamano --- t/t4017-diff-retval.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t4017-diff-retval.sh b/t/t4017-diff-retval.sh index d748d45dae..60dd2014d5 100755 --- a/t/t4017-diff-retval.sh +++ b/t/t4017-diff-retval.sh @@ -123,7 +123,7 @@ test_expect_success 'check detects leftover conflict markers' ' git --no-pager diff --cached --check >test.out test $? = 2 ) && - test "$(grep "conflict marker" test.out | wc -l)" = 3 && + test 3 = $(grep "conflict marker" test.out | wc -l) && git reset --hard '