From 9b25949a07933fb9ce7c10bd309ac176dbe2df2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sat, 6 Mar 2010 15:30:21 +0100 Subject: [PATCH 1/5] apply: Don't unnecessarily update line lengths in the preimage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In match_fragment(), the line lengths in the preimage are updated just before calling update_pre_post_images(). That is not necessary, since update_pre_post_images() itself will update the line lengths based on the buffer passed to it. Signed-off-by: Björn Gustavsson Signed-off-by: Junio C Hamano --- builtin-apply.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/builtin-apply.c b/builtin-apply.c index 2a1004d025..fc6c7083f7 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1905,20 +1905,15 @@ static int match_fragment(struct image *img, } /* - * Ok, the preimage matches with whitespace fuzz. Update it and - * the common postimage lines to use the same whitespace as the - * target. imgoff now holds the true length of the target that - * matches the preimage, and we need to update the line lengths - * of the preimage to match the target ones. + * Ok, the preimage matches with whitespace fuzz. + * + * imgoff now holds the true length of the target that + * matches the preimage. Update the preimage and + * the common postimage context lines to use the same + * whitespace as the target. */ fixed_buf = xmalloc(imgoff); memcpy(fixed_buf, img->buf + try, imgoff); - for (i = 0; i < preimage->nr; i++) - preimage->line[i].len = img->line[try_lno+i].len; - - /* - * Update the preimage buffer and the postimage context lines. - */ update_pre_post_images(preimage, postimage, fixed_buf, imgoff, postlen); return 1; From 24ff4d56cf400126aa93ac9a5b9d8a21afadf3f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sat, 6 Mar 2010 15:30:29 +0100 Subject: [PATCH 2/5] apply: Remove the quick rejection test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the next commit, we will make it possible for blank context lines to match beyond the end of the file. That means that a hunk with a preimage that has more lines than present in the file may be possible to successfully apply. Therefore, we must remove the quick rejection test in find_pos(). find_pos() will already work correctly without the quick rejection test, but that might not be obvious. Therefore, comment the test for handling out-of-range line numbers in find_pos() and cast the "line" variable to the same (unsigned) type as img->nr. What are performance implications of removing the quick rejection test? It can only help "git apply" to reject a patch faster. For example, if I have a file with one million lines and a patch that removes slightly more than 50 percent of the lines and try to apply that patch twice, the second attempt will fail slightly faster with the test than without (based on actual measurements). However, there is the pathological case of a patch with many more context lines than the default three, and applying that patch using "git apply -C1". Without the rejection test, the running time will be roughly proportional to the number of context lines times the size of the file. That could be handled by writing a more complicated rejection test (it would have to count the number of blanks at the end of the preimage), but I don't find that worth doing until there is a real-world use case that would benfit from it. It would be possible to keep the quick rejection test if --whitespace=fix is not given, but I don't like that from a testing point of view. Signed-off-by: Björn Gustavsson Signed-off-by: Junio C Hamano --- builtin-apply.c | 12 +++++++----- t/t4104-apply-boundary.sh | 9 +++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/builtin-apply.c b/builtin-apply.c index fc6c7083f7..9641a6479a 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1997,11 +1997,8 @@ static int find_pos(struct image *img, unsigned long backwards, forwards, try; int backwards_lno, forwards_lno, try_lno; - if (preimage->nr > img->nr) - return -1; - /* - * If match_begining or match_end is specified, there is no + * If match_beginning or match_end is specified, there is no * point starting from a wrong line that will never match and * wander around and wait for a match at the specified end. */ @@ -2010,7 +2007,12 @@ static int find_pos(struct image *img, else if (match_end) line = img->nr - preimage->nr; - if (line > img->nr) + /* + * Because the comparison is unsigned, the following test + * will also take care of a negative line number that can + * result when match_end and preimage is larger than the target. + */ + if ((size_t) line > img->nr) line = img->nr; try = 0; diff --git a/t/t4104-apply-boundary.sh b/t/t4104-apply-boundary.sh index 0e3ce3611d..c617c2a33d 100755 --- a/t/t4104-apply-boundary.sh +++ b/t/t4104-apply-boundary.sh @@ -134,4 +134,13 @@ test_expect_success 'two lines' ' ' +test_expect_success 'apply patch with 3 context lines matching at end' ' + { echo a; echo b; echo c; echo d; } >file && + git add file && + echo e >>file && + git diff >patch && + >file && + test_must_fail git apply patch +' + test_done From 51667147be4a5aed6d735d43630ce36b74134eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sat, 6 Mar 2010 15:30:42 +0100 Subject: [PATCH 3/5] apply: Allow blank context lines to match beyond EOF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "git apply --whitespace=fix" will not always succeed when used on a series of patches in the following circumstances: * One patch adds a blank line at the end of a file. (Since --whitespace=fix is used, the blank line will *not* be added.) * The next patch adds non-blank lines after the blank line introduced in the first patch. That patch will not apply because the blank line that is expected to be found at end of the file is no longer there. A patch series that starts by deleting lines at the end will fail in a similar way. Fix this problem by allowing a blank context line at the beginning of a hunk to match if parts of it falls beyond end of the file. We still require that at least one non-blank context line match before the end of the file. If the --ignore-space-change option is given (as well as the --whitespace=fix option), blank context lines falling beyond the end of the file will be copied unchanged to the target file (i.e. they will have the same line terminators and extra spaces will not be removed). Signed-off-by: Björn Gustavsson Signed-off-by: Junio C Hamano --- builtin-apply.c | 168 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 138 insertions(+), 30 deletions(-) diff --git a/builtin-apply.c b/builtin-apply.c index 9641a6479a..7ca90472c1 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1854,33 +1854,76 @@ static int match_fragment(struct image *img, { int i; char *fixed_buf, *buf, *orig, *target; + int preimage_limit; - if (preimage->nr + try_lno > img->nr) + if (preimage->nr + try_lno <= img->nr) { + /* + * The hunk falls within the boundaries of img. + */ + preimage_limit = preimage->nr; + if (match_end && (preimage->nr + try_lno != img->nr)) + return 0; + } else if (ws_error_action == correct_ws_error && + (ws_rule & WS_BLANK_AT_EOF) && match_end) { + /* + * This hunk that matches at the end extends beyond + * the end of img, and we are removing blank lines + * at the end of the file. This many lines from the + * beginning of the preimage must match with img, and + * the remainder of the preimage must be blank. + */ + preimage_limit = img->nr - try_lno; + } else { + /* + * The hunk extends beyond the end of the img and + * we are not removing blanks at the end, so we + * should reject the hunk at this position. + */ return 0; + } if (match_beginning && try_lno) return 0; - if (match_end && preimage->nr + try_lno != img->nr) - return 0; - /* Quick hash check */ - for (i = 0; i < preimage->nr; i++) + for (i = 0; i < preimage_limit; i++) if (preimage->line[i].hash != img->line[try_lno + i].hash) return 0; - /* - * Do we have an exact match? If we were told to match - * at the end, size must be exactly at try+fragsize, - * otherwise try+fragsize must be still within the preimage, - * and either case, the old piece should match the preimage - * exactly. - */ - if ((match_end - ? (try + preimage->len == img->len) - : (try + preimage->len <= img->len)) && - !memcmp(img->buf + try, preimage->buf, preimage->len)) - return 1; + if (preimage_limit == preimage->nr) { + /* + * Do we have an exact match? If we were told to match + * at the end, size must be exactly at try+fragsize, + * otherwise try+fragsize must be still within the preimage, + * and either case, the old piece should match the preimage + * exactly. + */ + if ((match_end + ? (try + preimage->len == img->len) + : (try + preimage->len <= img->len)) && + !memcmp(img->buf + try, preimage->buf, preimage->len)) + return 1; + } else { + /* + * The preimage extends beyond the end of img, so + * there cannot be an exact match. + * + * There must be one non-blank context line that match + * a line before the end of img. + */ + char *buf_end; + + buf = preimage->buf; + buf_end = buf; + for (i = 0; i < preimage_limit; i++) + buf_end += preimage->line[i].len; + + for ( ; buf < buf_end; buf++) + if (!isspace(*buf)) + break; + if (buf == buf_end) + return 0; + } /* * No exact match. If we are ignoring whitespace, run a line-by-line @@ -1891,7 +1934,10 @@ static int match_fragment(struct image *img, size_t imgoff = 0; size_t preoff = 0; size_t postlen = postimage->len; - for (i = 0; i < preimage->nr; i++) { + size_t extra_chars; + char *preimage_eof; + char *preimage_end; + for (i = 0; i < preimage_limit; i++) { size_t prelen = preimage->line[i].len; size_t imglen = img->line[try_lno+i].len; @@ -1908,12 +1954,33 @@ static int match_fragment(struct image *img, * Ok, the preimage matches with whitespace fuzz. * * imgoff now holds the true length of the target that - * matches the preimage. Update the preimage and - * the common postimage context lines to use the same - * whitespace as the target. + * matches the preimage before the end of the file. + * + * Count the number of characters in the preimage that fall + * beyond the end of the file and make sure that all of them + * are whitespace characters. (This can only happen if + * we are removing blank lines at the end of the file.) */ - fixed_buf = xmalloc(imgoff); + buf = preimage_eof = preimage->buf + preoff; + for ( ; i < preimage->nr; i++) + preoff += preimage->line[i].len; + preimage_end = preimage->buf + preoff; + for ( ; buf < preimage_end; buf++) + if (!isspace(*buf)) + return 0; + + /* + * Update the preimage and the common postimage context + * lines to use the same whitespace as the target. + * If whitespace is missing in the target (i.e. + * if the preimage extends beyond the end of the file), + * use the whitespace from the preimage. + */ + extra_chars = preimage_end - preimage_eof; + fixed_buf = xmalloc(imgoff + extra_chars); memcpy(fixed_buf, img->buf + try, imgoff); + memcpy(fixed_buf + imgoff, preimage_eof, extra_chars); + imgoff += extra_chars; update_pre_post_images(preimage, postimage, fixed_buf, imgoff, postlen); return 1; @@ -1927,12 +1994,16 @@ static int match_fragment(struct image *img, * it might with whitespace fuzz. We haven't been asked to * ignore whitespace, we were asked to correct whitespace * errors, so let's try matching after whitespace correction. + * + * The preimage may extend beyond the end of the file, + * but in this loop we will only handle the part of the + * preimage that falls within the file. */ fixed_buf = xmalloc(preimage->len + 1); buf = fixed_buf; orig = preimage->buf; target = img->buf + try; - for (i = 0; i < preimage->nr; i++) { + for (i = 0; i < preimage_limit; i++) { size_t fixlen; /* length after fixing the preimage */ size_t oldlen = preimage->line[i].len; size_t tgtlen = img->line[try_lno + i].len; @@ -1972,6 +2043,29 @@ static int match_fragment(struct image *img, target += tgtlen; } + + /* + * Now handle the lines in the preimage that falls beyond the + * end of the file (if any). They will only match if they are + * empty or only contain whitespace (if WS_BLANK_AT_EOL is + * false). + */ + for ( ; i < preimage->nr; i++) { + size_t fixlen; /* length after fixing the preimage */ + size_t oldlen = preimage->line[i].len; + int j; + + /* Try fixing the line in the preimage */ + fixlen = ws_fix_copy(buf, orig, oldlen, ws_rule, NULL); + + for (j = 0; j < fixlen; j++) + if (!isspace(buf[j])) + goto unmatch_exit; + + orig += oldlen; + buf += fixlen; + } + /* * Yes, the preimage is based on an older version that still * has whitespace breakages unfixed, and fixing them makes the @@ -2088,12 +2182,26 @@ static void update_image(struct image *img, int i, nr; size_t remove_count, insert_count, applied_at = 0; char *result; + int preimage_limit; + + /* + * If we are removing blank lines at the end of img, + * the preimage may extend beyond the end. + * If that is the case, we must be careful only to + * remove the part of the preimage that falls within + * the boundaries of img. Initialize preimage_limit + * to the number of lines in the preimage that falls + * within the boundaries. + */ + preimage_limit = preimage->nr; + if (preimage_limit > img->nr - applied_pos) + preimage_limit = img->nr - applied_pos; for (i = 0; i < applied_pos; i++) applied_at += img->line[i].len; remove_count = 0; - for (i = 0; i < preimage->nr; i++) + for (i = 0; i < preimage_limit; i++) remove_count += img->line[applied_pos + i].len; insert_count = postimage->len; @@ -2110,8 +2218,8 @@ static void update_image(struct image *img, result[img->len] = '\0'; /* Adjust the line table */ - nr = img->nr + postimage->nr - preimage->nr; - if (preimage->nr < postimage->nr) { + nr = img->nr + postimage->nr - preimage_limit; + if (preimage_limit < postimage->nr) { /* * NOTE: this knows that we never call remove_first_line() * on anything other than pre/post image. @@ -2119,10 +2227,10 @@ static void update_image(struct image *img, img->line = xrealloc(img->line, nr * sizeof(*img->line)); img->line_allocated = img->line; } - if (preimage->nr != postimage->nr) + if (preimage_limit != postimage->nr) memmove(img->line + applied_pos + postimage->nr, - img->line + applied_pos + preimage->nr, - (img->nr - (applied_pos + preimage->nr)) * + img->line + applied_pos + preimage_limit, + (img->nr - (applied_pos + preimage_limit)) * sizeof(*img->line)); memcpy(img->line + applied_pos, postimage->line, @@ -2318,7 +2426,7 @@ static int apply_one_fragment(struct image *img, struct fragment *frag, if (applied_pos >= 0) { if (new_blank_lines_at_end && - preimage.nr + applied_pos == img->nr && + preimage.nr + applied_pos >= img->nr && (ws_rule & WS_BLANK_AT_EOF) && ws_error_action != nowarn_ws_error) { record_ws_error(WS_BLANK_AT_EOF, "+", 1, frag->linenr); From c1376c12b781e511fd2c94c5d538b075cc1913e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sat, 6 Mar 2010 15:31:04 +0100 Subject: [PATCH 4/5] t4124: Add additional tests of --whitespace=fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Björn Gustavsson Signed-off-by: Junio C Hamano --- t/t4124-apply-ws-rule.sh | 170 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/t/t4124-apply-ws-rule.sh b/t/t4124-apply-ws-rule.sh index ca26397590..fb9ad247bf 100755 --- a/t/t4124-apply-ws-rule.sh +++ b/t/t4124-apply-ws-rule.sh @@ -261,4 +261,174 @@ test_expect_success 'blank but not empty at EOF' ' grep "new blank line at EOF" error ' +test_expect_success 'applying beyond EOF requires one non-blank context line' ' + { echo; echo; echo; echo; } >one && + git add one && + { echo b; } >>one && + git diff -- one >patch && + + git checkout one && + { echo a; echo; } >one && + cp one expect && + test_must_fail git apply --whitespace=fix patch && + test_cmp one expect && + test_must_fail git apply --ignore-space-change --whitespace=fix patch && + test_cmp one expect +' + +test_expect_success 'tons of blanks at EOF should not apply' ' + for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16; do + echo; echo; echo; echo; + done >one && + git add one && + echo a >>one && + git diff -- one >patch && + + >one && + test_must_fail git apply --whitespace=fix patch && + test_must_fail git apply --ignore-space-change --whitespace=fix patch +' + +test_expect_success 'missing blank line at end with --whitespace=fix' ' + echo a >one && + echo >>one && + git add one && + echo b >>one && + cp one expect && + git diff -- one >patch && + echo a >one && + cp one saved-one && + test_must_fail git apply patch && + git apply --whitespace=fix patch && + test_cmp one expect && + mv saved-one one && + git apply --ignore-space-change --whitespace=fix patch && + test_cmp one expect +' + +test_expect_success 'two missing blank lines at end with --whitespace=fix' ' + { echo a; echo; echo b; echo c; } >one && + cp one no-blank-lines && + { echo; echo; } >>one && + git add one && + echo d >>one && + cp one expect && + echo >>one && + git diff -- one >patch && + cp no-blank-lines one && + test_must_fail git apply patch && + git apply --whitespace=fix patch && + test_cmp one expect && + mv no-blank-lines one && + test_must_fail git apply patch && + git apply --ignore-space-change --whitespace=fix patch && + test_cmp one expect +' + +test_expect_success 'shrink file with tons of missing blanks at end of file' ' + { echo a; echo b; echo c; } >one && + cp one no-blank-lines && + for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16; do + echo; echo; echo; echo; + done >>one && + git add one && + echo a >one && + cp one expect && + git diff -- one >patch && + cp no-blank-lines one && + test_must_fail git apply patch && + git apply --whitespace=fix patch && + test_cmp one expect && + mv no-blank-lines one && + git apply --ignore-space-change --whitespace=fix patch && + test_cmp one expect +' + +test_expect_success 'missing blanks at EOF must only match blank lines' ' + { echo a; echo b; } >one && + git add one && + { echo c; echo d; } >>one && + git diff -- one >patch && + + echo a >one && + test_must_fail git apply patch + test_must_fail git apply --whitespace=fix patch && + test_must_fail git apply --ignore-space-change --whitespace=fix patch +' + +sed -e's/Z//' >one <>one && + git diff -- one >patch && + { echo a; echo b; echo c; } >one && + cp one expect && + { echo; echo d; } >>expect && + git add one && + + git apply --whitespace=fix patch && + test_cmp one expect +' + +sed -e's/Z//' >one <>one && + cp one expect && + git diff -- one >patch && + { echo a; echo b; echo c; } >one && + git add one && + + git checkout-index -f one && + git apply --ignore-space-change --whitespace=fix patch && + test_cmp one expect +' + +test_expect_success 'same, but with CR-LF line endings && cr-at-eol set' ' + git config core.whitespace cr-at-eol && + printf "a\r\n" >one && + printf "b\r\n" >>one && + printf "c\r\n" >>one && + cp one save-one && + printf " \r\n" >>one + git add one && + printf "d\r\n" >>one && + cp one expect && + git diff -- one >patch && + mv save-one one && + + git apply --ignore-space-change --whitespace=fix patch && + test_cmp one expect +' + +test_expect_success 'same, but with CR-LF line endings && cr-at-eol unset' ' + git config --unset core.whitespace && + printf "a\r\n" >one && + printf "b\r\n" >>one && + printf "c\r\n" >>one && + cp one save-one && + printf " \r\n" >>one + git add one && + cp one expect && + printf "d\r\n" >>one && + git diff -- one >patch && + mv save-one one && + echo d >>expect && + + git apply --ignore-space-change --whitespace=fix patch && + test_cmp one expect +' + test_done From 59f5ced65b46370a121916593cce7fcd210a15df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sat, 6 Mar 2010 15:31:17 +0100 Subject: [PATCH 5/5] t3417: Add test cases for "rebase --whitespace=fix" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The command "git rebase --whitespace=fix HEAD~" is supposed to only clean up trailing whitespace, and the expectation is that it cannot fail. Unfortunately, if one commit adds a blank line at the end of a file and a subsequent commit adds more non-blank lines after the blank line, "git apply" (used indirectly by "git rebase") will fail to apply the patch of the second commit. Signed-off-by: Björn Gustavsson Signed-off-by: Junio C Hamano --- t/t3417-rebase-whitespace-fix.sh | 126 +++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100755 t/t3417-rebase-whitespace-fix.sh diff --git a/t/t3417-rebase-whitespace-fix.sh b/t/t3417-rebase-whitespace-fix.sh new file mode 100755 index 0000000000..220a740ee8 --- /dev/null +++ b/t/t3417-rebase-whitespace-fix.sh @@ -0,0 +1,126 @@ +#!/bin/sh + +test_description='git rebase --whitespace=fix + +This test runs git rebase --whitespace=fix and make sure that it works. +' + +. ./test-lib.sh + +# prepare initial revision of "file" with a blank line at the end +cat >file <expect-first <second <expect-second <third <expect-third + +test_expect_success 'two blanks line at end of file; extend at end of file' ' + cp third file && git add file && git commit -m third && + git rebase --whitespace=fix HEAD^^ && + git diff --exit-code HEAD^:file expect-second && + test_cmp file expect-third +' + +test_expect_success 'same, but do not remove trailing spaces' ' + git config core.whitespace "-blank-at-eol" && + git reset --hard HEAD^ && + cp third file && git add file && git commit -m third && + git rebase --whitespace=fix HEAD^^ + git diff --exit-code HEAD^:file expect-second && + test_cmp file third +' + +sed -e's/Z//' >beginning <expect-beginning <> file && + git commit -m more file && + git rebase --whitespace=fix HEAD^^ && + test_cmp file expect-beginning +' + +test_done