git-commit-vandalism/t/t6007-rev-list-cherry-pick-file.sh

285 lines
6.7 KiB
Bash
Raw Normal View History

#!/bin/sh
test_description='test git rev-list --cherry-pick -- file'
GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
tests: mark tests relying on the current default for `init.defaultBranch` In addition to the manual adjustment to let the `linux-gcc` CI job run the test suite with `master` and then with `main`, this patch makes sure that GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME is set in all test scripts that currently rely on the initial branch name being `master by default. To determine which test scripts to mark up, the first step was to force-set the default branch name to `master` in - all test scripts that contain the keyword `master`, - t4211, which expects `t/t4211/history.export` with a hard-coded ref to initialize the default branch, - t5560 because it sources `t/t556x_common` which uses `master`, - t8002 and t8012 because both source `t/annotate-tests.sh` which also uses `master`) This trick was performed by this command: $ sed -i '/^ *\. \.\/\(test-lib\|lib-\(bash\|cvs\|git-svn\)\|gitweb-lib\)\.sh$/i\ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master\ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME\ ' $(git grep -l master t/t[0-9]*.sh) \ t/t4211*.sh t/t5560*.sh t/t8002*.sh t/t8012*.sh After that, careful, manual inspection revealed that some of the test scripts containing the needle `master` do not actually rely on a specific default branch name: either they mention `master` only in a comment, or they initialize that branch specificially, or they do not actually refer to the current default branch. Therefore, the aforementioned modification was undone in those test scripts thusly: $ git checkout HEAD -- \ t/t0027-auto-crlf.sh t/t0060-path-utils.sh \ t/t1011-read-tree-sparse-checkout.sh \ t/t1305-config-include.sh t/t1309-early-config.sh \ t/t1402-check-ref-format.sh t/t1450-fsck.sh \ t/t2024-checkout-dwim.sh \ t/t2106-update-index-assume-unchanged.sh \ t/t3040-subprojects-basic.sh t/t3301-notes.sh \ t/t3308-notes-merge.sh t/t3423-rebase-reword.sh \ t/t3436-rebase-more-options.sh \ t/t4015-diff-whitespace.sh t/t4257-am-interactive.sh \ t/t5323-pack-redundant.sh t/t5401-update-hooks.sh \ t/t5511-refspec.sh t/t5526-fetch-submodules.sh \ t/t5529-push-errors.sh t/t5530-upload-pack-error.sh \ t/t5548-push-porcelain.sh \ t/t5552-skipping-fetch-negotiator.sh \ t/t5572-pull-submodule.sh t/t5608-clone-2gb.sh \ t/t5614-clone-submodules-shallow.sh \ t/t7508-status.sh t/t7606-merge-custom.sh \ t/t9302-fast-import-unpack-limit.sh We excluded one set of test scripts in these commands, though: the range of `git p4` tests. The reason? `git p4` stores the (foreign) remote branch in the branch called `p4/master`, which is obviously not the default branch. Manual analysis revealed that only five of these tests actually require a specific default branch name to pass; They were modified thusly: $ sed -i '/^ *\. \.\/lib-git-p4\.sh$/i\ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master\ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME\ ' t/t980[0167]*.sh t/t9811*.sh Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-11-19 00:44:19 +01:00
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
. ./test-lib.sh
# A---B---D---F
# \
# \
# C---E
#
# B changes a file foo.c, adding a line of text. C changes foo.c as
# well as bar.c, but the change in foo.c was identical to change B.
# D and C change bar in the same way, E and F differently.
test_expect_success setup '
echo Hallo > foo &&
git add foo &&
test_tick &&
git commit -m "A" &&
git tag A &&
git checkout -b branch &&
echo Bello > foo &&
echo Cello > bar &&
git add foo bar &&
test_tick &&
git commit -m "C" &&
git tag C &&
echo Dello > bar &&
git add bar &&
test_tick &&
git commit -m "E" &&
git tag E &&
git checkout main &&
git checkout branch foo &&
test_tick &&
git commit -m "B" &&
git tag B &&
echo Cello > bar &&
git add bar &&
test_tick &&
git commit -m "D" &&
git tag D &&
echo Nello > bar &&
git add bar &&
test_tick &&
git commit -m "F" &&
git tag F
'
cat >expect <<EOF
<tags/B
>tags/C
EOF
test_expect_success '--left-right' '
git rev-list --left-right B...C > actual &&
git name-rev --stdin --name-only --refs="*tags/*" \
< actual > actual.named &&
test_cmp expect actual.named
'
test_expect_success '--count' '
git rev-list --count B...C > actual &&
test "$(cat actual)" = 2
'
test_expect_success '--cherry-pick foo comes up empty' '
test -z "$(git rev-list --left-right --cherry-pick B...C -- foo)"
'
cat >expect <<EOF
>tags/C
EOF
test_expect_success '--cherry-pick bar does not come up empty' '
git rev-list --left-right --cherry-pick B...C -- bar > actual &&
git name-rev --stdin --name-only --refs="*tags/*" \
< actual > actual.named &&
test_cmp expect actual.named
'
test_expect_success 'bar does not come up empty' '
git rev-list --left-right B...C -- bar > actual &&
git name-rev --stdin --name-only --refs="*tags/*" \
< actual > actual.named &&
test_cmp expect actual.named
'
cat >expect <<EOF
<tags/F
>tags/E
EOF
test_expect_success '--cherry-pick bar does not come up empty (II)' '
git rev-list --left-right --cherry-pick F...E -- bar > actual &&
git name-rev --stdin --name-only --refs="*tags/*" \
< actual > actual.named &&
test_cmp expect actual.named
'
test_expect_success 'name-rev multiple --refs combine inclusive' '
git rev-list --left-right --cherry-pick F...E -- bar >actual &&
git name-rev --stdin --name-only --refs="*tags/F" --refs="*tags/E" \
<actual >actual.named &&
test_cmp expect actual.named
'
cat >expect <<EOF
<tags/F
EOF
test_expect_success 'name-rev --refs excludes non-matched patterns' '
git rev-list --left-right --right-only --cherry-pick F...E -- bar >>expect &&
git rev-list --left-right --cherry-pick F...E -- bar >actual &&
git name-rev --stdin --name-only --refs="*tags/F" \
<actual >actual.named &&
test_cmp expect actual.named
'
cat >expect <<EOF
<tags/F
EOF
test_expect_success 'name-rev --exclude excludes matched patterns' '
git rev-list --left-right --right-only --cherry-pick F...E -- bar >>expect &&
git rev-list --left-right --cherry-pick F...E -- bar >actual &&
git name-rev --stdin --name-only --refs="*tags/*" --exclude="*E" \
<actual >actual.named &&
test_cmp expect actual.named
'
test_expect_success 'name-rev --no-refs clears the refs list' '
git rev-list --left-right --cherry-pick F...E -- bar >expect &&
git name-rev --stdin --name-only --refs="*tags/F" --refs="*tags/E" --no-refs --refs="*tags/G" \
<expect >actual &&
test_cmp expect actual
'
cat >expect <<EOF
+tags/F
=tags/D
+tags/E
=tags/C
EOF
test_expect_success '--cherry-mark' '
git rev-list --cherry-mark F...E -- bar > actual &&
git name-rev --stdin --name-only --refs="*tags/*" \
< actual > actual.named &&
test_cmp expect actual.named
'
cat >expect <<EOF
<tags/F
=tags/D
>tags/E
=tags/C
EOF
test_expect_success '--cherry-mark --left-right' '
git rev-list --cherry-mark --left-right F...E -- bar > actual &&
git name-rev --stdin --name-only --refs="*tags/*" \
< actual > actual.named &&
test_cmp expect actual.named
'
cat >expect <<EOF
tags/E
EOF
test_expect_success '--cherry-pick --right-only' '
git rev-list --cherry-pick --right-only F...E -- bar > actual &&
git name-rev --stdin --name-only --refs="*tags/*" \
< actual > actual.named &&
test_cmp expect actual.named
'
test_expect_success '--cherry-pick --left-only' '
git rev-list --cherry-pick --left-only E...F -- bar > actual &&
git name-rev --stdin --name-only --refs="*tags/*" \
< actual > actual.named &&
test_cmp expect actual.named
'
cat >expect <<EOF
+tags/E
=tags/C
EOF
test_expect_success '--cherry' '
git rev-list --cherry F...E -- bar > actual &&
git name-rev --stdin --name-only --refs="*tags/*" \
< actual > actual.named &&
test_cmp expect actual.named
'
cat >expect <<EOF
1 1
EOF
test_expect_success '--cherry --count' '
git rev-list --cherry --count F...E -- bar > actual &&
test_cmp expect actual
'
cat >expect <<EOF
2 2
EOF
test_expect_success '--cherry-mark --count' '
git rev-list --cherry-mark --count F...E -- bar > actual &&
test_cmp expect actual
'
cat >expect <<EOF
1 1 2
EOF
test_expect_success '--cherry-mark --left-right --count' '
git rev-list --cherry-mark --left-right --count F...E -- bar > actual &&
test_cmp expect actual
'
test_expect_success '--cherry-pick with independent, but identical branches' '
git symbolic-ref HEAD refs/heads/independent &&
rm .git/index &&
echo Hallo > foo &&
git add foo &&
test_tick &&
git commit -m "independent" &&
echo Bello > foo &&
test_tick &&
git commit -m "independent, too" foo &&
test -z "$(git rev-list --left-right --cherry-pick \
HEAD...main -- foo)"
'
cat >expect <<EOF
1 2
EOF
test_expect_success '--count --left-right' '
git rev-list --count --left-right C...D > actual &&
test_cmp expect actual
'
patch-ids: handle duplicate hashmap entries This fixes a bug introduced in dfb7a1b4d0 (patch-ids: stop using a hand-rolled hashmap implementation, 2016-07-29) in which git rev-list --cherry-pick A...B will fail to suppress commits reachable from A even if a commit with matching patch-id appears in B. Around the time of that commit, the algorithm for "--cherry-pick" looked something like this: 0. Traverse all of the commits, marking them as being on the left or right side of the symmetric difference. 1. Iterate over the left-hand commits, inserting a patch-id struct for each into a hashmap, and pointing commit->util to the patch-id struct. 2. Iterate over the right-hand commits, checking which are present in the hashmap. If so, we exclude the commit from the output _and_ we mark the patch-id as "seen". 3. Iterate again over the left-hand commits, checking whether commit->util->seen is set; if so, exclude them from the output. At the end, we'll have eliminated commits from both sides that have a matching patch-id on the other side. But there's a subtle assumption here: for any given patch-id, we must have exactly one struct representing it. If two commits from A both have the same patch-id and we allow duplicates in the hashmap, then we run into a problem: a. In step 1, we insert two patch-id structs into the hashmap. b. In step 2, our lookups will find only one of these structs, so only one "seen" flag is marked. c. In step 3, one of the commits in A will have its commit->util->seen set, but the other will not. We'll erroneously output the latter. Prior to dfb7a1b4d0, our hashmap did not allow duplicates. Afterwards, it used hashmap_add(), which explicitly does allow duplicates. At that point, the solution would have been easy: when we are about to add a duplicate, skip doing so and return the existing entry which matches. But it gets more complicated. In 683f17ec44 (patch-ids: replace the seen indicator with a commit pointer, 2016-07-29), our step 3 goes away entirely. Instead, in step 2, when the right-hand side finds a matching patch_id from the left-hand side, we can directly mark the left-hand patch_id->commit to be omitted. Solving that would be easy, too; there's a one-to-many relationship of patch-ids to commits, so we just need to keep a list. But there's more. Commit b3dfeebb92 (rebase: avoid computing unnecessary patch IDs, 2016-07-29) built on that by lazily computing the full patch-ids. So we don't even know when adding to the hashmap whether two commits truly have the same id. We'd have to tentatively assign them a list, and then possibly split them apart (possibly into N new structs) at the moment we compute the real patch-ids. This could work, but it's complicated and error-prone. Instead, let's accept that we may store duplicates, and teach the lookup side to be more clever. Rather than asking for a single matching patch-id, it will need to iterate over all matching patch-ids. This does mean examining every entry in a single hash bucket, but the worst-case for a hash lookup was already doing that. We'll keep the hashmap details out of the caller by providing a simple iteration interface. We can retain the simple has_commit_patch_id() interface for the other callers, but we'll simplify its return value into an integer, rather than returning the patch_id struct. That way they won't be tempted to look at the "commit" field of the return value without iterating. Reported-by: Arnaud Morin <arnaud.morin@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-12 16:52:32 +01:00
test_expect_success '--cherry-pick with duplicates on each side' '
git checkout -b dup-orig &&
test_commit dup-base &&
git revert dup-base &&
git cherry-pick dup-base &&
git checkout -b dup-side HEAD~3 &&
test_tick &&
git cherry-pick -3 dup-orig &&
git rev-list --cherry-pick dup-orig...dup-side >actual &&
test_must_be_empty actual
'
rebase: avoid computing unnecessary patch IDs The `rebase` family of Git commands avoid applying patches that were already integrated upstream. They do that by using the revision walking option that computes the patch IDs of the two sides of the rebase (local-only patches vs upstream-only ones) and skipping those local patches whose patch ID matches one of the upstream ones. In many cases, this causes unnecessary churn, as already the set of paths touched by a given commit would suffice to determine that an upstream patch has no local equivalent. This hurts performance in particular when there are a lot of upstream patches, and/or large ones. Therefore, let's introduce the concept of a "diff-header-only" patch ID, compare those first, and only evaluate the "full" patch ID lazily. Please note that in contrast to the "full" patch IDs, those "diff-header-only" patch IDs are prone to collide with one another, as adjacent commits frequently touch the very same files. Hence we now have to be careful to allow multiple hash entries with the same hash. We accomplish that by using the hashmap_add() function that does not even test for hash collisions. This also allows us to evaluate the full patch ID lazily, i.e. only when we found commits with matching diff-header-only patch IDs. We add a performance test that demonstrates ~1-6% improvement. In practice this will depend on various factors such as how many upstream changes and how big those changes are along with whether file system caches are cold or warm. As Git's test suite has no way of catching performance regressions, we also add a regression test that verifies that the full patch ID computation is skipped when the diff-header-only computation suffices. Signed-off-by: Kevin Willford <kcwillford@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-29 18:19:20 +02:00
# Corrupt the object store deliberately to make sure
# the object is not even checked for its existence.
remove_loose_object () {
sha1="$(git rev-parse "$1")" &&
remainder=${sha1#??} &&
firsttwo=${sha1%$remainder} &&
rm .git/objects/$firsttwo/$remainder
}
test_expect_success '--cherry-pick avoids looking at full diffs' '
git checkout -b shy-diff &&
test_commit dont-look-at-me &&
echo Hello >dont-look-at-me.t &&
test_tick &&
git commit -m tip dont-look-at-me.t &&
git checkout -b mainline HEAD^ &&
test_commit to-cherry-pick &&
remove_loose_object shy-diff^:dont-look-at-me.t &&
git rev-list --cherry-pick ...shy-diff
'
test_done