git-commit-vandalism/t/t1401-symbolic-ref.sh

227 lines
6.7 KiB
Bash
Raw Normal View History

#!/bin/sh
test_description='basic symbolic-ref tests'
TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh
# If the tests munging HEAD fail, they can break detection of
# the git repo, meaning that further tests will operate on
# the surrounding git repo instead of the trash directory.
reset_to_sane() {
rm -rf .git &&
"$TAR" xf .git.tar
}
test_expect_success 'setup' '
git symbolic-ref HEAD refs/heads/foo &&
test_commit file &&
"$TAR" cf .git.tar .git/
'
test_expect_success 'symbolic-ref read/write roundtrip' '
git symbolic-ref HEAD refs/heads/read-write-roundtrip &&
echo refs/heads/read-write-roundtrip >expect &&
git symbolic-ref HEAD >actual &&
test_cmp expect actual
'
test_expect_success 'symbolic-ref refuses non-ref for HEAD' '
test_must_fail git symbolic-ref HEAD foo
'
reset_to_sane
test_expect_success 'symbolic-ref refuses bare sha1' '
test_must_fail git symbolic-ref HEAD $(git rev-parse HEAD)
'
reset_to_sane
test_expect_success 'HEAD cannot be removed' '
test_must_fail git symbolic-ref -d HEAD
'
reset_to_sane
test_expect_success 'symbolic-ref can be deleted' '
git symbolic-ref NOTHEAD refs/heads/foo &&
git symbolic-ref -d NOTHEAD &&
git rev-parse refs/heads/foo &&
test_must_fail git symbolic-ref NOTHEAD
'
reset_to_sane
test_expect_success 'symbolic-ref can delete dangling symref' '
git symbolic-ref NOTHEAD refs/heads/missing &&
git symbolic-ref -d NOTHEAD &&
test_must_fail git rev-parse refs/heads/missing &&
test_must_fail git symbolic-ref NOTHEAD
'
reset_to_sane
test_expect_success 'symbolic-ref fails to delete missing FOO' '
echo "fatal: Cannot delete FOO, not a symbolic ref" >expect &&
test_must_fail git symbolic-ref -d FOO >actual 2>&1 &&
test_cmp expect actual
'
reset_to_sane
test_expect_success 'symbolic-ref fails to delete real ref' '
echo "fatal: Cannot delete refs/heads/foo, not a symbolic ref" >expect &&
test_must_fail git symbolic-ref -d refs/heads/foo >actual 2>&1 &&
git rev-parse --verify refs/heads/foo &&
test_cmp expect actual
'
reset_to_sane
test_expect_success 'create large ref name' '
# make 256+ character ref; some systems may not handle that,
# so be gentle
long=0123456789abcdef &&
long=$long/$long/$long/$long &&
long=$long/$long/$long/$long &&
long_ref=refs/heads/$long &&
tree=$(git write-tree) &&
commit=$(echo foo | git commit-tree $tree) &&
if git update-ref $long_ref $commit; then
test_set_prereq LONG_REF
else
echo >&2 "long refs not supported"
fi
'
test_expect_success LONG_REF 'symbolic-ref can point to large ref name' '
git symbolic-ref HEAD $long_ref &&
echo $long_ref >expect &&
git symbolic-ref HEAD >actual &&
test_cmp expect actual
'
test_expect_success LONG_REF 'we can parse long symbolic ref' '
echo $commit >expect &&
git rev-parse --verify HEAD >actual &&
test_cmp expect actual
'
test_expect_success 'symbolic-ref reports failure in exit code' '
test_when_finished "rm -f .git/HEAD.lock" &&
>.git/HEAD.lock &&
test_must_fail git symbolic-ref HEAD refs/heads/whatever
'
test_expect_success 'symbolic-ref writes reflog entry' '
git checkout -b log1 &&
test_commit one &&
git checkout -b log2 &&
test_commit two &&
git checkout --orphan orphan &&
git symbolic-ref -m create HEAD refs/heads/log1 &&
git symbolic-ref -m update HEAD refs/heads/log2 &&
cat >expect <<-\EOF &&
update
create
EOF
git log --format=%gs -g -2 >actual &&
test_cmp expect actual
'
create_symref: use existing ref-lock code The create_symref() function predates the existence of "struct lock_file", let alone the more recent "struct ref_lock". Instead, it just does its own manual dot-locking. Besides being more code, this has a few downsides: - if git is interrupted while holding the lock, we don't clean up the lockfile - we don't do the usual directory/filename conflict check. So you can sometimes create a symref "refs/heads/foo/bar", even if "refs/heads/foo" exists (namely, if the refs are packed and we do not hit the d/f conflict in the filesystem). This patch refactors create_symref() to use the "struct ref_lock" interface, which handles both of these things. There are a few bonus cleanups that come along with it: - we leaked ref_path in some error cases - the symref contents were stored in a fixed-size buffer, putting an artificial (albeit large) limitation on the length of the refname. We now write through fprintf, and handle refnames of any size. - we called adjust_shared_perm only after the file was renamed into place, creating a potential race with readers in a shared repository. The lockfile code now handles this when creating the lockfile, making it atomic. - the legacy prefer_symlink_refs path did not do any locking at all. Admittedly, it is not atomic from a reader's perspective (as it unlinks and re-creates the symlink to overwrite), but at least it cannot conflict with other writers now. - the result of this patch is hopefully more readable. It eliminates three goto labels. Two were for error checking that is now simplified, and the third was to reach shared code that has been pulled into its own function. Signed-off-by: Jeff King <peff@peff.net> Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 06:57:01 +01:00
test_expect_success 'symbolic-ref does not create ref d/f conflicts' '
git checkout -b df &&
test_commit df &&
test_must_fail git symbolic-ref refs/heads/df/conflict refs/heads/df &&
git pack-refs --all --prune &&
test_must_fail git symbolic-ref refs/heads/df/conflict refs/heads/df
'
refs_resolve_ref_unsafe: handle d/f conflicts for writes If our call to refs_read_raw_ref() fails, we check errno to see if the ref is simply missing, or if we encountered a more serious error. If it's just missing, then in "write" mode (i.e., when RESOLVE_REFS_READING is not set), this is perfectly fine. However, checking for ENOENT isn't sufficient to catch all missing-ref cases. In the filesystem backend, we may also see EISDIR when we try to resolve "a" and "a/b" exists. Likewise, we may see ENOTDIR if we try to resolve "a/b" and "a" exists. In both of those cases, we know that our resolved ref doesn't exist, but we return an error (rather than reporting the refname and returning a null sha1). This has been broken for a long time, but nobody really noticed because the next step after resolving without the READING flag is usually to lock the ref and write it. But in both of those cases, the write will fail with the same errno due to the directory/file conflict. There are two cases where we can notice this, though: 1. If we try to write "a" and there's a leftover directory already at "a", even though there is no ref "a/b". The actual write is smart enough to move the empty "a" out of the way. This is reasonably rare, if only because the writing code has to do an independent resolution before trying its write (because the actual update_ref() code handles this case fine). The notes-merge code does this, and before the fix in the prior commit t3308 erroneously expected this case to fail. 2. When resolving symbolic refs, we typically do not use the READING flag because we want to resolve even symrefs that point to unborn refs. Even if those unborn refs could not actually be written because of d/f conflicts with existing refs. You can see this by asking "git symbolic-ref" to report the target of a symref pointing past a d/f conflict. We can fix the problem by recognizing the other "missing" errnos and treating them like ENOENT. This should be safe to do even for callers who are then going to actually write the ref, because the actual writing process will fail if the d/f conflict is a real one (and t1404 checks these cases). Arguably this should be the responsibility of the files-backend to normalize all "missing ref" errors into ENOENT (since something like EISDIR may not be meaningful at all to a database backend). However other callers of refs_read_raw_ref() may actually care about the distinction; putting this into resolve_ref() is the minimal fix for now. The new tests in t1401 use git-symbolic-ref, which is the most direct way to check the resolution by itself. Interestingly we actually had a test that setup this case already, but we only used it to verify that the funny state could be overwritten, not that it could be resolved. We also add a new test in t3200, as "branch -m" was the original motivation for looking into this. What happens is this: 0. HEAD is pointing to branch "a" 1. The user asks to rename "a" to "a/b". 2. We create "a/b" and delete "a". 3. We then try to update any worktree HEADs that point to the renamed ref (including the main repo HEAD). To do that, we have to resolve each HEAD. But now our HEAD is pointing at "a", and we get EISDIR due to the loose "a/b". As a result, we think there is no HEAD, and we do not update it. It now points to the bogus "a". Interestingly this case used to work, but only accidentally. Before 31824d180d (branch: fix branch renaming not updating HEADs correctly, 2017-08-24), we'd update any HEAD which we couldn't resolve. That was wrong, but it papered over the fact that we were incorrectly failing to resolve HEAD. So while the bug demonstrated by the git-symbolic-ref is quite old, the regression to "branch -m" is recent. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-06 16:42:17 +02:00
test_expect_success 'symbolic-ref can overwrite pointer to invalid name' '
test_when_finished reset_to_sane &&
lock_ref_sha1_basic: handle REF_NODEREF with invalid refs We sometimes call lock_ref_sha1_basic with REF_NODEREF to operate directly on a symbolic ref. This is used, for example, to move to a detached HEAD, or when updating the contents of HEAD via checkout or symbolic-ref. However, the first step of the function is to resolve the refname to get the "old" sha1, and we do so without telling resolve_ref_unsafe() that we are only interested in the symref. As a result, we may detect a problem there not with the symref itself, but with something it points to. The real-world example I found (and what is used in the test suite) is a HEAD pointing to a ref that cannot exist, because it would cause a directory/file conflict with other existing refs. This situation is somewhat broken, of course, as trying to _commit_ on that HEAD would fail. But it's not explicitly forbidden, and we should be able to move away from it. However, neither "git checkout" nor "git symbolic-ref" can do so. We try to take the lock on HEAD, which is pointing to a non-existent ref. We bail from resolve_ref_unsafe() with errno set to EISDIR, and the lock code thinks we are attempting to create a d/f conflict. Of course we're not. The problem is that the lock code has no idea what level we were at when we got EISDIR, so trying to diagnose or remove empty directories for HEAD is not useful. To make things even more complicated, we only get EISDIR in the loose-ref case. If the refs are packed, the resolution may "succeed", giving us the pointed-to ref in "refname", but a null oid. Later, we say "ah, the null oid means we are creating; let's make sure there is room for it", but mistakenly check against the _resolved_ refname, not the original. We can fix this by making two tweaks: 1. Call resolve_ref_unsafe() with RESOLVE_REF_NO_RECURSE when REF_NODEREF is set. This means any errors we get will be from the orig_refname, and we can act accordingly. We already do this in the REF_DELETING case, but we should do it for update, too. 2. If we do get a "refname" return from resolve_ref_unsafe(), even with RESOLVE_REF_NO_RECURSE it may be the name of the ref pointed-to by a symref. We already normalize this back to orig_refname before taking the lockfile, but we need to do so before the null_oid check. While we're rearranging the REF_NODEREF handling, we can also bump the initialization of lflags to the top of the function, where we are setting up other flags. This saves us from having yet another conditional block on REF_NODEREF just to set it later. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-12 22:45:09 +01:00
head=$(git rev-parse HEAD) &&
git symbolic-ref HEAD refs/heads/outer &&
refs_resolve_ref_unsafe: handle d/f conflicts for writes If our call to refs_read_raw_ref() fails, we check errno to see if the ref is simply missing, or if we encountered a more serious error. If it's just missing, then in "write" mode (i.e., when RESOLVE_REFS_READING is not set), this is perfectly fine. However, checking for ENOENT isn't sufficient to catch all missing-ref cases. In the filesystem backend, we may also see EISDIR when we try to resolve "a" and "a/b" exists. Likewise, we may see ENOTDIR if we try to resolve "a/b" and "a" exists. In both of those cases, we know that our resolved ref doesn't exist, but we return an error (rather than reporting the refname and returning a null sha1). This has been broken for a long time, but nobody really noticed because the next step after resolving without the READING flag is usually to lock the ref and write it. But in both of those cases, the write will fail with the same errno due to the directory/file conflict. There are two cases where we can notice this, though: 1. If we try to write "a" and there's a leftover directory already at "a", even though there is no ref "a/b". The actual write is smart enough to move the empty "a" out of the way. This is reasonably rare, if only because the writing code has to do an independent resolution before trying its write (because the actual update_ref() code handles this case fine). The notes-merge code does this, and before the fix in the prior commit t3308 erroneously expected this case to fail. 2. When resolving symbolic refs, we typically do not use the READING flag because we want to resolve even symrefs that point to unborn refs. Even if those unborn refs could not actually be written because of d/f conflicts with existing refs. You can see this by asking "git symbolic-ref" to report the target of a symref pointing past a d/f conflict. We can fix the problem by recognizing the other "missing" errnos and treating them like ENOENT. This should be safe to do even for callers who are then going to actually write the ref, because the actual writing process will fail if the d/f conflict is a real one (and t1404 checks these cases). Arguably this should be the responsibility of the files-backend to normalize all "missing ref" errors into ENOENT (since something like EISDIR may not be meaningful at all to a database backend). However other callers of refs_read_raw_ref() may actually care about the distinction; putting this into resolve_ref() is the minimal fix for now. The new tests in t1401 use git-symbolic-ref, which is the most direct way to check the resolution by itself. Interestingly we actually had a test that setup this case already, but we only used it to verify that the funny state could be overwritten, not that it could be resolved. We also add a new test in t3200, as "branch -m" was the original motivation for looking into this. What happens is this: 0. HEAD is pointing to branch "a" 1. The user asks to rename "a" to "a/b". 2. We create "a/b" and delete "a". 3. We then try to update any worktree HEADs that point to the renamed ref (including the main repo HEAD). To do that, we have to resolve each HEAD. But now our HEAD is pointing at "a", and we get EISDIR due to the loose "a/b". As a result, we think there is no HEAD, and we do not update it. It now points to the bogus "a". Interestingly this case used to work, but only accidentally. Before 31824d180d (branch: fix branch renaming not updating HEADs correctly, 2017-08-24), we'd update any HEAD which we couldn't resolve. That was wrong, but it papered over the fact that we were incorrectly failing to resolve HEAD. So while the bug demonstrated by the git-symbolic-ref is quite old, the regression to "branch -m" is recent. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-06 16:42:17 +02:00
test_when_finished "git update-ref -d refs/heads/outer/inner" &&
lock_ref_sha1_basic: handle REF_NODEREF with invalid refs We sometimes call lock_ref_sha1_basic with REF_NODEREF to operate directly on a symbolic ref. This is used, for example, to move to a detached HEAD, or when updating the contents of HEAD via checkout or symbolic-ref. However, the first step of the function is to resolve the refname to get the "old" sha1, and we do so without telling resolve_ref_unsafe() that we are only interested in the symref. As a result, we may detect a problem there not with the symref itself, but with something it points to. The real-world example I found (and what is used in the test suite) is a HEAD pointing to a ref that cannot exist, because it would cause a directory/file conflict with other existing refs. This situation is somewhat broken, of course, as trying to _commit_ on that HEAD would fail. But it's not explicitly forbidden, and we should be able to move away from it. However, neither "git checkout" nor "git symbolic-ref" can do so. We try to take the lock on HEAD, which is pointing to a non-existent ref. We bail from resolve_ref_unsafe() with errno set to EISDIR, and the lock code thinks we are attempting to create a d/f conflict. Of course we're not. The problem is that the lock code has no idea what level we were at when we got EISDIR, so trying to diagnose or remove empty directories for HEAD is not useful. To make things even more complicated, we only get EISDIR in the loose-ref case. If the refs are packed, the resolution may "succeed", giving us the pointed-to ref in "refname", but a null oid. Later, we say "ah, the null oid means we are creating; let's make sure there is room for it", but mistakenly check against the _resolved_ refname, not the original. We can fix this by making two tweaks: 1. Call resolve_ref_unsafe() with RESOLVE_REF_NO_RECURSE when REF_NODEREF is set. This means any errors we get will be from the orig_refname, and we can act accordingly. We already do this in the REF_DELETING case, but we should do it for update, too. 2. If we do get a "refname" return from resolve_ref_unsafe(), even with RESOLVE_REF_NO_RECURSE it may be the name of the ref pointed-to by a symref. We already normalize this back to orig_refname before taking the lockfile, but we need to do so before the null_oid check. While we're rearranging the REF_NODEREF handling, we can also bump the initialization of lflags to the top of the function, where we are setting up other flags. This saves us from having yet another conditional block on REF_NODEREF just to set it later. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-12 22:45:09 +01:00
git update-ref refs/heads/outer/inner $head &&
git symbolic-ref HEAD refs/heads/unrelated
'
refs_resolve_ref_unsafe: handle d/f conflicts for writes If our call to refs_read_raw_ref() fails, we check errno to see if the ref is simply missing, or if we encountered a more serious error. If it's just missing, then in "write" mode (i.e., when RESOLVE_REFS_READING is not set), this is perfectly fine. However, checking for ENOENT isn't sufficient to catch all missing-ref cases. In the filesystem backend, we may also see EISDIR when we try to resolve "a" and "a/b" exists. Likewise, we may see ENOTDIR if we try to resolve "a/b" and "a" exists. In both of those cases, we know that our resolved ref doesn't exist, but we return an error (rather than reporting the refname and returning a null sha1). This has been broken for a long time, but nobody really noticed because the next step after resolving without the READING flag is usually to lock the ref and write it. But in both of those cases, the write will fail with the same errno due to the directory/file conflict. There are two cases where we can notice this, though: 1. If we try to write "a" and there's a leftover directory already at "a", even though there is no ref "a/b". The actual write is smart enough to move the empty "a" out of the way. This is reasonably rare, if only because the writing code has to do an independent resolution before trying its write (because the actual update_ref() code handles this case fine). The notes-merge code does this, and before the fix in the prior commit t3308 erroneously expected this case to fail. 2. When resolving symbolic refs, we typically do not use the READING flag because we want to resolve even symrefs that point to unborn refs. Even if those unborn refs could not actually be written because of d/f conflicts with existing refs. You can see this by asking "git symbolic-ref" to report the target of a symref pointing past a d/f conflict. We can fix the problem by recognizing the other "missing" errnos and treating them like ENOENT. This should be safe to do even for callers who are then going to actually write the ref, because the actual writing process will fail if the d/f conflict is a real one (and t1404 checks these cases). Arguably this should be the responsibility of the files-backend to normalize all "missing ref" errors into ENOENT (since something like EISDIR may not be meaningful at all to a database backend). However other callers of refs_read_raw_ref() may actually care about the distinction; putting this into resolve_ref() is the minimal fix for now. The new tests in t1401 use git-symbolic-ref, which is the most direct way to check the resolution by itself. Interestingly we actually had a test that setup this case already, but we only used it to verify that the funny state could be overwritten, not that it could be resolved. We also add a new test in t3200, as "branch -m" was the original motivation for looking into this. What happens is this: 0. HEAD is pointing to branch "a" 1. The user asks to rename "a" to "a/b". 2. We create "a/b" and delete "a". 3. We then try to update any worktree HEADs that point to the renamed ref (including the main repo HEAD). To do that, we have to resolve each HEAD. But now our HEAD is pointing at "a", and we get EISDIR due to the loose "a/b". As a result, we think there is no HEAD, and we do not update it. It now points to the bogus "a". Interestingly this case used to work, but only accidentally. Before 31824d180d (branch: fix branch renaming not updating HEADs correctly, 2017-08-24), we'd update any HEAD which we couldn't resolve. That was wrong, but it papered over the fact that we were incorrectly failing to resolve HEAD. So while the bug demonstrated by the git-symbolic-ref is quite old, the regression to "branch -m" is recent. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-06 16:42:17 +02:00
test_expect_success 'symbolic-ref can resolve d/f name (EISDIR)' '
test_when_finished reset_to_sane &&
head=$(git rev-parse HEAD) &&
git symbolic-ref HEAD refs/heads/outer/inner &&
test_when_finished "git update-ref -d refs/heads/outer" &&
git update-ref refs/heads/outer $head &&
echo refs/heads/outer/inner >expect &&
git symbolic-ref HEAD >actual &&
test_cmp expect actual
'
test_expect_success 'symbolic-ref can resolve d/f name (ENOTDIR)' '
test_when_finished reset_to_sane &&
head=$(git rev-parse HEAD) &&
git symbolic-ref HEAD refs/heads/outer &&
test_when_finished "git update-ref -d refs/heads/outer/inner" &&
git update-ref refs/heads/outer/inner $head &&
echo refs/heads/outer >expect &&
git symbolic-ref HEAD >actual &&
test_cmp expect actual
'
test_expect_success 'symbolic-ref refuses invalid target for non-HEAD' '
test_must_fail git symbolic-ref refs/heads/invalid foo..bar
'
test_expect_success 'symbolic-ref allows top-level target for non-HEAD' '
git symbolic-ref refs/heads/top-level FETCH_HEAD &&
git update-ref FETCH_HEAD HEAD &&
test_cmp_rev top-level HEAD
'
symbolic-ref: teach "--[no-]recurse" option Suppose you are managing many maintenance tracks in your project, and some of the more recent ones are maint-2.36 and maint-2.37. Further imagine that your project recently tagged the official 2.38 release, which means you would need to start maint-2.38 track soon, by doing: $ git checkout -b maint-2.38 v2.38.0^0 $ git branch --list 'maint-2.3[6-9]' * maint-2.38 maint-2.36 maint-2.37 So far, so good. But it also is reasonable to want not to have to worry about which maintenance track is the latest, by pointing a more generic-sounding 'maint' branch at it, by doing: $ git symbolic-ref refs/heads/maint refs/heads/maint-2.38 which would allow you to say "whichever it is, check out the latest maintenance track", by doing: $ git checkout maint $ git branch --show-current maint-2.38 It is arguably better to say that we are on 'maint-2.38' rather than on 'maint', and "git merge/pull" would record "into maint-2.38" and not "into maint", so I think what we have is a good behaviour. One thing that is slightly irritating, however, is that I do not think there is a good way (other than "cat .git/HEAD") to learn that you checked out 'maint' to get into that state. Just like the output of "git branch --show-current" shows above, "git symbolic-ref HEAD" would report 'refs/heads/maint-2.38', bypassing the intermediate symbolic ref at 'refs/heads/maint' that is pointed at by HEAD. The internal resolve_ref() API already has the necessary support for stopping after resolving a single level of a symbolic-ref, and we can expose it by adding a "--[no-]recurse" option to the command. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-10-08 00:00:39 +02:00
test_expect_success 'symbolic-ref pointing at another' '
git update-ref refs/heads/maint-2.37 HEAD &&
git symbolic-ref refs/heads/maint refs/heads/maint-2.37 &&
git checkout maint &&
git symbolic-ref HEAD >actual &&
echo refs/heads/maint-2.37 >expect &&
test_cmp expect actual &&
git symbolic-ref --no-recurse HEAD >actual &&
echo refs/heads/maint >expect &&
test_cmp expect actual
'
shorten_unambiguous_ref(): avoid sscanf() To shorten a fully qualified ref (e.g., taking "refs/heads/foo" to just "foo"), we munge the usual lookup rules ("refs/heads/%.*s", etc) to drop the ".*" modifier (so "refs/heads/%s"), and then use sscanf() to match that against the refname, pulling the "%s" content into a separate buffer. This has a few downsides: - sscanf("%s") reportedly misbehaves on macOS with some input and locale combinations, returning a partial or garbled string. See this thread: https://lore.kernel.org/git/CAGF3oAcCi+fG12j-1U0hcrWwkF5K_9WhOi6ZPHBzUUzfkrZDxA@mail.gmail.com/ - scanf's matching of "%s" is greedy. So the "refs/remotes/%s/HEAD" rule would never pull "origin" out of "refs/remotes/origin/HEAD". Instead it always produced "origin/HEAD", which is redundant with the "refs/remotes/%s" rule. - scanf in general is an error-prone interface. For example, scanning for "%s" will copy bytes into a destination string, which must have been correctly sized ahead of time to avoid a buffer overflow. In this case, the code is OK (the buffer is pessimistically sized to match the original string, which should give us a maximum). But in general, we do not want to encourage people to use scanf at all. So instead, let's note that our lookup rules are not arbitrary format strings, but all contain exactly one "%.*s" placeholder. We already rely on this, both for lookup (we feed the lookup format along with exactly one int/ptr combo to snprintf, etc) and for shortening (we munge "%.*s" to "%s", and then insist that sscanf() finds exactly one result). We can parse this manually by just matching the bytes that occur before and after the "%.*s" placeholder. While we have a few extra lines of parsing code, the result is arguably simpler, as can skip the preprocessing step and its tricky memory management entirely. The in-code comments should explain the parsing strategy, but there's one subtle change here. The original code allocated a single buffer, and then overwrote it in each loop iteration, since that's the only option sscanf() gives us. But our parser can actually return a ptr/len combo for the matched string, which is all we need (since we just feed it back to the lookup rules with "%.*s"), and then copy it only when returning to the caller. There are a few new tests here, all using symbolic-ref (the code can be triggered in many ways, but symrefs are convenient in that we don't need to create a real ref, which avoids any complications from the filesystem munging the name): - the first covers the real-world case which misbehaved on macOS. Setting LC_ALL is required to trigger the problem there (since otherwise our tests use LC_ALL=C), and hopefully is at worst simply ignored on other systems (and doesn't cause libc to complain, etc, on systems without that locale). - the second covers the "origin/HEAD" case as discussed above, which is now fixed - the remainder are for "weird" cases that work both before and after this patch, but would be easy to get wrong with off-by-one problems in the parsing (and came out of discussions and earlier iterations of the patch that did get them wrong). - absent here are tests of boring, expected-to-work cases like "refs/heads/foo", etc. Those are covered all over the test suite both explicitly (for-each-ref's refname:short) and implicitly (in the output of git-status, etc). Reported-by: 孟子易 <mengziyi540841@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-15 16:16:21 +01:00
test_expect_success 'symbolic-ref --short handles complex utf8 case' '
name="测试-加-增加-加-增加" &&
git symbolic-ref TEST_SYMREF "refs/heads/$name" &&
# In the real world, we saw problems with this case only
# when the locale includes UTF-8. Set it here to try to make things as
# hard as possible for us to pass, but in practice we should do the
# right thing regardless (and of course some platforms may not even
# have this locale).
LC_ALL=en_US.UTF-8 git symbolic-ref --short TEST_SYMREF >actual &&
echo "$name" >expect &&
test_cmp expect actual
'
test_expect_success 'symbolic-ref --short handles name with suffix' '
git symbolic-ref TEST_SYMREF "refs/remotes/origin/HEAD" &&
git symbolic-ref --short TEST_SYMREF >actual &&
echo "origin" >expect &&
test_cmp expect actual
'
test_expect_success 'symbolic-ref --short handles almost-matching name' '
git symbolic-ref TEST_SYMREF "refs/headsXfoo" &&
git symbolic-ref --short TEST_SYMREF >actual &&
echo "headsXfoo" >expect &&
test_cmp expect actual
'
test_expect_success 'symbolic-ref --short handles name with percent' '
git symbolic-ref TEST_SYMREF "refs/heads/%foo" &&
git symbolic-ref --short TEST_SYMREF >actual &&
echo "%foo" >expect &&
test_cmp expect actual
'
test_done