2005-06-06 22:31:29 +02:00
|
|
|
#include "cache.h"
|
2014-10-01 12:28:42 +02:00
|
|
|
#include "lockfile.h"
|
2006-12-19 23:34:12 +01:00
|
|
|
#include "refs.h"
|
2006-11-19 22:22:44 +01:00
|
|
|
#include "object.h"
|
|
|
|
#include "tag.h"
|
2007-09-28 17:28:54 +02:00
|
|
|
#include "dir.h"
|
upload/receive-pack: allow hiding ref hierarchies
A repository may have refs that are only used for its internal
bookkeeping purposes that should not be exposed to the others that
come over the network.
Teach upload-pack to omit some refs from its initial advertisement
by paying attention to the uploadpack.hiderefs multi-valued
configuration variable. Do the same to receive-pack via the
receive.hiderefs variable. As a convenient short-hand, allow using
transfer.hiderefs to set the value to both of these variables.
Any ref that is under the hierarchies listed in the value of these
variables is excluded from responses to requests made by "ls-remote",
"fetch", etc. (for upload-pack) and "push" (for receive-pack).
Because these hidden refs do not count as OUR_REF, an attempt to
fetch objects at the tip of them will be rejected, and because these
refs do not get advertised, "git push :" will not see local branches
that have the same name as them as "matching" ones to be sent.
An attempt to update/delete these hidden refs with an explicit
refspec, e.g. "git push origin :refs/hidden/22", is rejected. This
is not a new restriction. To the pusher, it would appear that there
is no such ref, so its push request will conclude with "Now that I
sent you all the data, it is time for you to update the refs. I saw
that the ref did not exist when I started pushing, and I want the
result to point at this commit". The receiving end will apply the
compare-and-swap rule to this request and reject the push with
"Well, your update request conflicts with somebody else; I see there
is such a ref.", which is the right thing to do. Otherwise a push to
a hidden ref will always be "the last one wins", which is not a good
default.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-19 01:08:30 +01:00
|
|
|
#include "string-list.h"
|
2005-06-06 22:31:29 +02:00
|
|
|
|
2014-12-12 09:57:02 +01:00
|
|
|
struct ref_lock {
|
|
|
|
char *ref_name;
|
|
|
|
char *orig_ref_name;
|
|
|
|
struct lock_file *lk;
|
2015-05-25 20:39:22 +02:00
|
|
|
struct object_id old_oid;
|
2014-12-12 09:57:02 +01:00
|
|
|
};
|
|
|
|
|
2012-04-10 07:30:13 +02:00
|
|
|
/*
 * Classification of every byte that can appear in a refname, indexed
 * by the byte's unsigned value:
 * 0: An acceptable character for refs
 * 1: End-of-component (NUL or "/")
 * 2: ".", look for a preceding "." to reject ".." in refs
 * 3: "{", look for a preceding "@" to reject "@{" in refs
 * 4: A bad character: ASCII control characters, SP, "*", ":", "?",
 *    "[", backslash, "^", "~" or DEL
 *
 * Only the first 128 entries are written out. The remaining entries
 * (bytes 0x80-0xff) are implicitly zero, i.e. acceptable.
 */
static unsigned char refname_disposition[256] = {
	/* 0x00-0x0f: NUL terminates a component; the rest are control chars */
	1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	/* 0x10-0x1f: control chars */
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	/* 0x20-0x2f: SP and '*' are bad, '.' is class 2, '/' ends a component */
	4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 1,
	/* 0x30-0x3f: digits are fine; ':' and '?' are bad */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4,
	/* 0x40-0x4f: '@' and 'A'..'O' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50-0x5f: 'P'..'Z'; '[', backslash and '^' are bad */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0,
	/* 0x60-0x6f: '`' and 'a'..'o' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70-0x7f: 'p'..'z'; '{' is class 3; '~' and DEL are bad */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4
};
|
|
|
|
|
2015-02-12 12:12:12 +01:00
|
|
|
/*
 * Passed to lock_ref_sha1_basic() to make it tolerate a broken ref,
 * because the reference is about to be deleted anyway.
 */
#define REF_DELETING	0x02

/*
 * Set in ref_update::flags while a loose ref is being pruned.
 */
#define REF_ISPRUNING	0x04

/*
 * Set in ref_update::flags when the reference should be updated to
 * new_sha1.
 */
#define REF_HAVE_NEW	0x08

/*
 * Set in ref_update::flags when old_sha1 should be checked.
 */
#define REF_HAVE_OLD	0x10
|
2015-02-17 18:00:14 +01:00
|
|
|
|
ref_transaction_commit(): fix atomicity and avoid fd exhaustion
The old code was roughly
for update in updates:
acquire locks and check old_sha
for update in updates:
if changing value:
write_ref_to_lockfile()
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This has two problems.
Non-atomic updates
==================
The atomicity of the reference transaction depends on all pre-checks
being done in the first loop, before any changes have started being
committed in the second loop. The problem is that
write_ref_to_lockfile() (previously part of write_ref_sha1()), which
is called from the second loop, contains two more checks:
* It verifies that new_sha1 is a valid object
* If the reference being updated is a branch, it verifies that
new_sha1 points at a commit object (as opposed to a tag, tree, or
blob).
If either of these checks fails, the "transaction" is aborted during
the second loop. But this might happen after some reference updates
have already been permanently committed. In other words, the
all-or-nothing promise of "git update-ref --stdin" could be violated.
So these checks have to be moved to the first loop.
File descriptor exhaustion
==========================
The old code locked all of the references in the first loop, leaving
all of the lockfiles open until later loops. Since we might be
updating a lot of references, this could result in file descriptor
exhaustion.
The solution
============
After this patch, the code looks like
for update in updates:
acquire locks and check old_sha
if changing value:
write_ref_to_lockfile()
else:
close_ref()
for update in updates:
if changing value:
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This fixes both problems:
1. The pre-checks in write_ref_to_lockfile() are now done in the first
loop, before any changes have been committed. If any of the checks
fails, the whole transaction can now be rolled back correctly.
2. All lockfiles are closed in the first loop immediately after they
are created (either by write_ref_to_lockfile() or by close_ref()).
This means that there is never more than one open lockfile at a
time, preventing file descriptor exhaustion.
To simplify the bookkeeping across loops, add a new REF_NEEDS_COMMIT
bit to update->flags, which keeps track of whether the corresponding
lockfile needs to be committed, as opposed to just unlocked. (Since
"struct ref_update" is internal to the refs module, this change is not
visible to external callers.)
This change fixes two tests in t1400.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-24 13:35:49 +02:00
|
|
|
/*
 * Set in ref_update::flags when the update's lockfile has to be
 * committed, as opposed to merely being unlocked.
 */
#define REF_NEEDS_COMMIT	0x20
|
|
|
|
|
2014-06-04 05:38:10 +02:00
|
|
|
/*
|
|
|
|
* Try to read one refname component from the front of refname.
|
|
|
|
* Return the length of the component found, or -1 if the component is
|
|
|
|
* not legal. It is legal if it is something reasonable to have under
|
|
|
|
* ".git/refs/"; We do not like it if:
|
2012-04-10 07:30:13 +02:00
|
|
|
*
|
|
|
|
* - any path component of it begins with ".", or
|
|
|
|
* - it has double dots "..", or
|
|
|
|
* - it has ASCII control character, "~", "^", ":" or SP, anywhere, or
|
2014-07-28 19:41:53 +02:00
|
|
|
* - it ends with a "/".
|
|
|
|
* - it ends with ".lock"
|
2012-04-10 07:30:13 +02:00
|
|
|
* - it contains a "\" (backslash)
|
|
|
|
*/
|
|
|
|
static int check_refname_component(const char *refname, int flags)
|
|
|
|
{
|
|
|
|
const char *cp;
|
|
|
|
char last = '\0';
|
|
|
|
|
|
|
|
for (cp = refname; ; cp++) {
|
2014-06-04 05:38:10 +02:00
|
|
|
int ch = *cp & 255;
|
|
|
|
unsigned char disp = refname_disposition[ch];
|
|
|
|
switch (disp) {
|
2014-07-28 19:41:53 +02:00
|
|
|
case 1:
|
2014-06-04 05:38:10 +02:00
|
|
|
goto out;
|
2014-07-28 19:41:53 +02:00
|
|
|
case 2:
|
2014-06-04 05:38:10 +02:00
|
|
|
if (last == '.')
|
|
|
|
return -1; /* Refname contains "..". */
|
|
|
|
break;
|
2014-07-28 19:41:53 +02:00
|
|
|
case 3:
|
2014-06-04 05:38:10 +02:00
|
|
|
if (last == '@')
|
|
|
|
return -1; /* Refname contains "@{". */
|
2012-04-10 07:30:13 +02:00
|
|
|
break;
|
2014-07-28 19:41:53 +02:00
|
|
|
case 4:
|
2014-06-04 05:38:10 +02:00
|
|
|
return -1;
|
|
|
|
}
|
2012-04-10 07:30:13 +02:00
|
|
|
last = ch;
|
|
|
|
}
|
2014-06-04 05:38:10 +02:00
|
|
|
out:
|
2012-04-10 07:30:13 +02:00
|
|
|
if (cp == refname)
|
2012-04-10 07:30:22 +02:00
|
|
|
return 0; /* Component has zero length. */
|
2014-09-26 21:22:22 +02:00
|
|
|
if (refname[0] == '.')
|
|
|
|
return -1; /* Component starts with '.'. */
|
2014-10-01 12:28:15 +02:00
|
|
|
if (cp - refname >= LOCK_SUFFIX_LEN &&
|
|
|
|
!memcmp(cp - LOCK_SUFFIX_LEN, LOCK_SUFFIX, LOCK_SUFFIX_LEN))
|
2012-04-10 07:30:13 +02:00
|
|
|
return -1; /* Refname ends with ".lock". */
|
|
|
|
return cp - refname;
|
|
|
|
}
|
|
|
|
|
2014-07-28 19:41:53 +02:00
|
|
|
int check_refname_format(const char *refname, int flags)
|
2012-04-10 07:30:13 +02:00
|
|
|
{
|
|
|
|
int component_len, component_count = 0;
|
|
|
|
|
Add new @ shortcut for HEAD
Typing 'HEAD' is tedious, especially when we can use '@' instead.
The reason for choosing '@' is that it follows naturally from the
ref@op syntax (e.g. HEAD@{u}), except we have no ref, and no
operation, and when we don't have those, it makes sens to assume
'HEAD'.
So now we can use 'git show @~1', and all that goody goodness.
Until now '@' was a valid name, but it conflicts with this idea, so
let's make it invalid. Probably very few people, if any, used this name.
Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-09-02 08:34:30 +02:00
|
|
|
if (!strcmp(refname, "@"))
|
|
|
|
/* Refname is a single character '@'. */
|
|
|
|
return -1;
|
|
|
|
|
2012-04-10 07:30:13 +02:00
|
|
|
while (1) {
|
|
|
|
/* We are at the start of a path component. */
|
|
|
|
component_len = check_refname_component(refname, flags);
|
2012-04-10 07:30:22 +02:00
|
|
|
if (component_len <= 0) {
|
2012-04-10 07:30:13 +02:00
|
|
|
if ((flags & REFNAME_REFSPEC_PATTERN) &&
|
|
|
|
refname[0] == '*' &&
|
|
|
|
(refname[1] == '\0' || refname[1] == '/')) {
|
|
|
|
/* Accept one wildcard as a full refname component. */
|
|
|
|
flags &= ~REFNAME_REFSPEC_PATTERN;
|
|
|
|
component_len = 1;
|
|
|
|
} else {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
component_count++;
|
|
|
|
if (refname[component_len] == '\0')
|
|
|
|
break;
|
|
|
|
/* Skip to next component. */
|
|
|
|
refname += component_len + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (refname[component_len - 1] == '.')
|
|
|
|
return -1; /* Refname ends with '.'. */
|
|
|
|
if (!(flags & REFNAME_ALLOW_ONELEVEL) && component_count < 2)
|
|
|
|
return -1; /* Refname has only one component. */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct ref_entry;
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
|
2012-04-27 00:27:07 +02:00
|
|
|
/*
|
|
|
|
* Information used (along with the information in ref_entry) to
|
|
|
|
* describe a single cached reference. This data structure only
|
|
|
|
* occurs embedded in a union in struct ref_entry, and only when
|
|
|
|
* (ref_entry->flag & REF_DIR) is zero.
|
|
|
|
*/
|
2012-04-10 07:30:23 +02:00
|
|
|
struct ref_value {
|
2013-04-14 14:54:17 +02:00
|
|
|
/*
|
|
|
|
* The name of the object to which this reference resolves
|
|
|
|
* (which may be a tag object). If REF_ISBROKEN, this is
|
|
|
|
* null. If REF_ISSYMREF, then this is the name of the object
|
|
|
|
* referred to by the last reference in the symlink chain.
|
|
|
|
*/
|
2015-05-25 20:38:27 +02:00
|
|
|
struct object_id oid;
|
2013-04-14 14:54:17 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If REF_KNOWS_PEELED, then this field holds the peeled value
|
|
|
|
* of this reference, or null if the reference is known not to
|
2013-04-22 21:52:21 +02:00
|
|
|
* be peelable. See the documentation for peel_ref() for an
|
|
|
|
* exact definition of "peelable".
|
2013-04-14 14:54:17 +02:00
|
|
|
*/
|
2015-05-25 20:38:27 +02:00
|
|
|
struct object_id peeled;
|
2012-04-10 07:30:23 +02:00
|
|
|
};
|
|
|
|
|
2012-04-27 00:27:05 +02:00
|
|
|
struct ref_cache;
|
|
|
|
|
2012-04-27 00:27:07 +02:00
|
|
|
/*
|
|
|
|
* Information used (along with the information in ref_entry) to
|
|
|
|
* describe a level in the hierarchy of references. This data
|
|
|
|
* structure only occurs embedded in a union in struct ref_entry, and
|
|
|
|
* only when (ref_entry.flag & REF_DIR) is set. In that case,
|
|
|
|
* (ref_entry.flag & REF_INCOMPLETE) determines whether the references
|
|
|
|
* in the directory have already been read:
|
|
|
|
*
|
|
|
|
* (ref_entry.flag & REF_INCOMPLETE) unset -- a directory of loose
|
|
|
|
* or packed references, already read.
|
|
|
|
*
|
|
|
|
* (ref_entry.flag & REF_INCOMPLETE) set -- a directory of loose
|
|
|
|
* references that hasn't been read yet (nor has any of its
|
|
|
|
* subdirectories).
|
|
|
|
*
|
|
|
|
* Entries within a directory are stored within a growable array of
|
|
|
|
* pointers to ref_entries (entries, nr, alloc). Entries 0 <= i <
|
|
|
|
* sorted are sorted by their component name in strcmp() order and the
|
|
|
|
* remaining entries are unsorted.
|
|
|
|
*
|
|
|
|
* Loose references are read lazily, one directory at a time. When a
|
|
|
|
* directory of loose references is read, then all of the references
|
|
|
|
* in that directory are stored, and REF_INCOMPLETE stubs are created
|
|
|
|
* for any subdirectories, but the subdirectories themselves are not
|
|
|
|
* read. The reading is triggered by get_ref_dir().
|
|
|
|
*/
|
2012-04-10 07:30:24 +02:00
|
|
|
struct ref_dir {
	/* Number of elements of entries that are in use. */
	int nr;

	/* Number of elements allocated for entries. */
	int alloc;

	/*
	 * Entries with index 0 <= i < sorted are sorted by name. New
	 * entries are appended unsorted and the array is sorted only
	 * when required, so adding a reference never forces a sort.
	 * A count is kept rather than a boolean so that a future
	 * optimization could sort or search just the unsorted tail.
	 */
	int sorted;

	/* The ref_cache that contains this ref_dir. */
	struct ref_cache *ref_cache;

	/* Growable array of pointers to the entries in this directory. */
	struct ref_entry **entries;
};
|
|
|
|
|
2013-04-14 14:54:16 +02:00
|
|
|
/*
|
|
|
|
* Bit values for ref_entry::flag. REF_ISSYMREF=0x01,
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
* REF_ISPACKED=0x02, REF_ISBROKEN=0x04 and REF_BAD_NAME=0x08 are
|
|
|
|
* public values; see refs.h.
|
2013-04-14 14:54:16 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The field ref_entry->u.value.peeled of this value entry contains
|
|
|
|
* the correct peeled value for the reference, which might be
|
|
|
|
* null_sha1 if the reference is not a tag or if it is broken.
|
|
|
|
*/
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
/* Internal ref_entry::flag bit: u.value.peeled holds the correct peeled value. */
#define REF_KNOWS_PEELED 0x10
|
2012-04-27 00:27:07 +02:00
|
|
|
|
|
|
|
/* ref_entry represents a directory of references */
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
/* Internal ref_entry::flag bit: the entry is a directory, so u.subdir (not u.value) is valid. */
#define REF_DIR 0x20
|
2006-11-19 22:22:44 +01:00
|
|
|
|
2012-04-27 00:27:07 +02:00
|
|
|
/*
|
|
|
|
* Entry has not yet been read from disk (used only for REF_DIR
|
|
|
|
* entries representing loose references)
|
|
|
|
*/
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
/* Internal ref_entry::flag bit: directory not read yet; get_ref_dir() reads it and clears the bit. */
#define REF_INCOMPLETE 0x40
|
2012-04-27 00:27:07 +02:00
|
|
|
|
2012-04-10 07:30:26 +02:00
|
|
|
/*
|
|
|
|
* A ref_entry represents either a reference or a "subdirectory" of
|
2012-04-27 00:27:07 +02:00
|
|
|
* references.
|
|
|
|
*
|
|
|
|
* Each directory in the reference namespace is represented by a
|
|
|
|
* ref_entry with (flags & REF_DIR) set and containing a subdir member
|
|
|
|
* that holds the entries in that directory that have been read so
|
|
|
|
* far. If (flags & REF_INCOMPLETE) is set, then the directory and
|
|
|
|
* its subdirectories haven't been read yet. REF_INCOMPLETE is only
|
|
|
|
* used for loose reference directories.
|
|
|
|
*
|
|
|
|
* References are represented by a ref_entry with (flags & REF_DIR)
|
|
|
|
* unset and a value member that describes the reference's value. The
|
|
|
|
* flag member is at the ref_entry level, but it is also needed to
|
|
|
|
* interpret the contents of the value field (in other words, a
|
|
|
|
* ref_value object is not very much use without the enclosing
|
|
|
|
* ref_entry).
|
2012-04-10 07:30:26 +02:00
|
|
|
*
|
|
|
|
* Reference names cannot end with slash and directories' names are
|
|
|
|
* always stored with a trailing slash (except for the top-level
|
|
|
|
* directory, which is always denoted by ""). This has two nice
|
|
|
|
* consequences: (1) when the entries in each subdir are sorted
|
|
|
|
* lexicographically by name (as they usually are), the references in
|
|
|
|
* a whole tree can be generated in lexicographic order by traversing
|
|
|
|
* the tree in left-to-right, depth-first order; (2) the names of
|
|
|
|
* references and subdirectories cannot conflict, and therefore the
|
|
|
|
* presence of an empty subdirectory does not block the creation of a
|
|
|
|
* similarly-named reference. (The fact that reference names with the
|
|
|
|
* same leading components can conflict *with each other* is a
|
2015-05-11 17:25:13 +02:00
|
|
|
* separate issue that is regulated by verify_refname_available().)
|
2012-04-10 07:30:26 +02:00
|
|
|
*
|
|
|
|
* Please note that the name field contains the fully-qualified
|
|
|
|
* reference (or subdirectory) name. Space could be saved by only
|
|
|
|
* storing the relative names. But that would require the full names
|
|
|
|
* to be generated on the fly when iterating in do_for_each_ref(), and
|
|
|
|
* would break callback functions, who have always been able to assume
|
|
|
|
* that the name strings that they are passed will not be freed during
|
|
|
|
* the iteration.
|
|
|
|
*/
|
2012-04-10 07:30:13 +02:00
|
|
|
struct ref_entry {
|
|
|
|
unsigned char flag; /* ISSYMREF? ISPACKED? */
|
2012-04-10 07:30:23 +02:00
|
|
|
union {
|
2012-04-10 07:30:26 +02:00
|
|
|
struct ref_value value; /* if not (flags&REF_DIR) */
|
|
|
|
struct ref_dir subdir; /* if (flags&REF_DIR) */
|
2012-04-10 07:30:23 +02:00
|
|
|
} u;
|
2012-04-10 07:30:26 +02:00
|
|
|
/*
|
|
|
|
* The full name of the reference (e.g., "refs/heads/master")
|
|
|
|
* or the full name of the directory with a trailing slash
|
|
|
|
* (e.g., "refs/heads/"):
|
|
|
|
*/
|
2012-04-10 07:30:13 +02:00
|
|
|
char name[FLEX_ARRAY];
|
|
|
|
};
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
|
2012-04-27 00:27:07 +02:00
|
|
|
static void read_loose_refs(const char *dirname, struct ref_dir *dir);
|
|
|
|
|
2012-04-27 00:27:03 +02:00
|
|
|
static struct ref_dir *get_ref_dir(struct ref_entry *entry)
|
|
|
|
{
|
2012-04-27 00:27:07 +02:00
|
|
|
struct ref_dir *dir;
|
2012-04-27 00:27:03 +02:00
|
|
|
assert(entry->flag & REF_DIR);
|
2012-04-27 00:27:07 +02:00
|
|
|
dir = &entry->u.subdir;
|
|
|
|
if (entry->flag & REF_INCOMPLETE) {
|
|
|
|
read_loose_refs(entry->name, dir);
|
|
|
|
entry->flag &= ~REF_INCOMPLETE;
|
|
|
|
}
|
|
|
|
return dir;
|
2012-04-27 00:27:03 +02:00
|
|
|
}
|
|
|
|
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
/*
 * Decide whether a refname is safe.
 *
 * A name that starts with "refs/" is safe as long as the part after
 * "refs/" can be normalized without escaping upward; for example
 * refs/foo/../bar is safe but refs/foo/../../bar is not.
 *
 * Any other name is safe iff it consists solely of upper case
 * characters and '_' (like "HEAD" and "MERGE_HEAD", and not like
 * "config").  Note that an empty string passes this test trivially.
 *
 * Returns 1 if safe, 0 otherwise.
 */
static int refname_is_safe(const char *refname)
{
	const char *p;
	char *scratch;
	int escaped;

	if (!starts_with(refname, "refs/")) {
		for (p = refname; *p; p++)
			if (*p != '_' && !isupper(*p))
				return 0;
		return 1;
	}

	/*
	 * The buffer only receives the normalized path and is then
	 * discarded; what matters is whether normalization reports
	 * failure (nonzero).
	 */
	scratch = xmalloc(strlen(refname) + 1);
	escaped = normalize_path_copy(scratch, refname + strlen("refs/"));
	free(scratch);
	return !escaped;
}
|
|
|
|
|
2011-12-12 06:38:22 +01:00
|
|
|
static struct ref_entry *create_ref_entry(const char *refname,
|
|
|
|
const unsigned char *sha1, int flag,
|
|
|
|
int check_name)
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
{
|
|
|
|
int len;
|
2011-12-12 06:38:22 +01:00
|
|
|
struct ref_entry *ref;
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
|
2011-11-17 01:54:32 +01:00
|
|
|
if (check_name &&
|
2014-09-26 21:22:22 +02:00
|
|
|
check_refname_format(refname, REFNAME_ALLOW_ONELEVEL))
|
2011-12-12 06:38:09 +01:00
|
|
|
die("Reference has invalid format: '%s'", refname);
|
2011-12-12 06:38:22 +01:00
|
|
|
len = strlen(refname) + 1;
|
|
|
|
ref = xmalloc(sizeof(struct ref_entry) + len);
|
2015-05-25 20:38:27 +02:00
|
|
|
hashcpy(ref->u.value.oid.hash, sha1);
|
|
|
|
oidclr(&ref->u.value.peeled);
|
2011-12-12 06:38:22 +01:00
|
|
|
memcpy(ref->name, refname, len);
|
|
|
|
ref->flag = flag;
|
|
|
|
return ref;
|
|
|
|
}
|
|
|
|
|
2012-04-10 07:30:26 +02:00
|
|
|
/* Forward declaration: free_ref_entry() below calls this for directory entries. */
static void clear_ref_dir(struct ref_dir *dir);
|
|
|
|
|
2012-04-10 07:30:21 +02:00
|
|
|
static void free_ref_entry(struct ref_entry *entry)
|
|
|
|
{
|
2012-05-20 08:49:32 +02:00
|
|
|
if (entry->flag & REF_DIR) {
|
|
|
|
/*
|
|
|
|
* Do not use get_ref_dir() here, as that might
|
|
|
|
* trigger the reading of loose refs.
|
|
|
|
*/
|
|
|
|
clear_ref_dir(&entry->u.subdir);
|
|
|
|
}
|
2012-04-10 07:30:21 +02:00
|
|
|
free(entry);
|
|
|
|
}
|
|
|
|
|
2012-04-10 07:30:26 +02:00
|
|
|
/*
|
|
|
|
* Add a ref_entry to the end of dir (unsorted). Entry is always
|
|
|
|
* stored directly in dir; no recursion into subdirectories is
|
|
|
|
* done.
|
|
|
|
*/
|
|
|
|
static void add_entry_to_dir(struct ref_dir *dir, struct ref_entry *entry)
|
2011-12-12 06:38:22 +01:00
|
|
|
{
|
2012-04-10 07:30:26 +02:00
|
|
|
ALLOC_GROW(dir->entries, dir->nr + 1, dir->alloc);
|
|
|
|
dir->entries[dir->nr++] = entry;
|
2012-05-24 14:16:50 +02:00
|
|
|
/* optimize for the case that entries are added in order */
|
|
|
|
if (dir->nr == 1 ||
|
|
|
|
(dir->nr == dir->sorted + 1 &&
|
|
|
|
strcmp(dir->entries[dir->nr - 2]->name,
|
|
|
|
dir->entries[dir->nr - 1]->name) < 0))
|
|
|
|
dir->sorted = dir->nr;
|
2007-04-17 03:42:50 +02:00
|
|
|
}
|
|
|
|
|
2012-04-10 07:30:26 +02:00
|
|
|
/*
|
|
|
|
* Clear and free all entries in dir, recursively.
|
|
|
|
*/
|
2012-04-10 07:30:24 +02:00
|
|
|
static void clear_ref_dir(struct ref_dir *dir)
|
2012-04-10 07:30:13 +02:00
|
|
|
{
|
|
|
|
int i;
|
2012-04-10 07:30:24 +02:00
|
|
|
for (i = 0; i < dir->nr; i++)
|
|
|
|
free_ref_entry(dir->entries[i]);
|
|
|
|
free(dir->entries);
|
|
|
|
dir->sorted = dir->nr = dir->alloc = 0;
|
|
|
|
dir->entries = NULL;
|
2012-04-10 07:30:13 +02:00
|
|
|
}
|
|
|
|
|
2012-04-10 07:30:26 +02:00
|
|
|
/*
|
|
|
|
* Create a struct ref_entry object for the specified dirname.
|
|
|
|
* dirname is the name of the directory with a trailing slash (e.g.,
|
|
|
|
* "refs/heads/") or "" for the top-level directory.
|
|
|
|
*/
|
2012-04-27 00:27:05 +02:00
|
|
|
static struct ref_entry *create_dir_entry(struct ref_cache *ref_cache,
|
2012-05-22 20:50:52 +02:00
|
|
|
const char *dirname, size_t len,
|
|
|
|
int incomplete)
|
2012-04-10 07:30:26 +02:00
|
|
|
{
|
|
|
|
struct ref_entry *direntry;
|
|
|
|
direntry = xcalloc(1, sizeof(struct ref_entry) + len + 1);
|
2012-05-22 20:50:52 +02:00
|
|
|
memcpy(direntry->name, dirname, len);
|
|
|
|
direntry->name[len] = '\0';
|
2012-04-27 00:27:05 +02:00
|
|
|
direntry->u.subdir.ref_cache = ref_cache;
|
2012-04-27 00:27:07 +02:00
|
|
|
direntry->flag = REF_DIR | (incomplete ? REF_INCOMPLETE : 0);
|
2012-04-10 07:30:26 +02:00
|
|
|
return direntry;
|
|
|
|
}
|
|
|
|
|
2011-09-30 00:11:42 +02:00
|
|
|
static int ref_entry_cmp(const void *a, const void *b)
|
2007-04-17 03:42:50 +02:00
|
|
|
{
|
2011-09-30 00:11:42 +02:00
|
|
|
struct ref_entry *one = *(struct ref_entry **)a;
|
|
|
|
struct ref_entry *two = *(struct ref_entry **)b;
|
|
|
|
return strcmp(one->name, two->name);
|
|
|
|
}
|
2007-04-17 03:42:50 +02:00
|
|
|
|
2012-04-10 07:30:24 +02:00
|
|
|
/* Forward declaration: search_ref_dir() below sorts the directory before searching it. */
static void sort_ref_dir(struct ref_dir *dir);
|
2012-04-10 07:30:13 +02:00
|
|
|
|
2012-05-22 23:03:29 +02:00
|
|
|
/*
 * A string given as pointer plus explicit length, so that a prefix of
 * a longer string can be used as a search key without copying it.
 */
struct string_slice {
	size_t len;      /* number of bytes of str that belong to the slice */
	const char *str; /* start of the slice; need not be NUL-terminated at len */
};
|
|
|
|
|
|
|
|
static int ref_entry_cmp_sslice(const void *key_, const void *ent_)
|
|
|
|
{
|
2013-01-16 02:08:16 +01:00
|
|
|
const struct string_slice *key = key_;
|
|
|
|
const struct ref_entry *ent = *(const struct ref_entry * const *)ent_;
|
|
|
|
int cmp = strncmp(key->str, ent->name, key->len);
|
2012-05-22 23:03:29 +02:00
|
|
|
if (cmp)
|
|
|
|
return cmp;
|
2013-01-16 02:08:16 +01:00
|
|
|
return '\0' - (unsigned char)ent->name[key->len];
|
2012-05-22 23:03:29 +02:00
|
|
|
}
|
|
|
|
|
2012-04-10 07:30:26 +02:00
|
|
|
/*
|
2013-04-22 21:52:26 +02:00
|
|
|
* Return the index of the entry with the given refname from the
|
|
|
|
* ref_dir (non-recursively), sorting dir if necessary. Return -1 if
|
|
|
|
* no such entry is found. dir must already be complete.
|
2012-04-10 07:30:26 +02:00
|
|
|
*/
|
2013-04-22 21:52:26 +02:00
|
|
|
static int search_ref_dir(struct ref_dir *dir, const char *refname, size_t len)
|
2012-04-10 07:30:13 +02:00
|
|
|
{
|
2012-05-22 23:03:29 +02:00
|
|
|
struct ref_entry **r;
|
|
|
|
struct string_slice key;
|
2012-04-10 07:30:13 +02:00
|
|
|
|
2012-04-10 07:30:26 +02:00
|
|
|
if (refname == NULL || !dir->nr)
|
2013-04-22 21:52:26 +02:00
|
|
|
return -1;
|
2012-04-10 07:30:13 +02:00
|
|
|
|
2012-04-10 07:30:24 +02:00
|
|
|
sort_ref_dir(dir);
|
2012-05-22 23:03:29 +02:00
|
|
|
key.len = len;
|
|
|
|
key.str = refname;
|
|
|
|
r = bsearch(&key, dir->entries, dir->nr, sizeof(*dir->entries),
|
|
|
|
ref_entry_cmp_sslice);
|
2012-04-10 07:30:13 +02:00
|
|
|
|
|
|
|
if (r == NULL)
|
2013-04-22 21:52:26 +02:00
|
|
|
return -1;
|
2012-04-10 07:30:13 +02:00
|
|
|
|
2013-04-22 21:52:26 +02:00
|
|
|
return r - dir->entries;
|
2012-04-10 07:30:13 +02:00
|
|
|
}
|
|
|
|
|
2012-04-25 00:45:11 +02:00
|
|
|
/*
|
|
|
|
* Search for a directory entry directly within dir (without
|
|
|
|
* recursing). Sort dir if necessary. subdirname must be a directory
|
|
|
|
* name (i.e., end in '/'). If mkdir is set, then create the
|
|
|
|
* directory if it is missing; otherwise, return NULL if the desired
|
2012-04-27 00:27:07 +02:00
|
|
|
* directory cannot be found. dir must already be complete.
|
2012-04-25 00:45:11 +02:00
|
|
|
*/
|
2012-04-27 00:27:04 +02:00
|
|
|
static struct ref_dir *search_for_subdir(struct ref_dir *dir,
|
2012-05-22 20:50:58 +02:00
|
|
|
const char *subdirname, size_t len,
|
|
|
|
int mkdir)
|
2012-04-25 00:45:11 +02:00
|
|
|
{
|
2013-04-22 21:52:26 +02:00
|
|
|
int entry_index = search_ref_dir(dir, subdirname, len);
|
|
|
|
struct ref_entry *entry;
|
|
|
|
if (entry_index == -1) {
|
2012-04-25 00:45:11 +02:00
|
|
|
if (!mkdir)
|
|
|
|
return NULL;
|
2012-04-27 00:27:07 +02:00
|
|
|
/*
|
|
|
|
* Since dir is complete, the absence of a subdir
|
|
|
|
* means that the subdir really doesn't exist;
|
|
|
|
* therefore, create an empty record for it but mark
|
|
|
|
* the record complete.
|
|
|
|
*/
|
2012-05-22 20:50:52 +02:00
|
|
|
entry = create_dir_entry(dir->ref_cache, subdirname, len, 0);
|
2012-04-25 00:45:11 +02:00
|
|
|
add_entry_to_dir(dir, entry);
|
2013-04-22 21:52:26 +02:00
|
|
|
} else {
|
|
|
|
entry = dir->entries[entry_index];
|
2012-04-25 00:45:11 +02:00
|
|
|
}
|
2012-04-27 00:27:04 +02:00
|
|
|
return get_ref_dir(entry);
|
2012-04-25 00:45:11 +02:00
|
|
|
}
|
|
|
|
|
2012-04-10 07:30:26 +02:00
|
|
|
/*
 * If refname is a reference name, find the ref_dir within the dir
 * tree that should hold it.  If refname is a directory name (i.e.,
 * ends in '/'), return that ref_dir itself.  dir must represent the
 * top-level directory and must already be complete.  ref_dirs are
 * sorted and subdirectories descended into as necessary.  With mkdir
 * set, missing directories are created along the way; otherwise NULL
 * is returned as soon as one is missing.
 */
static struct ref_dir *find_containing_dir(struct ref_dir *dir,
					   const char *refname, int mkdir)
{
	const char *cursor = refname;
	const char *slash;

	while ((slash = strchr(cursor, '/')) != NULL) {
		/* Each lookup key is the full prefix up to and including this slash. */
		size_t prefix_len = slash - refname + 1;

		dir = search_for_subdir(dir, refname, prefix_len, mkdir);
		if (!dir)
			return NULL;
		cursor = slash + 1;
	}

	return dir;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find the value entry with the given name in dir, sorting ref_dirs
|
|
|
|
* and recursing into subdirectories as necessary. If the name is not
|
|
|
|
* found or it corresponds to a directory entry, return NULL.
|
|
|
|
*/
|
|
|
|
static struct ref_entry *find_ref(struct ref_dir *dir, const char *refname)
|
|
|
|
{
|
2013-04-22 21:52:26 +02:00
|
|
|
int entry_index;
|
2012-04-10 07:30:26 +02:00
|
|
|
struct ref_entry *entry;
|
|
|
|
dir = find_containing_dir(dir, refname, 0);
|
|
|
|
if (!dir)
|
|
|
|
return NULL;
|
2013-04-22 21:52:26 +02:00
|
|
|
entry_index = search_ref_dir(dir, refname, strlen(refname));
|
|
|
|
if (entry_index == -1)
|
|
|
|
return NULL;
|
|
|
|
entry = dir->entries[entry_index];
|
|
|
|
return (entry->flag & REF_DIR) ? NULL : entry;
|
2012-04-10 07:30:26 +02:00
|
|
|
}
|
|
|
|
|
2013-04-22 21:52:27 +02:00
|
|
|
/*
|
|
|
|
* Remove the entry with the given name from dir, recursing into
|
|
|
|
* subdirectories as necessary. If refname is the name of a directory
|
|
|
|
* (i.e., ends with '/'), then remove the directory and its contents.
|
|
|
|
* If the removal was successful, return the number of entries
|
|
|
|
* remaining in the directory entry that contained the deleted entry.
|
|
|
|
* If the name was not found, return -1. Please note that this
|
|
|
|
* function only deletes the entry from the cache; it does not delete
|
|
|
|
* it from the filesystem or ensure that other cache entries (which
|
|
|
|
* might be symbolic references to the removed entry) are updated.
|
|
|
|
* Nor does it remove any containing dir entries that might be made
|
|
|
|
* empty by the removal. dir must represent the top-level directory
|
|
|
|
* and must already be complete.
|
|
|
|
*/
|
|
|
|
static int remove_entry(struct ref_dir *dir, const char *refname)
|
|
|
|
{
|
|
|
|
int refname_len = strlen(refname);
|
|
|
|
int entry_index;
|
|
|
|
struct ref_entry *entry;
|
|
|
|
int is_dir = refname[refname_len - 1] == '/';
|
|
|
|
if (is_dir) {
|
|
|
|
/*
|
|
|
|
* refname represents a reference directory. Remove
|
|
|
|
* the trailing slash; otherwise we will get the
|
|
|
|
* directory *representing* refname rather than the
|
|
|
|
* one *containing* it.
|
|
|
|
*/
|
|
|
|
char *dirname = xmemdupz(refname, refname_len - 1);
|
|
|
|
dir = find_containing_dir(dir, dirname, 0);
|
|
|
|
free(dirname);
|
|
|
|
} else {
|
|
|
|
dir = find_containing_dir(dir, refname, 0);
|
|
|
|
}
|
|
|
|
if (!dir)
|
|
|
|
return -1;
|
|
|
|
entry_index = search_ref_dir(dir, refname, refname_len);
|
|
|
|
if (entry_index == -1)
|
|
|
|
return -1;
|
|
|
|
entry = dir->entries[entry_index];
|
|
|
|
|
|
|
|
memmove(&dir->entries[entry_index],
|
|
|
|
&dir->entries[entry_index + 1],
|
|
|
|
(dir->nr - entry_index - 1) * sizeof(*dir->entries)
|
|
|
|
);
|
|
|
|
dir->nr--;
|
|
|
|
if (dir->sorted > entry_index)
|
|
|
|
dir->sorted--;
|
|
|
|
free_ref_entry(entry);
|
|
|
|
return dir->nr;
|
2012-04-10 07:30:26 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add a ref_entry to the ref_dir (unsorted), recursing into
|
|
|
|
* subdirectories as necessary. dir must represent the top-level
|
|
|
|
* directory. Return 0 on success.
|
|
|
|
*/
|
|
|
|
static int add_ref(struct ref_dir *dir, struct ref_entry *ref)
|
|
|
|
{
|
|
|
|
dir = find_containing_dir(dir, ref->name, 1);
|
|
|
|
if (!dir)
|
|
|
|
return -1;
|
|
|
|
add_entry_to_dir(dir, ref);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-12-12 06:38:15 +01:00
|
|
|
/*
|
|
|
|
* Emit a warning and return true iff ref1 and ref2 have the same name
|
|
|
|
* and the same sha1. Die if they have the same name but different
|
|
|
|
* sha1s.
|
|
|
|
*/
|
|
|
|
static int is_dup_ref(const struct ref_entry *ref1, const struct ref_entry *ref2)
|
|
|
|
{
|
2012-04-10 07:30:26 +02:00
|
|
|
if (strcmp(ref1->name, ref2->name))
|
2011-12-12 06:38:15 +01:00
|
|
|
return 0;
|
2012-04-10 07:30:26 +02:00
|
|
|
|
|
|
|
/* Duplicate name; make sure that they don't conflict: */
|
|
|
|
|
|
|
|
if ((ref1->flag & REF_DIR) || (ref2->flag & REF_DIR))
|
|
|
|
/* This is impossible by construction */
|
|
|
|
die("Reference directory conflict: %s", ref1->name);
|
|
|
|
|
2015-05-25 20:38:27 +02:00
|
|
|
if (oidcmp(&ref1->u.value.oid, &ref2->u.value.oid))
|
2012-04-10 07:30:26 +02:00
|
|
|
die("Duplicated ref, and SHA1s don't match: %s", ref1->name);
|
|
|
|
|
|
|
|
warning("Duplicated ref: %s", ref1->name);
|
|
|
|
return 1;
|
2011-12-12 06:38:15 +01:00
|
|
|
}
|
|
|
|
|
ref_array: keep track of whether references are sorted
Keep track of how many entries at the beginning of a ref_array are already
sorted. In sort_ref_array(), return early if the array is already
sorted (i.e., if no new references have been appended to the end of the
list since the last call to sort_ref_array()).
Sort ref_arrays only when needed, namely in search_ref_array() and in
do_for_each_ref(). However, never call sort_ref_array() on the
extra_refs, because extra_refs can contain multiple entries with the same
name and because sort_ref_array() not only sorts, but de-dups its
contents.
This change is currently not useful, because entries are not added to
ref_arrays after they are created. But in a moment they will be...
Implementation note: we could store a binary "sorted" value instead of
an integer, but storing the number of sorted entries leaves the way
open for a couple of possible future optimizations:
* In sort_ref_array(), sort *only* the unsorted entries, then merge
them with the sorted entries. This should be faster if most of the
entries are already sorted.
* Teach search_ref_array() to do a binary search of any sorted
entries, and if unsuccessful do a linear search of any unsorted
entries. This would avoid the need to sort the list every time that
search_ref_array() is called, and (given some intelligence about how
often to sort) could significantly improve the speed in certain
hypothetical usage patterns.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-17 06:50:32 +01:00
|
|
|
/*
|
2012-04-10 07:30:26 +02:00
|
|
|
* Sort the entries in dir non-recursively (if they are not already
|
|
|
|
* sorted) and remove any duplicate entries.
|
ref_array: keep track of whether references are sorted
Keep track of how many entries at the beginning of a ref_array are already
sorted. In sort_ref_array(), return early if the the array is already
sorted (i.e., if no new references has been appended to the end of the
list since the last call to sort_ref_array()).
Sort ref_arrays only when needed, namely in search_ref_array() and in
do_for_each_ref(). However, never call sort_ref_array() on the
extra_refs, because extra_refs can contain multiple entries with the same
name and because sort_ref_array() not only sorts, but de-dups its
contents.
This change is currently not useful, because entries are not added to
ref_arrays after they are created. But in a moment they will be...
Implementation note: we could store a binary "sorted" value instead of
an integer, but storing the number of sorted entries leaves the way
open for a couple of possible future optimizations:
* In sort_ref_array(), sort *only* the unsorted entries, then merge
them with the sorted entries. This should be faster if most of the
entries are already sorted.
* Teach search_ref_array() to do a binary search of any sorted
entries, and if unsuccessful do a linear search of any unsorted
entries. This would avoid the need to sort the list every time that
search_ref_array() is called, and (given some intelligence about how
often to sort) could significantly improve the speed in certain
hypothetical usage patterns.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-17 06:50:32 +01:00
|
|
|
*/
|
2012-04-10 07:30:24 +02:00
|
|
|
static void sort_ref_dir(struct ref_dir *dir)
|
2011-09-30 00:11:42 +02:00
|
|
|
{
|
2011-12-12 06:38:15 +01:00
|
|
|
int i, j;
|
2012-04-10 07:30:25 +02:00
|
|
|
struct ref_entry *last = NULL;
|
2007-04-17 03:42:50 +02:00
|
|
|
|
ref_array: keep track of whether references are sorted
Keep track of how many entries at the beginning of a ref_array are already
sorted. In sort_ref_array(), return early if the the array is already
sorted (i.e., if no new references has been appended to the end of the
list since the last call to sort_ref_array()).
Sort ref_arrays only when needed, namely in search_ref_array() and in
do_for_each_ref(). However, never call sort_ref_array() on the
extra_refs, because extra_refs can contain multiple entries with the same
name and because sort_ref_array() not only sorts, but de-dups its
contents.
This change is currently not useful, because entries are not added to
ref_arrays after they are created. But in a moment they will be...
Implementation note: we could store a binary "sorted" value instead of
an integer, but storing the number of sorted entries leaves the way
open for a couple of possible future optimizations:
* In sort_ref_array(), sort *only* the unsorted entries, then merge
them with the sorted entries. This should be faster if most of the
entries are already sorted.
* Teach search_ref_array() to do a binary search of any sorted
entries, and if unsuccessful do a linear search of any unsorted
entries. This would avoid the need to sort the list every time that
search_ref_array() is called, and (given some intelligence about how
often to sort) could significantly improve the speed in certain
hypothetical usage patterns.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-17 06:50:32 +01:00
|
|
|
/*
|
|
|
|
* This check also prevents passing a zero-length array to qsort(),
|
|
|
|
* which is a problem on some platforms.
|
|
|
|
*/
|
2012-04-10 07:30:24 +02:00
|
|
|
if (dir->sorted == dir->nr)
|
2011-09-30 00:11:42 +02:00
|
|
|
return;
|
2007-04-17 03:42:50 +02:00
|
|
|
|
2012-04-10 07:30:24 +02:00
|
|
|
qsort(dir->entries, dir->nr, sizeof(*dir->entries), ref_entry_cmp);
|
2007-04-17 03:42:50 +02:00
|
|
|
|
2012-04-10 07:30:25 +02:00
|
|
|
/* Remove any duplicates: */
|
|
|
|
for (i = 0, j = 0; j < dir->nr; j++) {
|
|
|
|
struct ref_entry *entry = dir->entries[j];
|
|
|
|
if (last && is_dup_ref(last, entry))
|
|
|
|
free_ref_entry(entry);
|
|
|
|
else
|
|
|
|
last = dir->entries[i++] = entry;
|
2011-09-30 00:11:42 +02:00
|
|
|
}
|
2012-04-10 07:30:25 +02:00
|
|
|
dir->sorted = dir->nr = i;
|
2011-09-30 00:11:42 +02:00
|
|
|
}
|
2007-04-17 03:42:50 +02:00
|
|
|
|
2013-04-22 21:52:11 +02:00
|
|
|
/* Include broken references in a do_for_each_ref*() iteration: */
|
|
|
|
#define DO_FOR_EACH_INCLUDE_BROKEN 0x01
|
2007-04-17 03:42:50 +02:00
|
|
|
|
2013-04-22 21:52:18 +02:00
|
|
|
/*
|
|
|
|
* Return true iff the reference described by entry can be resolved to
|
|
|
|
* an object in the database. Emit a warning if the referred-to
|
|
|
|
* object does not exist.
|
|
|
|
*/
|
|
|
|
static int ref_resolves_to_object(struct ref_entry *entry)
|
|
|
|
{
|
|
|
|
if (entry->flag & REF_ISBROKEN)
|
|
|
|
return 0;
|
2015-05-25 20:38:27 +02:00
|
|
|
if (!has_sha1_file(entry->u.value.oid.hash)) {
|
2013-04-22 21:52:18 +02:00
|
|
|
error("%s does not point to a valid object!", entry->name);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
2007-04-17 03:42:50 +02:00
|
|
|
|
2013-04-22 21:52:12 +02:00
|
|
|
/*
|
|
|
|
* current_ref is a performance hack: when iterating over references
|
|
|
|
* using the for_each_ref*() functions, current_ref is set to the
|
|
|
|
* current reference's entry before calling the callback function. If
|
|
|
|
* the callback function calls peel_ref(), then peel_ref() first
|
|
|
|
* checks whether the reference to be peeled is the current reference
|
|
|
|
* (it usually is) and if so, returns that reference's peeled version
|
|
|
|
* if it is available. This avoids a refname lookup in a common case.
|
|
|
|
*/
|
2012-04-10 07:30:13 +02:00
|
|
|
/* File-scope object with static storage duration, so it starts out as NULL. */
static struct ref_entry *current_ref;
|
2007-04-17 03:42:50 +02:00
|
|
|
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
/*
 * Callback signature for the internal, entry-based iteration API: the
 * callback receives the ref_entry itself plus an opaque cb_data
 * pointer and returns an int.
 */
typedef int each_ref_entry_fn(struct ref_entry *entry, void *cb_data);
|
|
|
|
|
|
|
|
struct ref_entry_cb {
|
|
|
|
const char *base;
|
|
|
|
int trim;
|
|
|
|
int flags;
|
|
|
|
each_ref_fn *fn;
|
|
|
|
void *cb_data;
|
|
|
|
};
|
|
|
|
|
2013-04-22 21:52:11 +02:00
|
|
|
/*
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
* Handle one reference in a do_for_each_ref*()-style iteration,
|
|
|
|
* calling an each_ref_fn for each entry.
|
2013-04-22 21:52:11 +02:00
|
|
|
*/
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
static int do_one_ref(struct ref_entry *entry, void *cb_data)
|
2012-04-10 07:30:13 +02:00
|
|
|
{
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
struct ref_entry_cb *data = cb_data;
|
2013-07-15 17:24:17 +02:00
|
|
|
struct ref_entry *old_current_ref;
|
2012-04-10 07:30:14 +02:00
|
|
|
int retval;
|
2013-07-15 17:24:17 +02:00
|
|
|
|
2013-11-30 21:55:40 +01:00
|
|
|
if (!starts_with(entry->name, data->base))
|
2012-04-10 07:30:13 +02:00
|
|
|
return 0;
|
2007-04-17 03:42:50 +02:00
|
|
|
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
if (!(data->flags & DO_FOR_EACH_INCLUDE_BROKEN) &&
|
2013-04-22 21:52:18 +02:00
|
|
|
!ref_resolves_to_object(entry))
|
2012-04-10 07:30:13 +02:00
|
|
|
return 0;
|
2007-04-17 03:42:50 +02:00
|
|
|
|
2013-07-15 17:24:17 +02:00
|
|
|
/* Store the old value, in case this is a recursive call: */
|
|
|
|
old_current_ref = current_ref;
|
2012-04-10 07:30:13 +02:00
|
|
|
current_ref = entry;
|
2015-05-25 20:38:28 +02:00
|
|
|
retval = data->fn(entry->name + data->trim, &entry->u.value.oid,
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
entry->flag, data->cb_data);
|
2013-07-15 17:24:17 +02:00
|
|
|
current_ref = old_current_ref;
|
2012-04-10 07:30:14 +02:00
|
|
|
return retval;
|
2012-04-10 07:30:13 +02:00
|
|
|
}
|
2007-04-17 03:42:50 +02:00
|
|
|
|
2012-04-10 07:30:15 +02:00
|
|
|
/*
|
2012-04-10 07:30:24 +02:00
|
|
|
* Call fn for each reference in dir that has index in the range
|
2012-04-10 07:30:26 +02:00
|
|
|
* offset <= index < dir->nr. Recurse into subdirectories that are in
|
|
|
|
* that index range, sorting them before iterating. This function
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
* does not sort dir itself; it should be sorted beforehand. fn is
|
|
|
|
* called for all references, including broken ones.
|
2012-04-10 07:30:15 +02:00
|
|
|
*/
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
static int do_for_each_entry_in_dir(struct ref_dir *dir, int offset,
|
|
|
|
each_ref_entry_fn fn, void *cb_data)
|
2012-04-10 07:30:15 +02:00
|
|
|
{
|
|
|
|
int i;
|
2012-04-10 07:30:24 +02:00
|
|
|
assert(dir->sorted == dir->nr);
|
|
|
|
for (i = offset; i < dir->nr; i++) {
|
2012-04-10 07:30:26 +02:00
|
|
|
struct ref_entry *entry = dir->entries[i];
|
|
|
|
int retval;
|
|
|
|
if (entry->flag & REF_DIR) {
|
2012-04-27 00:27:03 +02:00
|
|
|
struct ref_dir *subdir = get_ref_dir(entry);
|
|
|
|
sort_ref_dir(subdir);
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
retval = do_for_each_entry_in_dir(subdir, 0, fn, cb_data);
|
2012-04-10 07:30:26 +02:00
|
|
|
} else {
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
retval = fn(entry, cb_data);
|
2012-04-10 07:30:26 +02:00
|
|
|
}
|
2012-04-10 07:30:15 +02:00
|
|
|
if (retval)
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-04-10 07:30:16 +02:00
|
|
|
/*
|
2012-04-10 07:30:24 +02:00
|
|
|
* Call fn for each reference in the union of dir1 and dir2, in order
|
2012-04-10 07:30:26 +02:00
|
|
|
* by refname. Recurse into subdirectories. If a value entry appears
|
|
|
|
* in both dir1 and dir2, then only process the version that is in
|
|
|
|
* dir2. The input dirs must already be sorted, but subdirs will be
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
* sorted as needed. fn is called for all references, including
|
|
|
|
* broken ones.
|
2012-04-10 07:30:16 +02:00
|
|
|
*/
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
static int do_for_each_entry_in_dirs(struct ref_dir *dir1,
|
|
|
|
struct ref_dir *dir2,
|
|
|
|
each_ref_entry_fn fn, void *cb_data)
|
2012-04-10 07:30:16 +02:00
|
|
|
{
|
|
|
|
int retval;
|
|
|
|
int i1 = 0, i2 = 0;
|
|
|
|
|
2012-04-10 07:30:24 +02:00
|
|
|
assert(dir1->sorted == dir1->nr);
|
|
|
|
assert(dir2->sorted == dir2->nr);
|
2012-04-10 07:30:26 +02:00
|
|
|
while (1) {
|
|
|
|
struct ref_entry *e1, *e2;
|
|
|
|
int cmp;
|
|
|
|
if (i1 == dir1->nr) {
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
return do_for_each_entry_in_dir(dir2, i2, fn, cb_data);
|
2012-04-10 07:30:26 +02:00
|
|
|
}
|
|
|
|
if (i2 == dir2->nr) {
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
return do_for_each_entry_in_dir(dir1, i1, fn, cb_data);
|
2012-04-10 07:30:26 +02:00
|
|
|
}
|
|
|
|
e1 = dir1->entries[i1];
|
|
|
|
e2 = dir2->entries[i2];
|
|
|
|
cmp = strcmp(e1->name, e2->name);
|
|
|
|
if (cmp == 0) {
|
|
|
|
if ((e1->flag & REF_DIR) && (e2->flag & REF_DIR)) {
|
|
|
|
/* Both are directories; descend them in parallel. */
|
2012-04-27 00:27:03 +02:00
|
|
|
struct ref_dir *subdir1 = get_ref_dir(e1);
|
|
|
|
struct ref_dir *subdir2 = get_ref_dir(e2);
|
|
|
|
sort_ref_dir(subdir1);
|
|
|
|
sort_ref_dir(subdir2);
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
retval = do_for_each_entry_in_dirs(
|
|
|
|
subdir1, subdir2, fn, cb_data);
|
2012-04-10 07:30:26 +02:00
|
|
|
i1++;
|
|
|
|
i2++;
|
|
|
|
} else if (!(e1->flag & REF_DIR) && !(e2->flag & REF_DIR)) {
|
|
|
|
/* Both are references; ignore the one from dir1. */
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
retval = fn(e2, cb_data);
|
2012-04-10 07:30:26 +02:00
|
|
|
i1++;
|
|
|
|
i2++;
|
|
|
|
} else {
|
|
|
|
die("conflict between reference and directory: %s",
|
|
|
|
e1->name);
|
|
|
|
}
|
2012-04-10 07:30:16 +02:00
|
|
|
} else {
|
2012-04-10 07:30:26 +02:00
|
|
|
struct ref_entry *e;
|
|
|
|
if (cmp < 0) {
|
|
|
|
e = e1;
|
2012-04-10 07:30:16 +02:00
|
|
|
i1++;
|
2012-04-10 07:30:26 +02:00
|
|
|
} else {
|
|
|
|
e = e2;
|
|
|
|
i2++;
|
|
|
|
}
|
|
|
|
if (e->flag & REF_DIR) {
|
2012-04-27 00:27:03 +02:00
|
|
|
struct ref_dir *subdir = get_ref_dir(e);
|
|
|
|
sort_ref_dir(subdir);
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
retval = do_for_each_entry_in_dir(
|
|
|
|
subdir, 0, fn, cb_data);
|
2012-04-10 07:30:26 +02:00
|
|
|
} else {
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
retval = fn(e, cb_data);
|
2012-04-10 07:30:16 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (retval)
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
for_each_ref: load all loose refs before packed refs
If we are iterating through the refs using for_each_ref (or
any of its sister functions), we can get into a race
condition with a simultaneous "pack-refs --prune" that looks
like this:
0. We have a large number of loose refs, and a few packed
refs. refs/heads/z/foo is loose, with no matching entry
in the packed-refs file.
1. Process A starts iterating through the refs. It loads
the packed-refs file from disk, then starts lazily
traversing through the loose ref directories.
2. Process B, running "pack-refs --prune", writes out the
new packed-refs file. It then deletes the newly packed
refs, including refs/heads/z/foo.
3. Meanwhile, process A has finally gotten to
refs/heads/z (it traverses alphabetically). It
descends, but finds nothing there. It checks its
cached view of the packed-refs file, but it does not
mention anything in "refs/heads/z/" at all (it predates
the new file written by B in step 2).
The traversal completes successfully without mentioning
refs/heads/z/foo at all (the name, of course, isn't
important; but the more refs you have and the farther down
the alphabetical list a ref is, the more likely it is to hit
the race). If refs/heads/z/foo did exist in the packed refs
file at state 0, we would see an entry for it, but it would
show whatever sha1 the ref had the last time it was packed
(which could be an arbitrarily long time ago).
This can be especially dangerous when process A is "git
prune", as it means our set of reachable tips will be
incomplete, and we may erroneously prune objects reachable
from that tip (the same thing can happen if "repack -ad" is
used, as it simply drops unreachable objects that are
packed).
This patch solves it by loading all of the loose refs for
our traversal into our in-memory cache, and then refreshing
the packed-refs cache. Because a pack-refs writer will
always put the new packed-refs file into place before
starting the prune, we know that any loose refs we fail to
see will either truly be missing, or will have already been
put in the packed-refs file by the time we refresh.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-20 10:37:53 +02:00
|
|
|
/*
|
|
|
|
* Load all of the refs from the dir into our in-memory cache. The hard work
|
|
|
|
* of loading loose refs is done by get_ref_dir(), so we just need to recurse
|
|
|
|
* through all of the sub-directories. We do not even need to care about
|
|
|
|
* sorting, as traversal order does not matter to us.
|
|
|
|
*/
|
|
|
|
static void prime_ref_dir(struct ref_dir *dir)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < dir->nr; i++) {
|
|
|
|
struct ref_entry *entry = dir->entries[i];
|
|
|
|
if (entry->flag & REF_DIR)
|
|
|
|
prime_ref_dir(get_ref_dir(entry));
|
|
|
|
}
|
|
|
|
}
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
|
|
|
|
/*
 * Callback state for nonmatching_ref_fn().
 */
struct nonmatching_ref_data {
	/*
	 * Refnames to be ignored while looking for a conflict; may be
	 * NULL, in which case no entry is ignored. It is queried with
	 * string_list_has_string().
	 */
	const struct string_list *skip;

	/*
	 * Written by nonmatching_ref_fn(): the name of the entry that
	 * was not found in skip. It points at the entry's own name
	 * rather than at a copy.
	 */
	const char *conflicting_refname;
};
|
|
|
|
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
static int nonmatching_ref_fn(struct ref_entry *entry, void *vdata)
|
2012-04-10 07:30:19 +02:00
|
|
|
{
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
struct nonmatching_ref_data *data = vdata;
|
|
|
|
|
2015-05-11 17:25:07 +02:00
|
|
|
if (data->skip && string_list_has_string(data->skip, entry->name))
|
2012-04-10 07:30:19 +02:00
|
|
|
return 0;
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
|
2015-05-11 17:25:09 +02:00
|
|
|
data->conflicting_refname = entry->name;
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2012-04-10 07:30:13 +02:00
|
|
|
/*
|
2015-05-11 17:25:13 +02:00
|
|
|
* Return 0 if a reference named refname could be created without
|
|
|
|
* conflicting with the name of an existing reference in dir.
|
2015-05-11 17:25:14 +02:00
|
|
|
* Otherwise, return a negative value and write an explanation to err.
|
|
|
|
* If extras is non-NULL, it is a list of additional refnames with
|
|
|
|
* which refname is not allowed to conflict. If skip is non-NULL,
|
|
|
|
* ignore potential conflicts with refs in skip (e.g., because they
|
|
|
|
* are scheduled for deletion in the same operation). Behavior is
|
|
|
|
* undefined if the same name is listed in both extras and skip.
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
*
|
|
|
|
* Two reference names conflict if one of them exactly matches the
|
2015-05-11 17:25:04 +02:00
|
|
|
* leading components of the other; e.g., "refs/foo/bar" conflicts
|
|
|
|
* with both "refs/foo" and with "refs/foo/bar/baz" but not with
|
|
|
|
* "refs/foo/bar" or "refs/foo/barbados".
|
2014-05-01 20:16:07 +02:00
|
|
|
*
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
* extras and skip must be sorted.
|
2012-04-10 07:30:13 +02:00
|
|
|
*/
|
2015-05-11 17:25:13 +02:00
|
|
|
static int verify_refname_available(const char *refname,
|
|
|
|
const struct string_list *extras,
|
|
|
|
const struct string_list *skip,
|
2015-05-11 17:25:14 +02:00
|
|
|
struct ref_dir *dir,
|
|
|
|
struct strbuf *err)
|
2012-04-10 07:30:13 +02:00
|
|
|
{
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
const char *slash;
|
|
|
|
int pos;
|
2015-05-11 17:25:06 +02:00
|
|
|
struct strbuf dirname = STRBUF_INIT;
|
2015-05-11 17:25:13 +02:00
|
|
|
int ret = -1;
|
2012-04-10 07:30:19 +02:00
|
|
|
|
2015-05-11 17:25:04 +02:00
|
|
|
/*
|
|
|
|
* For the sake of comments in this function, suppose that
|
|
|
|
* refname is "refs/foo/bar".
|
|
|
|
*/
|
2012-04-10 07:30:19 +02:00
|
|
|
|
2015-05-11 17:25:14 +02:00
|
|
|
assert(err);
|
|
|
|
|
2015-05-11 17:25:10 +02:00
|
|
|
strbuf_grow(&dirname, strlen(refname) + 1);
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
for (slash = strchr(refname, '/'); slash; slash = strchr(slash + 1, '/')) {
|
2015-05-11 17:25:10 +02:00
|
|
|
/* Expand dirname to the new prefix, not including the trailing slash: */
|
|
|
|
strbuf_add(&dirname, refname + dirname.len, slash - refname - dirname.len);
|
|
|
|
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
/*
|
2015-05-11 17:25:04 +02:00
|
|
|
* We are still at a leading dir of the refname (e.g.,
|
|
|
|
* "refs/foo"); if there is a reference with that name,
|
|
|
|
* it is a conflict, *unless* it is in skip.
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
*/
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
if (dir) {
|
|
|
|
pos = search_ref_dir(dir, dirname.buf, dirname.len);
|
|
|
|
if (pos >= 0 &&
|
|
|
|
(!skip || !string_list_has_string(skip, dirname.buf))) {
|
2015-05-11 17:25:04 +02:00
|
|
|
/*
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
* We found a reference whose name is
|
|
|
|
* a proper prefix of refname; e.g.,
|
|
|
|
* "refs/foo", and is not in skip.
|
2015-05-11 17:25:04 +02:00
|
|
|
*/
|
2015-05-11 17:25:14 +02:00
|
|
|
strbuf_addf(err, "'%s' exists; cannot create '%s'",
|
|
|
|
dirname.buf, refname);
|
2015-05-11 17:25:10 +02:00
|
|
|
goto cleanup;
|
2015-05-11 17:25:04 +02:00
|
|
|
}
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
}
|
|
|
|
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
if (extras && string_list_has_string(extras, dirname.buf) &&
|
|
|
|
(!skip || !string_list_has_string(skip, dirname.buf))) {
|
2015-05-11 17:25:14 +02:00
|
|
|
strbuf_addf(err, "cannot process '%s' and '%s' at the same time",
|
|
|
|
refname, dirname.buf);
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
goto cleanup;
|
|
|
|
}
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Otherwise, we can try to continue our search with
|
2015-05-11 17:25:04 +02:00
|
|
|
* the next component. So try to look up the
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
* directory, e.g., "refs/foo/". If we come up empty,
|
|
|
|
* we know there is nothing under this whole prefix,
|
|
|
|
* but even in that case we still have to continue the
|
|
|
|
* search for conflicts with extras.
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
*/
|
2015-05-11 17:25:10 +02:00
|
|
|
strbuf_addch(&dirname, '/');
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
in which "refs/foo" is deleted (or vice versa). But that would be
complicated to implement: on the one hand, it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference; on the
other hand, we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also the complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
if (dir) {
|
|
|
|
pos = search_ref_dir(dir, dirname.buf, dirname.len);
|
|
|
|
if (pos < 0) {
|
|
|
|
/*
|
|
|
|
* There was no directory "refs/foo/",
|
|
|
|
* so there is nothing under this
|
|
|
|
* whole prefix. So there is no need
|
|
|
|
* to continue looking for conflicting
|
|
|
|
* references. But we need to continue
|
|
|
|
* looking for conflicting extras.
|
|
|
|
*/
|
|
|
|
dir = NULL;
|
|
|
|
} else {
|
|
|
|
dir = get_ref_dir(dir->entries[pos]);
|
|
|
|
}
|
2015-05-11 17:25:04 +02:00
|
|
|
}
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2015-05-11 17:25:04 +02:00
|
|
|
* We are at the leaf of our refname (e.g., "refs/foo/bar").
|
|
|
|
* There is no point in searching for a reference with that
|
|
|
|
* name, because a refname isn't considered to conflict with
|
|
|
|
* itself. But we still need to check for references whose
|
|
|
|
* names are in the "refs/foo/bar/" namespace, because they
|
|
|
|
* *do* conflict.
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
*/
|
2015-05-11 17:25:10 +02:00
|
|
|
strbuf_addstr(&dirname, refname + dirname.len);
|
2015-05-11 17:25:06 +02:00
|
|
|
strbuf_addch(&dirname, '/');
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
in which "refs/foo" is deleted (or vice versa). But that would be
complicated to implement: on the one hand, it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference; on the
other hand, we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also the complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
if (dir) {
|
|
|
|
pos = search_ref_dir(dir, dirname.buf, dirname.len);
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
in which "refs/foo" is deleted (or vice versa). But that would be
complicated to implement: on the one hand, it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference; on the
other hand, we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also the complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
if (pos >= 0) {
|
|
|
|
/*
|
|
|
|
* We found a directory named "$refname/"
|
|
|
|
* (e.g., "refs/foo/bar/"). It is a problem
|
|
|
|
* iff it contains any ref that is not in
|
|
|
|
* "skip".
|
|
|
|
*/
|
|
|
|
struct nonmatching_ref_data data;
|
|
|
|
|
|
|
|
data.skip = skip;
|
|
|
|
data.conflicting_refname = NULL;
|
|
|
|
dir = get_ref_dir(dir->entries[pos]);
|
|
|
|
sort_ref_dir(dir);
|
|
|
|
if (do_for_each_entry_in_dir(dir, 0, nonmatching_ref_fn, &data)) {
|
2015-05-11 17:25:14 +02:00
|
|
|
strbuf_addf(err, "'%s' exists; cannot create '%s'",
|
|
|
|
data.conflicting_refname, refname);
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
in which "refs/foo" is deleted (or vice versa). But that would be
complicated to implement: on the one hand, it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference; on the
other hand, we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also the complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (extras) {
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
/*
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
in which "refs/foo" is deleted (or vice versa). But that would be
complicated to implement: on the one hand, it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference; on the
other hand, we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also the complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
* Check for entries in extras that start with
|
|
|
|
* "$refname/". We do that by looking for the place
|
|
|
|
* where "$refname/" would be inserted in extras. If
|
|
|
|
* there is an entry at that position that starts with
|
|
|
|
* "$refname/" and is not in skip, then we have a
|
|
|
|
* conflict.
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
*/
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
in which "refs/foo" is deleted (or vice versa). But that would be
complicated to implement: on the one hand, it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference; on the
other hand, we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also the complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
for (pos = string_list_find_insert_index(extras, dirname.buf, 0);
|
|
|
|
pos < extras->nr; pos++) {
|
|
|
|
const char *extra_refname = extras->items[pos].string;
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
if (!starts_with(extra_refname, dirname.buf))
|
|
|
|
break;
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
if (!skip || !string_list_has_string(skip, extra_refname)) {
|
2015-05-11 17:25:14 +02:00
|
|
|
strbuf_addf(err, "cannot process '%s' and '%s' at the same time",
|
|
|
|
refname, extra_refname);
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
2012-04-10 07:30:13 +02:00
|
|
|
}
|
refs: speed up is_refname_available
Our filesystem ref storage does not allow D/F conflicts; so
if "refs/heads/a/b" exists, we do not allow "refs/heads/a"
to exist (and vice versa). This falls out naturally for
loose refs, where the filesystem enforces the condition. But
for packed-refs, we have to make the check ourselves.
We do so by iterating over the entire packed-refs namespace
and checking whether each name creates a conflict. If you
have a very large number of refs, this is quite inefficient,
as you end up doing a large number of comparisons with
uninteresting bits of the ref tree (e.g., we know that all
of "refs/tags" is uninteresting in the example above, yet we
check each entry in it).
Instead, let's take advantage of the fact that we have the
packed refs stored as a trie of ref_entry structs. We can
find each component of the proposed refname as we walk
through the trie, checking for D/F conflicts as we go. For a
refname of depth N (i.e., 4 in the above example), we only
have to visit N nodes. And at each visit, we can binary
search the M names at that level, for a total complexity of
O(N lg M). ("M" is different at each level, of course, but
we can take the worst-case "M" as a bound).
In a pathological case of fetching 30,000 fresh refs into a
repository with 8.5 million refs, this dropped the time to
run "git fetch" from tens of minutes to ~30s.
This may also help smaller cases in which we check against
loose refs (which we do when renaming a ref), as we may
avoid a disk access for unrelated loose directories.
Note that the tests we add appear at first glance to be
redundant with what is already in t3210. However, the early
tests are not robust; they are run with reflogs turned on,
meaning that we are not actually testing
is_refname_available at all! The operations will still fail
because the reflogs will hit D/F conflicts in the
filesystem. To get a true test, we must turn off reflogs
(but we don't want to do so for the entire script, because
the point of turning them on was to cover some other cases).
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-10 13:11:55 +02:00
|
|
|
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
/* No conflicts were found */
|
2015-05-11 17:25:13 +02:00
|
|
|
ret = 0;
|
2015-05-11 17:25:10 +02:00
|
|
|
|
|
|
|
cleanup:
|
|
|
|
strbuf_release(&dirname);
|
|
|
|
return ret;
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
}
|
|
|
|
|
2013-06-20 10:37:45 +02:00
|
|
|
struct packed_ref_cache {
|
|
|
|
struct ref_entry *root;
|
2013-06-20 10:37:46 +02:00
|
|
|
|
2013-06-20 10:37:47 +02:00
|
|
|
/*
|
|
|
|
* Count of references to the data structure in this instance,
|
|
|
|
* including the pointer from ref_cache::packed if any. The
|
|
|
|
* data will not be freed as long as the reference count is
|
|
|
|
* nonzero.
|
|
|
|
*/
|
|
|
|
unsigned int referrers;
|
|
|
|
|
2013-06-20 10:37:46 +02:00
|
|
|
/*
|
|
|
|
* Iff the packed-refs file associated with this instance is
|
|
|
|
* currently locked for writing, this points at the associated
|
2013-06-20 10:37:49 +02:00
|
|
|
* lock (which is owned by somebody else). The referrer count
|
|
|
|
* is also incremented when the file is locked and decremented
|
|
|
|
* when it is unlocked.
|
2013-06-20 10:37:46 +02:00
|
|
|
*/
|
|
|
|
struct lock_file *lock;
|
get_packed_ref_cache: reload packed-refs file when it changes
Once we read the packed-refs file into memory, we cache it
to save work on future ref lookups. However, our cache may
be out of date with respect to what is on disk if another
process is simultaneously packing the refs. Normally it
is acceptable for us to be a little out of date, since there
is no guarantee whether we read the file before or after the
simultaneous update. However, there is an important special
case: our packed-refs file must be up to date with respect
to any loose refs we read. Otherwise, we risk the following
race condition:
0. There exists a loose ref refs/heads/master.
1. Process A starts and looks up the ref "master". It
first checks $GIT_DIR/master, which does not exist. It
then loads (and caches) the packed-refs file to see if
"master" exists in it, which it does not.
2. Meanwhile, process B runs "pack-refs --all --prune". It
creates a new packed-refs file which contains
refs/heads/master, and removes the loose copy at
$GIT_DIR/refs/heads/master.
3. Process A continues its lookup, and eventually tries
$GIT_DIR/refs/heads/master. It sees that the loose ref
is missing, and falls back to the packed-refs file. But
it examines its cached version, which does not have
refs/heads/master. After trying a few other prefixes,
it reports master as a non-existent ref.
There are many variants (e.g., step 1 may involve process A
looking up another ref entirely, so even a fully qualified
refname can fail). One of the most interesting ones is if
"refs/heads/master" is already packed. In that case process
A will not see it as missing, but rather will report
whatever value happened to be in the packed-refs file before
process B repacked (which might be an arbitrarily old
value).
We can fix this by making sure we reload the packed-refs
file from disk after looking at any loose refs. That's
unacceptably slow, so we can check its stat()-validity as a
proxy, and read it only when it appears to have changed.
Reading the packed-refs file after performing any loose-ref
system calls is sufficient because we know the ordering of
the pack-refs process: it always makes sure the newly
written packed-refs file is installed into place before
pruning any loose refs. As long as those operations by B
appear in their executed order to process A, by the time A
sees the missing loose ref, the new packed-refs file must be
in place.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-20 10:37:52 +02:00
|
|
|
|
|
|
|
/* The metadata from when this packed-refs cache was read */
|
|
|
|
struct stat_validity validity;
|
2013-06-20 10:37:45 +02:00
|
|
|
};
|
|
|
|
|
2006-09-30 21:37:37 +02:00
|
|
|
/*
 * Per-repository reference caches: "ref_cache" is the statically
 * allocated instance for the main repository, and
 * "submodule_ref_caches" heads a singly linked list of the others.
 *
 * Future: these need to move into "struct repository" when doing a
 * full libification.
 */
static struct ref_cache {
	/* Next entry in the submodule_ref_caches list. */
	struct ref_cache *next;

	/* Cached loose references, or NULL when nothing is cached. */
	struct ref_entry *loose;

	/* Cached packed references, or NULL when nothing is cached. */
	struct packed_ref_cache *packed;

	/*
	 * Submodule name; "" denotes the main repository.  Declared
	 * with length 1 instead of FLEX_ARRAY so that the statically
	 * allocated main ref_cache is initialized correctly.
	 */
	char name[1];
} ref_cache, *submodule_ref_caches;
|
2011-08-13 00:36:29 +02:00
|
|
|
|
2013-06-20 10:37:46 +02:00
|
|
|
/* Lock used for the main packed-refs file: */
|
|
|
|
static struct lock_file packlock;
|
|
|
|
|
2013-06-20 10:37:47 +02:00
|
|
|
/*
|
|
|
|
* Increment the reference count of *packed_refs.
|
|
|
|
*/
|
|
|
|
static void acquire_packed_ref_cache(struct packed_ref_cache *packed_refs)
|
|
|
|
{
|
|
|
|
packed_refs->referrers++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Decrease the reference count of *packed_refs. If it goes to zero,
|
|
|
|
* free *packed_refs and return true; otherwise return false.
|
|
|
|
*/
|
|
|
|
static int release_packed_ref_cache(struct packed_ref_cache *packed_refs)
|
|
|
|
{
|
|
|
|
if (!--packed_refs->referrers) {
|
|
|
|
free_ref_entry(packed_refs->root);
|
get_packed_ref_cache: reload packed-refs file when it changes
Once we read the packed-refs file into memory, we cache it
to save work on future ref lookups. However, our cache may
be out of date with respect to what is on disk if another
process is simultaneously packing the refs. Normally it
is acceptable for us to be a little out of date, since there
is no guarantee whether we read the file before or after the
simultaneous update. However, there is an important special
case: our packed-refs file must be up to date with respect
to any loose refs we read. Otherwise, we risk the following
race condition:
0. There exists a loose ref refs/heads/master.
1. Process A starts and looks up the ref "master". It
first checks $GIT_DIR/master, which does not exist. It
then loads (and caches) the packed-refs file to see if
"master" exists in it, which it does not.
2. Meanwhile, process B runs "pack-refs --all --prune". It
creates a new packed-refs file which contains
refs/heads/master, and removes the loose copy at
$GIT_DIR/refs/heads/master.
3. Process A continues its lookup, and eventually tries
$GIT_DIR/refs/heads/master. It sees that the loose ref
is missing, and falls back to the packed-refs file. But
it examines its cached version, which does not have
refs/heads/master. After trying a few other prefixes,
it reports master as a non-existent ref.
There are many variants (e.g., step 1 may involve process A
looking up another ref entirely, so even a fully qualified
refname can fail). One of the most interesting ones is if
"refs/heads/master" is already packed. In that case process
A will not see it as missing, but rather will report
whatever value happened to be in the packed-refs file before
process B repacked (which might be an arbitrarily old
value).
We can fix this by making sure we reload the packed-refs
file from disk after looking at any loose refs. That's
unacceptably slow, so we can check its stat()-validity as a
proxy, and read it only when it appears to have changed.
Reading the packed-refs file after performing any loose-ref
system calls is sufficient because we know the ordering of
the pack-refs process: it always makes sure the newly
written packed-refs file is installed into place before
pruning any loose refs. As long as those operations by B
appear in their executed order to process A, by the time A
sees the missing loose ref, the new packed-refs file must be
in place.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-20 10:37:52 +02:00
|
|
|
stat_validity_clear(&packed_refs->validity);
|
2013-06-20 10:37:47 +02:00
|
|
|
free(packed_refs);
|
|
|
|
return 1;
|
|
|
|
} else {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-10-17 04:38:09 +02:00
|
|
|
static void clear_packed_ref_cache(struct ref_cache *refs)
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
{
|
2012-04-27 00:27:01 +02:00
|
|
|
if (refs->packed) {
|
2013-06-20 10:37:47 +02:00
|
|
|
struct packed_ref_cache *packed_refs = refs->packed;
|
|
|
|
|
|
|
|
if (packed_refs->lock)
|
2013-06-20 10:37:46 +02:00
|
|
|
die("internal error: packed-ref cache cleared while locked");
|
2012-04-27 00:27:01 +02:00
|
|
|
refs->packed = NULL;
|
2013-06-20 10:37:47 +02:00
|
|
|
release_packed_ref_cache(packed_refs);
|
2012-04-27 00:27:01 +02:00
|
|
|
}
|
2006-09-30 21:37:37 +02:00
|
|
|
}
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
|
2011-10-17 04:38:09 +02:00
|
|
|
static void clear_loose_ref_cache(struct ref_cache *refs)
|
|
|
|
{
|
2012-04-27 00:27:01 +02:00
|
|
|
if (refs->loose) {
|
|
|
|
free_ref_entry(refs->loose);
|
|
|
|
refs->loose = NULL;
|
|
|
|
}
|
2011-10-17 04:38:09 +02:00
|
|
|
}
|
|
|
|
|
2011-10-17 04:38:05 +02:00
|
|
|
static struct ref_cache *create_ref_cache(const char *submodule)
|
2011-08-13 00:36:27 +02:00
|
|
|
{
|
2011-08-13 00:36:28 +02:00
|
|
|
int len;
|
2011-10-17 04:38:05 +02:00
|
|
|
struct ref_cache *refs;
|
2011-08-13 00:36:28 +02:00
|
|
|
if (!submodule)
|
|
|
|
submodule = "";
|
|
|
|
len = strlen(submodule) + 1;
|
2011-10-17 04:38:05 +02:00
|
|
|
refs = xcalloc(1, sizeof(struct ref_cache) + len);
|
2011-08-13 00:36:28 +02:00
|
|
|
memcpy(refs->name, submodule, len);
|
2011-08-13 00:36:27 +02:00
|
|
|
return refs;
|
|
|
|
}
|
|
|
|
|
2011-08-13 00:36:25 +02:00
|
|
|
/*
|
2011-10-17 04:38:05 +02:00
|
|
|
* Return a pointer to a ref_cache for the specified submodule. For
|
2011-08-13 00:36:25 +02:00
|
|
|
* the main repository, use submodule==NULL. The returned structure
|
|
|
|
* will be allocated and initialized but not necessarily populated; it
|
|
|
|
* should not be freed.
|
|
|
|
*/
|
2011-10-17 04:38:05 +02:00
|
|
|
static struct ref_cache *get_ref_cache(const char *submodule)
|
2011-08-13 00:36:25 +02:00
|
|
|
{
|
2013-04-22 21:52:41 +02:00
|
|
|
struct ref_cache *refs;
|
|
|
|
|
|
|
|
if (!submodule || !*submodule)
|
|
|
|
return &ref_cache;
|
|
|
|
|
|
|
|
for (refs = submodule_ref_caches; refs; refs = refs->next)
|
2011-08-13 00:36:29 +02:00
|
|
|
if (!strcmp(submodule, refs->name))
|
|
|
|
return refs;
|
|
|
|
|
2011-10-17 04:38:05 +02:00
|
|
|
refs = create_ref_cache(submodule);
|
2013-04-22 21:52:41 +02:00
|
|
|
refs->next = submodule_ref_caches;
|
|
|
|
submodule_ref_caches = refs;
|
2011-08-13 00:36:29 +02:00
|
|
|
return refs;
|
2011-08-13 00:36:25 +02:00
|
|
|
}
|
|
|
|
|
2013-04-22 21:52:13 +02:00
|
|
|
/* Byte length of one peeled-reference line in packed-refs, EOL included. */
#define PEELED_LINE_LENGTH 42
|
|
|
|
|
repack_without_ref(): write peeled refs in the rewritten file
When a reference that existed in the packed-refs file is deleted, the
packed-refs file must be rewritten. Previously, the file was
rewritten without any peeled refs, even if the file contained peeled
refs when it was read. This was not a bug, because the packed-refs
file header didn't claim that the file contained peeled values. But
it had a performance cost, because the repository would lose the
benefit of having precomputed peeled references until pack-refs was
run again.
Teach repack_without_ref() to write peeled refs to the packed-refs
file (regardless of whether they were present in the old version of
the file).
This means that if the old version of the packed-refs file was not
fully peeled, then repack_without_ref() will have to peel references.
To avoid the expense of reading lots of loose references, we take two
shortcuts relative to pack-refs:
* If the peeled value of a reference is already known (i.e., because
it was read from the old version of the packed-refs file), then
output that peeled value again without any checks. This is the
usual code path and should avoid any noticeable overhead. (This is
different than pack-refs, which always re-peels references.)
* We don't verify that the packed ref is still current. It could be
that a packed reference is overridden by a loose reference, in
which case the packed ref is no longer needed and might even refer
to an object that has been garbage collected. But we don't check;
instead, we just try to peel all references. If peeling is
successful, the peeled value is written out (even though it might
not be needed any more); if not, then the reference is silently
omitted from the output.
The extra overhead of peeling references in repack_without_ref()
should only be incurred the first time the packed-refs file is written
by a version of Git that knows about the "fully-peeled" attribute.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:29 +02:00
|
|
|
/*
 * Header line written at the top of a packed-refs file.  More traits
 * may be appended in the future.  Note that the space after the last
 * trait is required.
 */
static const char PACKED_REFS_HEADER[] = "# pack-refs with: peeled fully-peeled \n";
|
|
|
|
|
2012-04-10 07:30:13 +02:00
|
|
|
/*
|
|
|
|
* Parse one line from a packed-refs file. Write the SHA1 to sha1.
|
|
|
|
* Return a pointer to the refname within the line (null-terminated),
|
|
|
|
* or NULL if there was a problem.
|
|
|
|
*/
|
2014-12-10 11:40:19 +01:00
|
|
|
static const char *parse_ref_line(struct strbuf *line, unsigned char *sha1)
|
2012-04-10 07:30:13 +02:00
|
|
|
{
|
2014-12-10 11:40:19 +01:00
|
|
|
const char *ref;
|
|
|
|
|
2012-04-10 07:30:13 +02:00
|
|
|
/*
|
|
|
|
* 42: the answer to everything.
|
|
|
|
*
|
|
|
|
* In this case, it happens to be the answer to
|
|
|
|
* 40 (length of sha1 hex representation)
|
|
|
|
* +1 (space in between hex and name)
|
|
|
|
* +1 (newline at the end of the line)
|
|
|
|
*/
|
2014-12-10 11:40:19 +01:00
|
|
|
if (line->len <= 42)
|
2012-04-10 07:30:13 +02:00
|
|
|
return NULL;
|
2014-12-10 11:40:19 +01:00
|
|
|
|
|
|
|
if (get_sha1_hex(line->buf, sha1) < 0)
|
2012-04-10 07:30:13 +02:00
|
|
|
return NULL;
|
2014-12-10 11:40:19 +01:00
|
|
|
if (!isspace(line->buf[40]))
|
2012-04-10 07:30:13 +02:00
|
|
|
return NULL;
|
2014-12-10 11:40:19 +01:00
|
|
|
|
|
|
|
ref = line->buf + 41;
|
|
|
|
if (isspace(*ref))
|
2012-04-10 07:30:13 +02:00
|
|
|
return NULL;
|
2014-12-10 11:40:19 +01:00
|
|
|
|
|
|
|
if (line->buf[line->len - 1] != '\n')
|
2012-04-10 07:30:13 +02:00
|
|
|
return NULL;
|
2014-12-10 11:40:19 +01:00
|
|
|
line->buf[--line->len] = 0;
|
2012-04-10 07:30:13 +02:00
|
|
|
|
2014-12-10 11:40:19 +01:00
|
|
|
return ref;
|
2012-04-10 07:30:13 +02:00
|
|
|
}
|
|
|
|
|
pack-refs: add fully-peeled trait
Older versions of pack-refs did not write peel lines for
refs outside of refs/tags. This meant that on reading the
pack-refs file, we might set the REF_KNOWS_PEELED flag for
such a ref, even though we do not know anything about its
peeled value.
The previous commit updated the writer to always peel, no
matter what the ref is. That means that packed-refs files
written by newer versions of git are fine to be read by both
old and new versions of git. However, we still have the
problem of reading packed-refs files written by older
versions of git, or by other implementations which have not
yet learned the same trick.
The simplest fix would be to always unset the
REF_KNOWS_PEELED flag for refs outside of refs/tags that do
not have a peel line (if it has a peel line, we know it is
valid, but we cannot assume a missing peel line means
anything). But that loses an important optimization, as
upload-pack should not need to load the object pointed to by
refs/heads/foo to determine that it is not a tag.
Instead, we add a "fully-peeled" trait to the packed-refs
file. If it is set, we know that we can trust a missing peel
line to mean that a ref cannot be peeled. Otherwise, we fall
back to assuming nothing.
[commit message and tests by Jeff King <peff@peff.net>]
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-18 12:37:32 +01:00
|
|
|
/*
|
|
|
|
* Read f, which is a packed-refs file, into dir.
|
|
|
|
*
|
|
|
|
* A comment line of the form "# pack-refs with: " may contain zero or
|
|
|
|
* more traits. We interpret the traits as follows:
|
|
|
|
*
|
|
|
|
* No traits:
|
|
|
|
*
|
|
|
|
* Probably no references are peeled. But if the file contains a
|
|
|
|
* peeled value for a reference, we will use it.
|
|
|
|
*
|
|
|
|
* peeled:
|
|
|
|
*
|
|
|
|
* References under "refs/tags/", if they *can* be peeled, *are*
|
|
|
|
* peeled in this file. References outside of "refs/tags/" are
|
|
|
|
* probably not peeled even if they could have been, but if we find
|
|
|
|
* a peeled value for such a reference we will use it.
|
|
|
|
*
|
|
|
|
* fully-peeled:
|
|
|
|
*
|
|
|
|
* All references in the file that can be peeled are peeled.
|
|
|
|
* Inversely (and this is more important), any references in the
|
|
|
|
* file for which no peeled value is recorded is not peelable. This
|
|
|
|
* trait should typically be written alongside "peeled" for
|
|
|
|
* compatibility with older clients, but we do not require it
|
|
|
|
* (i.e., "peeled" is a no-op if "fully-peeled" is set).
|
|
|
|
*/
|
2012-04-10 07:30:24 +02:00
|
|
|
static void read_packed_refs(FILE *f, struct ref_dir *dir)
|
2006-11-22 08:36:35 +01:00
|
|
|
{
|
2011-09-30 00:11:42 +02:00
|
|
|
struct ref_entry *last = NULL;
|
read_packed_refs: use a strbuf for reading lines
Current code uses a fixed PATH_MAX-sized buffer for reading
packed-refs lines. This is a reasonable guess, in the sense
that git generally cannot work with refs larger than
PATH_MAX. However, there are a few cases where it is not
great:
1. Some systems may have a low value of PATH_MAX, but can
actually handle larger paths in practice. Fixing this
code path probably isn't enough to make them work
completely with long refs, but it is a step in the
right direction.
2. We use fgets, which will happily give us half a line on
the first read, and then the rest of the line on the
second. This is probably OK in practice, because our
refline parser is careful enough to look for the
trailing newline on the first line. The second line may
look like a peeled line to us, but since "^" is illegal
in refnames, it is not likely to come up.
Still, it does not hurt to be more careful.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-10 11:40:07 +01:00
|
|
|
struct strbuf line = STRBUF_INIT;
|
pack-refs: add fully-peeled trait
Older versions of pack-refs did not write peel lines for
refs outside of refs/tags. This meant that on reading the
pack-refs file, we might set the REF_KNOWS_PEELED flag for
such a ref, even though we do not know anything about its
peeled value.
The previous commit updated the writer to always peel, no
matter what the ref is. That means that packed-refs files
written by newer versions of git are fine to be read by both
old and new versions of git. However, we still have the
problem of reading packed-refs files written by older
versions of git, or by other implementations which have not
yet learned the same trick.
The simplest fix would be to always unset the
REF_KNOWS_PEELED flag for refs outside of refs/tags that do
not have a peel line (if it has a peel line, we know it is
valid, but we cannot assume a missing peel line means
anything). But that loses an important optimization, as
upload-pack should not need to load the object pointed to by
refs/heads/foo to determine that it is not a tag.
Instead, we add a "fully-peeled" trait to the packed-refs
file. If it is set, we know that we can trust a missing peel
line to mean that a ref cannot be peeled. Otherwise, we fall
back to assuming nothing.
[commit message and tests by Jeff King <peff@peff.net>]
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-18 12:37:32 +01:00
|
|
|
enum { PEELED_NONE, PEELED_TAGS, PEELED_FULLY } peeled = PEELED_NONE;
|
2006-11-22 08:36:35 +01:00
|
|
|
|
read_packed_refs: use a strbuf for reading lines
Current code uses a fixed PATH_MAX-sized buffer for reading
packed-refs lines. This is a reasonable guess, in the sense
that git generally cannot work with refs larger than
PATH_MAX. However, there are a few cases where it is not
great:
1. Some systems may have a low value of PATH_MAX, but can
actually handle larger paths in practice. Fixing this
code path probably isn't enough to make them work
completely with long refs, but it is a step in the
right direction.
2. We use fgets, which will happily give us half a line on
the first read, and then the rest of the line on the
second. This is probably OK in practice, because our
refline parser is careful enough to look for the
trailing newline on the first line. The second line may
look like a peeled line to us, but since "^" is illegal
in refnames, it is not likely to come up.
Still, it does not hurt to be more careful.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-10 11:40:07 +01:00
|
|
|
while (strbuf_getwholeline(&line, f, '\n') != EOF) {
|
2006-11-22 08:36:35 +01:00
|
|
|
unsigned char sha1[20];
|
2011-12-12 06:38:09 +01:00
|
|
|
const char *refname;
|
2014-12-10 11:40:36 +01:00
|
|
|
const char *traits;
|
2006-11-22 08:36:35 +01:00
|
|
|
|
2014-12-10 11:40:36 +01:00
|
|
|
if (skip_prefix(line.buf, "# pack-refs with:", &traits)) {
|
pack-refs: add fully-peeled trait
Older versions of pack-refs did not write peel lines for
refs outside of refs/tags. This meant that on reading the
pack-refs file, we might set the REF_KNOWS_PEELED flag for
such a ref, even though we do not know anything about its
peeled value.
The previous commit updated the writer to always peel, no
matter what the ref is. That means that packed-refs files
written by newer versions of git are fine to be read by both
old and new versions of git. However, we still have the
problem of reading packed-refs files written by older
versions of git, or by other implementations which have not
yet learned the same trick.
The simplest fix would be to always unset the
REF_KNOWS_PEELED flag for refs outside of refs/tags that do
not have a peel line (if it has a peel line, we know it is
valid, but we cannot assume a missing peel line means
anything). But that loses an important optimization, as
upload-pack should not need to load the object pointed to by
refs/heads/foo to determine that it is not a tag.
Instead, we add a "fully-peeled" trait to the packed-refs
file. If it is set, we know that we can trust a missing peel
line to mean that a ref cannot be peeled. Otherwise, we fall
back to assuming nothing.
[commit message and tests by Jeff King <peff@peff.net>]
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-18 12:37:32 +01:00
|
|
|
if (strstr(traits, " fully-peeled "))
|
|
|
|
peeled = PEELED_FULLY;
|
|
|
|
else if (strstr(traits, " peeled "))
|
|
|
|
peeled = PEELED_TAGS;
|
2006-11-22 08:36:35 +01:00
|
|
|
/* perhaps other traits later as well */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2014-12-10 11:40:19 +01:00
|
|
|
refname = parse_ref_line(&line, sha1);
|
2011-12-12 06:38:09 +01:00
|
|
|
if (refname) {
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
int flag = REF_ISPACKED;
|
|
|
|
|
|
|
|
if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
|
read_packed_refs: avoid double-checking sane refs
Prior to d0f810f (refs.c: allow listing and deleting badly
named refs, 2014-09-03), read_packed_refs would barf on any
malformed refnames by virtue of calling create_ref_entry
with the "check" parameter set to 1. That commit loosened
our reading so that we call check_refname_format ourselves
and just set a REF_BAD_NAME flag.
We then call create_ref_entry with the check parameter set
to 0. That function learned to do an extra safety check even
when the check parameter is 0, so that we don't load any
dangerous refnames (like "../../../etc/passwd"). This is
implemented by calling refname_is_safe() in
create_ref_entry().
However, we can observe that refname_is_safe() can only be
true if check_refname_format() also failed. So in the common
case of a sanely named ref, we perform _both_ checks, even
though we know that the latter will never trigger. This has
a noticeable performance impact when the packed-refs file is
large.
Let's drop the refname_is_safe check from create_ref_entry(),
and make it the responsibility of the caller. Of the three
callers that pass a check parameter of "0", two will have
just called check_refname_format(), and can check the
refname-safety only when it fails. The third case,
pack_if_possible_fn, is copying from an existing ref entry,
which must have previously passed our safety check.
With this patch, running "git rev-parse refs/heads/does-not-exist"
on a repo with a large (1.6GB) packed-refs file went from:
real 0m6.768s
user 0m6.340s
sys 0m0.432s
to:
real 0m5.703s
user 0m5.276s
sys 0m0.432s
for a wall-clock speedup of 15%.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-16 11:03:26 +02:00
|
|
|
if (!refname_is_safe(refname))
|
|
|
|
die("packed refname is dangerous: %s", refname);
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
hashclr(sha1);
|
|
|
|
flag |= REF_BAD_NAME | REF_ISBROKEN;
|
|
|
|
}
|
|
|
|
last = create_ref_entry(refname, sha1, flag, 0);
|
pack-refs: add fully-peeled trait
Older versions of pack-refs did not write peel lines for
refs outside of refs/tags. This meant that on reading the
pack-refs file, we might set the REF_KNOWS_PEELED flag for
such a ref, even though we do not know anything about its
peeled value.
The previous commit updated the writer to always peel, no
matter what the ref is. That means that packed-refs files
written by newer versions of git are fine to be read by both
old and new versions of git. However, we still have the
problem of reading packed-refs files written by older
versions of git, or by other implementations which have not
yet learned the same trick.
The simplest fix would be to always unset the
REF_KNOWS_PEELED flag for refs outside of refs/tags that do
not have a peel line (if it has a peel line, we know it is
valid, but we cannot assume a missing peel line means
anything). But that loses an important optimization, as
upload-pack should not need to load the object pointed to by
refs/heads/foo to determine that it is not a tag.
Instead, we add a "fully-peeled" trait to the packed-refs
file. If it is set, we know that we can trust a missing peel
line to mean that a ref cannot be peeled. Otherwise, we fall
back to assuming nothing.
[commit message and tests by Jeff King <peff@peff.net>]
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-18 12:37:32 +01:00
|
|
|
if (peeled == PEELED_FULLY ||
|
2013-11-30 21:55:40 +01:00
|
|
|
(peeled == PEELED_TAGS && starts_with(refname, "refs/tags/")))
|
pack-refs: add fully-peeled trait
Older versions of pack-refs did not write peel lines for
refs outside of refs/tags. This meant that on reading the
pack-refs file, we might set the REF_KNOWS_PEELED flag for
such a ref, even though we do not know anything about its
peeled value.
The previous commit updated the writer to always peel, no
matter what the ref is. That means that packed-refs files
written by newer versions of git are fine to be read by both
old and new versions of git. However, we still have the
problem of reading packed-refs files written by older
versions of git, or by other implementations which have not
yet learned the same trick.
The simplest fix would be to always unset the
REF_KNOWS_PEELED flag for refs outside of refs/tags that do
not have a peel line (if it has a peel line, we know it is
valid, but we cannot assume a missing peel line means
anything). But that loses an important optimization, as
upload-pack should not need to load the object pointed to by
refs/heads/foo to determine that it is not a tag.
Instead, we add a "fully-peeled" trait to the packed-refs
file. If it is set, we know that we can trust a missing peel
line to mean that a ref cannot be peeled. Otherwise, we fall
back to assuming nothing.
[commit message and tests by Jeff King <peff@peff.net>]
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-18 12:37:32 +01:00
|
|
|
last->flag |= REF_KNOWS_PEELED;
|
2012-04-10 07:30:24 +02:00
|
|
|
add_ref(dir, last);
|
2006-11-22 08:36:35 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (last &&
|
read_packed_refs: use a strbuf for reading lines
Current code uses a fixed PATH_MAX-sized buffer for reading
packed-refs lines. This is a reasonable guess, in the sense
that git generally cannot work with refs larger than
PATH_MAX. However, there are a few cases where it is not
great:
1. Some systems may have a low value of PATH_MAX, but can
actually handle larger paths in practice. Fixing this
code path probably isn't enough to make them work
completely with long refs, but it is a step in the
right direction.
2. We use fgets, which will happily give us half a line on
the first read, and then the rest of the line on the
second. This is probably OK in practice, because our
refline parser is careful enough to look for the
trailing newline on the first line. The second line may
look like a peeled line to us, but since "^" is illegal
in refnames, it is not likely to come up.
Still, it does not hurt to be more careful.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-10 11:40:07 +01:00
|
|
|
line.buf[0] == '^' &&
|
|
|
|
line.len == PEELED_LINE_LENGTH &&
|
|
|
|
line.buf[PEELED_LINE_LENGTH - 1] == '\n' &&
|
|
|
|
!get_sha1_hex(line.buf + 1, sha1)) {
|
2015-05-25 20:38:27 +02:00
|
|
|
hashcpy(last->u.value.peeled.hash, sha1);
|
pack-refs: add fully-peeled trait
Older versions of pack-refs did not write peel lines for
refs outside of refs/tags. This meant that on reading the
pack-refs file, we might set the REF_KNOWS_PEELED flag for
such a ref, even though we do not know anything about its
peeled value.
The previous commit updated the writer to always peel, no
matter what the ref is. That means that packed-refs files
written by newer versions of git are fine to be read by both
old and new versions of git. However, we still have the
problem of reading packed-refs files written by older
versions of git, or by other implementations which have not
yet learned the same trick.
The simplest fix would be to always unset the
REF_KNOWS_PEELED flag for refs outside of refs/tags that do
not have a peel line (if it has a peel line, we know it is
valid, but we cannot assume a missing peel line means
anything). But that loses an important optimization, as
upload-pack should not need to load the object pointed to by
refs/heads/foo to determine that it is not a tag.
Instead, we add a "fully-peeled" trait to the packed-refs
file. If it is set, we know that we can trust a missing peel
line to mean that a ref cannot be peeled. Otherwise, we fall
back to assuming nothing.
[commit message and tests by Jeff King <peff@peff.net>]
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-18 12:37:32 +01:00
|
|
|
/*
|
|
|
|
* Regardless of what the file header said,
|
|
|
|
* we definitely know the value of *this*
|
|
|
|
* reference:
|
|
|
|
*/
|
|
|
|
last->flag |= REF_KNOWS_PEELED;
|
|
|
|
}
|
2006-11-22 08:36:35 +01:00
|
|
|
}
|
read_packed_refs: use a strbuf for reading lines
Current code uses a fixed PATH_MAX-sized buffer for reading
packed-refs lines. This is a reasonable guess, in the sense
that git generally cannot work with refs larger than
PATH_MAX. However, there are a few cases where it is not
great:
1. Some systems may have a low value of PATH_MAX, but can
actually handle larger paths in practice. Fixing this
code path probably isn't enough to make them work
completely with long refs, but it is a step in the
right direction.
2. We use fgets, which will happily give us half a line on
the first read, and then the rest of the line on the
second. This is probably OK in practice, because our
refline parser is careful enough to look for the
trailing newline on the first line. The second line may
look like a peeled line to us, but since "^" is illegal
in refnames, it is not likely to come up.
Still, it does not hurt to be more careful.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-10 11:40:07 +01:00
|
|
|
|
|
|
|
strbuf_release(&line);
|
2006-11-22 08:36:35 +01:00
|
|
|
}
|
|
|
|
|
2013-06-20 10:37:45 +02:00
|
|
|
/*
|
|
|
|
* Get the packed_ref_cache for the specified ref_cache, creating it
|
|
|
|
* if necessary.
|
|
|
|
*/
|
|
|
|
static struct packed_ref_cache *get_packed_ref_cache(struct ref_cache *refs)
|
2006-09-30 21:37:37 +02:00
|
|
|
{
|
get_packed_ref_cache: reload packed-refs file when it changes
Once we read the packed-refs file into memory, we cache it
to save work on future ref lookups. However, our cache may
be out of date with respect to what is on disk if another
process is simultaneously packing the refs. Normally it
is acceptable for us to be a little out of date, since there
is no guarantee whether we read the file before or after the
simultaneous update. However, there is an important special
case: our packed-refs file must be up to date with respect
to any loose refs we read. Otherwise, we risk the following
race condition:
0. There exists a loose ref refs/heads/master.
1. Process A starts and looks up the ref "master". It
first checks $GIT_DIR/master, which does not exist. It
then loads (and caches) the packed-refs file to see if
"master" exists in it, which it does not.
2. Meanwhile, process B runs "pack-refs --all --prune". It
creates a new packed-refs file which contains
refs/heads/master, and removes the loose copy at
$GIT_DIR/refs/heads/master.
3. Process A continues its lookup, and eventually tries
$GIT_DIR/refs/heads/master. It sees that the loose ref
is missing, and falls back to the packed-refs file. But
it examines its cached version, which does not have
refs/heads/master. After trying a few other prefixes,
it reports master as a non-existent ref.
There are many variants (e.g., step 1 may involve process A
looking up another ref entirely, so even a fully qualified
refname can fail). One of the most interesting ones is if
"refs/heads/master" is already packed. In that case process
A will not see it as missing, but rather will report
whatever value happened to be in the packed-refs file before
process B repacked (which might be an arbitrarily old
value).
We can fix this by making sure we reload the packed-refs
file from disk after looking at any loose refs. That's
unacceptably slow, so we can check its stat()-validity as a
proxy, and read it only when it appears to have changed.
Reading the packed-refs file after performing any loose-ref
system calls is sufficient because we know the ordering of
the pack-refs process: it always makes sure the newly
written packed-refs file is installed into place before
pruning any loose refs. As long as those operations by B
appear in their executed order to process A, by the time A
sees the missing loose ref, the new packed-refs file must be
in place.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-20 10:37:52 +02:00
|
|
|
const char *packed_refs_file;
|
|
|
|
|
|
|
|
if (*refs->name)
|
|
|
|
packed_refs_file = git_path_submodule(refs->name, "packed-refs");
|
|
|
|
else
|
|
|
|
packed_refs_file = git_path("packed-refs");
|
|
|
|
|
|
|
|
if (refs->packed &&
|
|
|
|
!stat_validity_check(&refs->packed->validity, packed_refs_file))
|
|
|
|
clear_packed_ref_cache(refs);
|
|
|
|
|
2012-04-27 00:27:01 +02:00
|
|
|
if (!refs->packed) {
|
2011-08-13 00:36:25 +02:00
|
|
|
FILE *f;
|
2010-07-07 15:39:11 +02:00
|
|
|
|
2013-06-20 10:37:45 +02:00
|
|
|
refs->packed = xcalloc(1, sizeof(*refs->packed));
|
2013-06-20 10:37:47 +02:00
|
|
|
acquire_packed_ref_cache(refs->packed);
|
2013-06-20 10:37:45 +02:00
|
|
|
refs->packed->root = create_dir_entry(refs, "", 0, 0);
|
2011-08-13 00:36:25 +02:00
|
|
|
f = fopen(packed_refs_file, "r");
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
if (f) {
|
get_packed_ref_cache: reload packed-refs file when it changes
Once we read the packed-refs file into memory, we cache it
to save work on future ref lookups. However, our cache may
be out of date with respect to what is on disk if another
process is simultaneously packing the refs. Normally it
is acceptable for us to be a little out of date, since there
is no guarantee whether we read the file before or after the
simultaneous update. However, there is an important special
case: our packed-refs file must be up to date with respect
to any loose refs we read. Otherwise, we risk the following
race condition:
0. There exists a loose ref refs/heads/master.
1. Process A starts and looks up the ref "master". It
first checks $GIT_DIR/master, which does not exist. It
then loads (and caches) the packed-refs file to see if
"master" exists in it, which it does not.
2. Meanwhile, process B runs "pack-refs --all --prune". It
creates a new packed-refs file which contains
refs/heads/master, and removes the loose copy at
$GIT_DIR/refs/heads/master.
3. Process A continues its lookup, and eventually tries
$GIT_DIR/refs/heads/master. It sees that the loose ref
is missing, and falls back to the packed-refs file. But
it examines its cached version, which does not have
refs/heads/master. After trying a few other prefixes,
it reports master as a non-existent ref.
There are many variants (e.g., step 1 may involve process A
looking up another ref entirely, so even a fully qualified
refname can fail). One of the most interesting ones is if
"refs/heads/master" is already packed. In that case process
A will not see it as missing, but rather will report
whatever value happened to be in the packed-refs file before
process B repacked (which might be an arbitrarily old
value).
We can fix this by making sure we reload the packed-refs
file from disk after looking at any loose refs. That's
unacceptably slow, so we can check its stat()-validity as a
proxy, and read it only when it appears to have changed.
Reading the packed-refs file after performing any loose-ref
system calls is sufficient because we know the ordering of
the pack-refs process: it always makes sure the newly
written packed-refs file is installed into place before
pruning any loose refs. As long as those operations by B
appear in their executed order to process A, by the time A
sees the missing loose ref, the new packed-refs file must be
in place.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-20 10:37:52 +02:00
|
|
|
stat_validity_update(&refs->packed->validity, fileno(f));
|
2013-06-20 10:37:45 +02:00
|
|
|
read_packed_refs(f, get_ref_dir(refs->packed->root));
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
fclose(f);
|
|
|
|
}
|
|
|
|
}
|
2013-06-20 10:37:45 +02:00
|
|
|
return refs->packed;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ref_dir *get_packed_ref_dir(struct packed_ref_cache *packed_ref_cache)
|
|
|
|
{
|
|
|
|
return get_ref_dir(packed_ref_cache->root);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return the ref_dir of packed references belonging to refs.  The
 * packed-ref cache is looked up through get_packed_ref_cache().
 */
static struct ref_dir *get_packed_refs(struct ref_cache *refs)
{
	struct packed_ref_cache *packed = get_packed_ref_cache(refs);

	return get_packed_ref_dir(packed);
}
|
|
|
|
|
2012-01-17 06:50:33 +01:00
|
|
|
void add_packed_ref(const char *refname, const unsigned char *sha1)
|
|
|
|
{
|
2013-06-20 10:37:46 +02:00
|
|
|
struct packed_ref_cache *packed_ref_cache =
|
|
|
|
get_packed_ref_cache(&ref_cache);
|
|
|
|
|
|
|
|
if (!packed_ref_cache->lock)
|
|
|
|
die("internal error: packed refs not locked");
|
|
|
|
add_ref(get_packed_ref_dir(packed_ref_cache),
|
2013-04-22 21:52:41 +02:00
|
|
|
create_ref_entry(refname, sha1, REF_ISPACKED, 1));
|
2012-01-17 06:50:33 +01:00
|
|
|
}
|
|
|
|
|
2012-04-25 00:45:10 +02:00
|
|
|
/*
|
2012-04-27 00:27:07 +02:00
|
|
|
* Read the loose references from the namespace dirname into dir
|
|
|
|
* (without recursing). dirname must end with '/'. dir must be the
|
|
|
|
* directory entry corresponding to dirname.
|
2012-04-25 00:45:10 +02:00
|
|
|
*/
|
2012-04-27 00:27:06 +02:00
|
|
|
static void read_loose_refs(const char *dirname, struct ref_dir *dir)
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
{
|
2012-04-27 00:27:06 +02:00
|
|
|
struct ref_cache *refs = dir->ref_cache;
|
2012-04-10 07:30:24 +02:00
|
|
|
DIR *d;
|
2010-07-07 15:39:11 +02:00
|
|
|
const char *path;
|
2012-04-25 00:45:07 +02:00
|
|
|
struct dirent *de;
|
2012-04-25 00:45:10 +02:00
|
|
|
int dirnamelen = strlen(dirname);
|
2012-04-25 00:45:08 +02:00
|
|
|
struct strbuf refname;
|
2010-07-07 15:39:11 +02:00
|
|
|
|
2011-12-12 06:38:17 +01:00
|
|
|
if (*refs->name)
|
2012-04-25 00:45:09 +02:00
|
|
|
path = git_path_submodule(refs->name, "%s", dirname);
|
2010-07-07 15:39:11 +02:00
|
|
|
else
|
2012-04-25 00:45:09 +02:00
|
|
|
path = git_path("%s", dirname);
|
2010-07-07 15:39:11 +02:00
|
|
|
|
2012-04-10 07:30:24 +02:00
|
|
|
d = opendir(path);
|
2012-04-25 00:45:07 +02:00
|
|
|
if (!d)
|
|
|
|
return;
|
|
|
|
|
2012-04-25 00:45:09 +02:00
|
|
|
strbuf_init(&refname, dirnamelen + 257);
|
|
|
|
strbuf_add(&refname, dirname, dirnamelen);
|
2012-04-25 00:45:07 +02:00
|
|
|
|
|
|
|
while ((de = readdir(d)) != NULL) {
|
|
|
|
unsigned char sha1[20];
|
|
|
|
struct stat st;
|
|
|
|
int flag;
|
|
|
|
const char *refdir;
|
|
|
|
|
|
|
|
if (de->d_name[0] == '.')
|
|
|
|
continue;
|
2014-06-30 18:58:25 +02:00
|
|
|
if (ends_with(de->d_name, ".lock"))
|
2012-04-25 00:45:07 +02:00
|
|
|
continue;
|
2012-04-25 00:45:08 +02:00
|
|
|
strbuf_addstr(&refname, de->d_name);
|
2012-04-25 00:45:07 +02:00
|
|
|
refdir = *refs->name
|
2012-04-25 00:45:08 +02:00
|
|
|
? git_path_submodule(refs->name, "%s", refname.buf)
|
|
|
|
: git_path("%s", refname.buf);
|
|
|
|
if (stat(refdir, &st) < 0) {
|
|
|
|
; /* silently ignore */
|
|
|
|
} else if (S_ISDIR(st.st_mode)) {
|
2012-04-25 00:45:10 +02:00
|
|
|
strbuf_addch(&refname, '/');
|
2012-04-27 00:27:07 +02:00
|
|
|
add_entry_to_dir(dir,
|
2012-05-22 20:50:52 +02:00
|
|
|
create_dir_entry(refs, refname.buf,
|
|
|
|
refname.len, 1));
|
2012-04-25 00:45:08 +02:00
|
|
|
} else {
|
2011-12-12 06:38:17 +01:00
|
|
|
if (*refs->name) {
|
2009-02-09 08:27:10 +01:00
|
|
|
hashclr(sha1);
|
2010-07-07 15:39:11 +02:00
|
|
|
flag = 0;
|
2012-04-25 00:45:08 +02:00
|
|
|
if (resolve_gitlink_ref(refs->name, refname.buf, sha1) < 0) {
|
2010-07-07 15:39:11 +02:00
|
|
|
hashclr(sha1);
|
2011-10-19 22:45:50 +02:00
|
|
|
flag |= REF_ISBROKEN;
|
2010-07-07 15:39:11 +02:00
|
|
|
}
|
2014-07-15 21:59:36 +02:00
|
|
|
} else if (read_ref_full(refname.buf,
|
|
|
|
RESOLVE_REF_READING,
|
|
|
|
sha1, &flag)) {
|
2011-11-17 01:54:32 +01:00
|
|
|
hashclr(sha1);
|
|
|
|
flag |= REF_ISBROKEN;
|
|
|
|
}
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
if (check_refname_format(refname.buf,
|
|
|
|
REFNAME_ALLOW_ONELEVEL)) {
|
read_packed_refs: avoid double-checking sane refs
Prior to d0f810f (refs.c: allow listing and deleting badly
named refs, 2014-09-03), read_packed_refs would barf on any
malformed refnames by virtue of calling create_ref_entry
with the "check" parameter set to 1. That commit loosened
our reading so that we call check_refname_format ourselves
and just set a REF_BAD_NAME flag.
We then call create_ref_entry with the check parameter set
to 0. That function learned to do an extra safety check even
when the check parameter is 0, so that we don't load any
dangerous refnames (like "../../../etc/passwd"). This is
implemented by calling refname_is_safe() in
create_ref_entry().
However, we can observe that refname_is_safe() can only be
true if check_refname_format() also failed. So in the common
case of a sanely named ref, we perform _both_ checks, even
though we know that the latter will never trigger. This has
a noticeable performance impact when the packed-refs file is
large.
Let's drop the refname_is_safe check from create_ref_entry(),
and make it the responsibility of the caller. Of the three
callers that pass a check parameter of "0", two will have
just called check_refname_format(), and can check the
refname-safety only when it fails. The third case,
pack_if_possible_fn, is copying from an existing ref entry,
which must have previously passed our safety check.
With this patch, running "git rev-parse refs/heads/does-not-exist"
on a repo with a large (1.6GB) packed-refs file went from:
real 0m6.768s
user 0m6.340s
sys 0m0.432s
to:
real 0m5.703s
user 0m5.276s
sys 0m0.432s
for a wall-clock speedup of 15%.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-16 11:03:26 +02:00
|
|
|
if (!refname_is_safe(refname.buf))
|
|
|
|
die("loose refname is dangerous: %s", refname.buf);
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
hashclr(sha1);
|
|
|
|
flag |= REF_BAD_NAME | REF_ISBROKEN;
|
|
|
|
}
|
2012-04-25 00:45:12 +02:00
|
|
|
add_entry_to_dir(dir,
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
create_ref_entry(refname.buf, sha1, flag, 0));
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
}
|
2012-04-25 00:45:09 +02:00
|
|
|
strbuf_setlen(&refname, dirnamelen);
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
}
|
2012-04-25 00:45:08 +02:00
|
|
|
strbuf_release(&refname);
|
2012-04-25 00:45:07 +02:00
|
|
|
closedir(d);
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
}
|
|
|
|
|
2012-04-10 07:30:24 +02:00
|
|
|
static struct ref_dir *get_loose_refs(struct ref_cache *refs)
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
{
|
2012-04-27 00:27:01 +02:00
|
|
|
if (!refs->loose) {
|
2012-04-27 00:27:07 +02:00
|
|
|
/*
|
|
|
|
* Mark the top-level directory complete because we
|
|
|
|
* are about to read the only subdirectory that can
|
|
|
|
* hold references:
|
|
|
|
*/
|
2012-05-22 20:50:52 +02:00
|
|
|
refs->loose = create_dir_entry(refs, "", 0, 0);
|
2012-04-27 00:27:07 +02:00
|
|
|
/*
|
|
|
|
* Create an incomplete entry for "refs/":
|
|
|
|
*/
|
|
|
|
add_entry_to_dir(get_ref_dir(refs->loose),
|
2012-05-22 20:50:52 +02:00
|
|
|
create_dir_entry(refs, "refs/", 5, 1));
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
}
|
2012-04-27 00:27:03 +02:00
|
|
|
return get_ref_dir(refs->loose);
|
Start handling references internally as a sorted in-memory list
This also adds some very rudimentary support for the notion of packed
refs. HOWEVER! At this point it isn't used to actually look up a ref
yet, only for listing them (ie "for_each_ref()" and friends see the
packed refs, but none of the other single-ref lookup routines).
Note how we keep two separate lists: one for the loose refs, and one for
the packed refs we read. That's so that we can easily keep the two apart,
and read only one set or the other (and still always make sure that the
loose refs take precedence).
[ From this, it's not actually obvious why we'd keep the two separate
lists, but it's important to have the packed refs on their own list
later on, when I add support for looking up a single loose one.
For that case, we will want to read _just_ the packed refs in case the
single-ref lookup fails, yet we may end up needing the other list at
some point in the future, so keeping them separated is important ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-12 01:37:32 +02:00
|
|
|
}
|
|
|
|
|
2005-09-25 18:59:37 +02:00
|
|
|
/* We allow "recursive" symbolic refs. Only within reason, though */
|
|
|
|
#define MAXDEPTH 5
|
2007-04-10 06:14:26 +02:00
|
|
|
/* Longest refname, in bytes, that resolve_gitlink_ref_recursive() accepts */
#define MAXREFLEN (1024)
|
|
|
|
|
2011-10-17 20:43:30 +02:00
|
|
|
/*
|
|
|
|
* Called by resolve_gitlink_ref_recursive() after it failed to read
|
2011-12-12 06:38:19 +01:00
|
|
|
* from the loose refs in ref_cache refs. Find <refname> in the
|
|
|
|
* packed-refs file for the submodule.
|
2011-10-17 20:43:30 +02:00
|
|
|
*/
|
2011-12-12 06:38:19 +01:00
|
|
|
static int resolve_gitlink_packed_ref(struct ref_cache *refs,
|
2011-12-12 06:38:10 +01:00
|
|
|
const char *refname, unsigned char *sha1)
|
2007-04-10 06:14:26 +02:00
|
|
|
{
|
2011-10-11 00:56:19 +02:00
|
|
|
struct ref_entry *ref;
|
2012-04-10 07:30:24 +02:00
|
|
|
struct ref_dir *dir = get_packed_refs(refs);
|
2007-04-10 06:14:26 +02:00
|
|
|
|
2012-04-10 07:30:26 +02:00
|
|
|
ref = find_ref(dir, refname);
|
2011-12-12 06:38:19 +01:00
|
|
|
if (ref == NULL)
|
|
|
|
return -1;
|
|
|
|
|
2015-05-25 20:38:27 +02:00
|
|
|
hashcpy(sha1, ref->u.value.oid.hash);
|
2011-12-12 06:38:19 +01:00
|
|
|
return 0;
|
2007-04-10 06:14:26 +02:00
|
|
|
}
|
|
|
|
|
2011-12-12 06:38:19 +01:00
|
|
|
static int resolve_gitlink_ref_recursive(struct ref_cache *refs,
|
2011-12-12 06:38:10 +01:00
|
|
|
const char *refname, unsigned char *sha1,
|
2011-12-12 06:38:09 +01:00
|
|
|
int recursion)
|
2007-04-10 06:14:26 +02:00
|
|
|
{
|
2011-12-12 06:38:20 +01:00
|
|
|
int fd, len;
|
2007-04-10 06:14:26 +02:00
|
|
|
char buffer[128], *p;
|
2014-11-30 09:24:27 +01:00
|
|
|
const char *path;
|
2007-04-10 06:14:26 +02:00
|
|
|
|
2011-12-12 06:38:20 +01:00
|
|
|
if (recursion > MAXDEPTH || strlen(refname) > MAXREFLEN)
|
2007-04-10 06:14:26 +02:00
|
|
|
return -1;
|
2011-12-12 06:38:20 +01:00
|
|
|
path = *refs->name
|
|
|
|
? git_path_submodule(refs->name, "%s", refname)
|
|
|
|
: git_path("%s", refname);
|
|
|
|
fd = open(path, O_RDONLY);
|
2007-04-10 06:14:26 +02:00
|
|
|
if (fd < 0)
|
2011-12-12 06:38:19 +01:00
|
|
|
return resolve_gitlink_packed_ref(refs, refname, sha1);
|
2007-04-10 06:14:26 +02:00
|
|
|
|
|
|
|
len = read(fd, buffer, sizeof(buffer)-1);
|
|
|
|
close(fd);
|
|
|
|
if (len < 0)
|
|
|
|
return -1;
|
|
|
|
while (len && isspace(buffer[len-1]))
|
|
|
|
len--;
|
|
|
|
buffer[len] = 0;
|
|
|
|
|
|
|
|
/* Was it a detached head or an old-fashioned symlink? */
|
2011-12-12 06:38:10 +01:00
|
|
|
if (!get_sha1_hex(buffer, sha1))
|
2007-04-10 06:14:26 +02:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Symref? */
|
|
|
|
if (strncmp(buffer, "ref:", 4))
|
|
|
|
return -1;
|
|
|
|
p = buffer + 4;
|
|
|
|
while (isspace(*p))
|
|
|
|
p++;
|
|
|
|
|
2011-12-12 06:38:20 +01:00
|
|
|
return resolve_gitlink_ref_recursive(refs, p, sha1, recursion+1);
|
2007-04-10 06:14:26 +02:00
|
|
|
}
|
|
|
|
|
2011-12-12 06:38:10 +01:00
|
|
|
/*
 * Resolve refname inside the submodule located at path and store the
 * result in sha1.  Trailing slashes on path are ignored.
 *
 * Returns 0 on success, or -1 if path is empty (or consists only of
 * slashes) or the reference cannot be resolved.
 */
int resolve_gitlink_ref(const char *path, const char *refname, unsigned char *sha1)
{
	int len = strlen(path);
	struct ref_cache *refs;
	char *submodule;

	/* Trim trailing '/' characters from the submodule path. */
	for (; len; len--) {
		if (path[len - 1] != '/')
			break;
	}
	if (!len)
		return -1;

	submodule = xstrndup(path, len);
	refs = get_ref_cache(submodule);
	free(submodule);

	return resolve_gitlink_ref_recursive(refs, refname, sha1, 0);
}
|
2005-09-25 18:59:37 +02:00
|
|
|
|
2008-09-09 07:10:56 +02:00
|
|
|
/*
|
2013-04-22 21:52:15 +02:00
|
|
|
* Return the ref_entry for the given refname from the packed
|
|
|
|
* references. If it does not exist, return NULL.
|
2008-09-09 07:10:56 +02:00
|
|
|
*/
|
2013-04-22 21:52:15 +02:00
|
|
|
static struct ref_entry *get_packed_ref(const char *refname)
|
2011-09-15 23:10:35 +02:00
|
|
|
{
|
2013-04-22 21:52:41 +02:00
|
|
|
return find_ref(get_packed_refs(&ref_cache), refname);
|
2011-09-15 23:10:35 +02:00
|
|
|
}
|
|
|
|
|
2013-06-19 08:36:26 +02:00
|
|
|
/*
|
|
|
|
* A loose ref file doesn't exist; check for a packed ref. The
|
|
|
|
* options are forwarded from resolve_safe_unsafe().
|
|
|
|
*/
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
static int resolve_missing_loose_ref(const char *refname,
|
|
|
|
int resolve_flags,
|
|
|
|
unsigned char *sha1,
|
|
|
|
int *flags)
|
2013-06-19 08:36:26 +02:00
|
|
|
{
|
|
|
|
struct ref_entry *entry;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The loose reference file does not exist; check for a packed
|
|
|
|
* reference.
|
|
|
|
*/
|
|
|
|
entry = get_packed_ref(refname);
|
|
|
|
if (entry) {
|
2015-05-25 20:38:27 +02:00
|
|
|
hashcpy(sha1, entry->u.value.oid.hash);
|
2014-07-15 21:59:36 +02:00
|
|
|
if (flags)
|
|
|
|
*flags |= REF_ISPACKED;
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
return 0;
|
2013-06-19 08:36:26 +02:00
|
|
|
}
|
|
|
|
/* The reference is not a packed reference, either. */
|
2014-07-15 21:59:36 +02:00
|
|
|
if (resolve_flags & RESOLVE_REF_READING) {
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
errno = ENOENT;
|
|
|
|
return -1;
|
2013-06-19 08:36:26 +02:00
|
|
|
} else {
|
|
|
|
hashclr(sha1);
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
return 0;
|
2013-06-19 08:36:26 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-06-20 16:42:54 +02:00
|
|
|
/* This function needs to return a meaningful errno on failure */
|
2014-11-30 09:24:28 +01:00
|
|
|
static const char *resolve_ref_unsafe_1(const char *refname,
|
|
|
|
int resolve_flags,
|
|
|
|
unsigned char *sha1,
|
|
|
|
int *flags,
|
|
|
|
struct strbuf *sb_path)
|
2005-07-03 05:23:36 +02:00
|
|
|
{
|
2008-04-27 20:21:58 +02:00
|
|
|
int depth = MAXDEPTH;
|
|
|
|
ssize_t len;
|
2005-09-30 23:08:25 +02:00
|
|
|
char buffer[256];
|
2011-12-12 06:38:09 +01:00
|
|
|
static char refname_buffer[256];
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
int bad_name = 0;
|
2005-09-25 18:59:37 +02:00
|
|
|
|
2014-07-15 21:59:36 +02:00
|
|
|
if (flags)
|
|
|
|
*flags = 0;
|
2006-09-21 07:02:01 +02:00
|
|
|
|
2014-06-20 16:42:54 +02:00
|
|
|
if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
if (flags)
|
|
|
|
*flags |= REF_BAD_NAME;
|
|
|
|
|
|
|
|
if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) ||
|
|
|
|
!refname_is_safe(refname)) {
|
|
|
|
errno = EINVAL;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* dwim_ref() uses REF_ISBROKEN to distinguish between
|
|
|
|
* missing refs and refs that were present but invalid,
|
|
|
|
* to complain about the latter to stderr.
|
|
|
|
*
|
|
|
|
* We don't know whether the ref exists, so don't set
|
|
|
|
* REF_ISBROKEN yet.
|
|
|
|
*/
|
|
|
|
bad_name = 1;
|
2014-06-20 16:42:54 +02:00
|
|
|
}
|
2005-09-30 23:08:25 +02:00
|
|
|
for (;;) {
|
2014-11-30 09:24:28 +01:00
|
|
|
const char *path;
|
2005-09-30 23:08:25 +02:00
|
|
|
struct stat st;
|
|
|
|
char *buf;
|
|
|
|
int fd;
|
2005-07-03 05:23:36 +02:00
|
|
|
|
2014-06-20 16:42:54 +02:00
|
|
|
if (--depth < 0) {
|
|
|
|
errno = ELOOP;
|
2005-09-30 23:08:25 +02:00
|
|
|
return NULL;
|
2014-06-20 16:42:54 +02:00
|
|
|
}
|
2005-09-25 18:59:37 +02:00
|
|
|
|
2014-11-30 09:24:28 +01:00
|
|
|
strbuf_reset(sb_path);
|
|
|
|
strbuf_git_path(sb_path, "%s", refname);
|
|
|
|
path = sb_path->buf;
|
2011-09-15 23:10:35 +02:00
|
|
|
|
resolve_ref_unsafe(): close race condition reading loose refs
We read loose references in two steps. The code is roughly:
lstat()
if error ENOENT:
loose ref is missing; look for corresponding packed ref
else if S_ISLNK:
readlink()
if error:
report failure
else if S_ISDIR:
report failure
else
open()
if error:
report failure
read()
The problem is that the first filesystem call, to lstat(), is not
atomic with the second filesystem call, to readlink() or open().
Therefore it is possible for another process to change the file
between our two calls, for example:
* If the other process deletes the file, our second call will fail
with ENOENT, which we *should* interpret as "loose ref is missing;
look for corresponding packed ref". This can arise if the other
process is pack-refs; it might have just written a new packed-refs
file containing the old contents of the reference then deleted the
loose ref.
* If the other process changes a symlink into a plain file, our call
to readlink() will fail with EINVAL, which we *should* respond to by
trying to open() and read() the file.
The old code treats the reference as missing in both of these cases,
which is incorrect.
So instead, handle errors more selectively: if the result of
readline()/open() is a failure that is inconsistent with the result of
the previous lstat(), then something is fishy. In this case jump back
and start over again with a fresh call to lstat().
One race is still possible and undetected: another process could
change the file from a regular file into a symlink between the call to
lstat and the call to open(). The open() call would silently follow
the symlink and not know that something is wrong. This situation
could be detected in two ways:
* On systems that support O_NOFOLLOW, pass that option to the open().
* On other systems, call fstat() on the fd returned by open() and make
sure that it agrees with the stat info from the original lstat().
However, we don't use symlinks anymore, so this situation is unlikely.
Moreover, it doesn't appear that treating a symlink as a regular file
would have grave consequences; after all, this is exactly how the code
handles non-relative symlinks. So this commit leaves that race
unaddressed.
Note that this solves only the part of the race within
resolve_ref_unsafe. In the situation described above, we may still be
depending on a cached view of the packed-refs file; that race will be
dealt with in a future patch.
This problem was reported and diagnosed by Jeff King <peff@peff.net>,
and this solution is derived from his patch.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-19 08:36:28 +02:00
|
|
|
/*
|
|
|
|
* We might have to loop back here to avoid a race
|
|
|
|
* condition: first we lstat() the file, then we try
|
|
|
|
* to read it as a link or as a file. But if somebody
|
|
|
|
* changes the type of the file (file <-> directory
|
|
|
|
* <-> symlink) between the lstat() and reading, then
|
|
|
|
* we don't want to report that as an error but rather
|
|
|
|
* try again starting with the lstat().
|
|
|
|
*/
|
|
|
|
stat_ref:
|
2005-09-30 23:08:25 +02:00
|
|
|
if (lstat(path, &st) < 0) {
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
if (errno != ENOENT)
|
|
|
|
return NULL;
|
|
|
|
if (resolve_missing_loose_ref(refname, resolve_flags,
|
|
|
|
sha1, flags))
|
2005-09-30 23:08:25 +02:00
|
|
|
return NULL;
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
if (bad_name) {
|
|
|
|
hashclr(sha1);
|
|
|
|
if (flags)
|
|
|
|
*flags |= REF_ISBROKEN;
|
|
|
|
}
|
|
|
|
return refname;
|
2005-09-30 23:08:25 +02:00
|
|
|
}
|
2005-09-25 18:59:37 +02:00
|
|
|
|
2005-09-30 23:08:25 +02:00
|
|
|
/* Follow "normalized" - ie "refs/.." symlinks by hand */
|
|
|
|
if (S_ISLNK(st.st_mode)) {
|
|
|
|
len = readlink(path, buffer, sizeof(buffer)-1);
|
resolve_ref_unsafe(): close race condition reading loose refs
We read loose references in two steps. The code is roughly:
lstat()
if error ENOENT:
loose ref is missing; look for corresponding packed ref
else if S_ISLNK:
readlink()
if error:
report failure
else if S_ISDIR:
report failure
else
open()
if error:
report failure
read()
The problem is that the first filesystem call, to lstat(), is not
atomic with the second filesystem call, to readlink() or open().
Therefore it is possible for another process to change the file
between our two calls, for example:
* If the other process deletes the file, our second call will fail
with ENOENT, which we *should* interpret as "loose ref is missing;
look for corresponding packed ref". This can arise if the other
process is pack-refs; it might have just written a new packed-refs
file containing the old contents of the reference then deleted the
loose ref.
* If the other process changes a symlink into a plain file, our call
to readlink() will fail with EINVAL, which we *should* respond to by
trying to open() and read() the file.
The old code treats the reference as missing in both of these cases,
which is incorrect.
So instead, handle errors more selectively: if the result of
readline()/open() is a failure that is inconsistent with the result of
the previous lstat(), then something is fishy. In this case jump back
and start over again with a fresh call to lstat().
One race is still possible and undetected: another process could
change the file from a regular file into a symlink between the call to
lstat and the call to open(). The open() call would silently follow
the symlink and not know that something is wrong. This situation
could be detected in two ways:
* On systems that support O_NOFOLLOW, pass that option to the open().
* On other systems, call fstat() on the fd returned by open() and make
sure that it agrees with the stat info from the original lstat().
However, we don't use symlinks anymore, so this situation is unlikely.
Moreover, it doesn't appear that treating a symlink as a regular file
would have grave consequences; after all, this is exactly how the code
handles non-relative symlinks. So this commit leaves that race
unaddressed.
Note that this solves only the part of the race within
resolve_ref_unsafe. In the situation described above, we may still be
depending on a cached view of the packed-refs file; that race will be
dealt with in a future patch.
This problem was reported and diagnosed by Jeff King <peff@peff.net>,
and this solution is derived from his patch.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-19 08:36:28 +02:00
|
|
|
if (len < 0) {
|
|
|
|
if (errno == ENOENT || errno == EINVAL)
|
|
|
|
/* inconsistent with lstat; retry */
|
|
|
|
goto stat_ref;
|
|
|
|
else
|
|
|
|
return NULL;
|
|
|
|
}
|
2011-09-15 23:10:32 +02:00
|
|
|
buffer[len] = 0;
|
2013-11-30 21:55:40 +01:00
|
|
|
if (starts_with(buffer, "refs/") &&
|
2011-09-15 23:10:33 +02:00
|
|
|
!check_refname_format(buffer, 0)) {
|
2011-12-12 06:38:09 +01:00
|
|
|
strcpy(refname_buffer, buffer);
|
|
|
|
refname = refname_buffer;
|
2014-07-15 21:59:36 +02:00
|
|
|
if (flags)
|
|
|
|
*flags |= REF_ISSYMREF;
|
2014-09-11 03:22:48 +02:00
|
|
|
if (resolve_flags & RESOLVE_REF_NO_RECURSE) {
|
|
|
|
hashclr(sha1);
|
|
|
|
return refname;
|
|
|
|
}
|
2005-09-30 23:08:25 +02:00
|
|
|
continue;
|
|
|
|
}
|
2005-09-25 18:59:37 +02:00
|
|
|
}
|
2005-09-30 23:08:25 +02:00
|
|
|
|
2006-10-02 19:23:53 +02:00
|
|
|
/* Is it a directory? */
|
|
|
|
if (S_ISDIR(st.st_mode)) {
|
|
|
|
errno = EISDIR;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2005-09-30 23:08:25 +02:00
|
|
|
/*
|
|
|
|
* Anything else, just open it and try to use it as
|
|
|
|
* a ref
|
|
|
|
*/
|
|
|
|
fd = open(path, O_RDONLY);
|
resolve_ref_unsafe(): close race condition reading loose refs
We read loose references in two steps. The code is roughly:
lstat()
if error ENOENT:
loose ref is missing; look for corresponding packed ref
else if S_ISLNK:
readlink()
if error:
report failure
else if S_ISDIR:
report failure
else
open()
if error:
report failure
read()
The problem is that the first filesystem call, to lstat(), is not
atomic with the second filesystem call, to readlink() or open().
Therefore it is possible for another process to change the file
between our two calls, for example:
* If the other process deletes the file, our second call will fail
with ENOENT, which we *should* interpret as "loose ref is missing;
look for corresponding packed ref". This can arise if the other
process is pack-refs; it might have just written a new packed-refs
file containing the old contents of the reference then deleted the
loose ref.
* If the other process changes a symlink into a plain file, our call
to readlink() will fail with EINVAL, which we *should* respond to by
trying to open() and read() the file.
The old code treats the reference as missing in both of these cases,
which is incorrect.
So instead, handle errors more selectively: if the result of
readline()/open() is a failure that is inconsistent with the result of
the previous lstat(), then something is fishy. In this case jump back
and start over again with a fresh call to lstat().
One race is still possible and undetected: another process could
change the file from a regular file into a symlink between the call to
lstat and the call to open(). The open() call would silently follow
the symlink and not know that something is wrong. This situation
could be detected in two ways:
* On systems that support O_NOFOLLOW, pass that option to the open().
* On other systems, call fstat() on the fd returned by open() and make
sure that it agrees with the stat info from the original lstat().
However, we don't use symlinks anymore, so this situation is unlikely.
Moreover, it doesn't appear that treating a symlink as a regular file
would have grave consequences; after all, this is exactly how the code
handles non-relative symlinks. So this commit leaves that race
unaddressed.
Note that this solves only the part of the race within
resolve_ref_unsafe. In the situation described above, we may still be
depending on a cached view of the packed-refs file; that race will be
dealt with in a future patch.
This problem was reported and diagnosed by Jeff King <peff@peff.net>,
and this solution is derived from his patch.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-19 08:36:28 +02:00
|
|
|
if (fd < 0) {
|
|
|
|
if (errno == ENOENT)
|
|
|
|
/* inconsistent with lstat; retry */
|
|
|
|
goto stat_ref;
|
|
|
|
else
|
|
|
|
return NULL;
|
|
|
|
}
|
2007-01-08 16:58:08 +01:00
|
|
|
len = read_in_full(fd, buffer, sizeof(buffer)-1);
|
2014-06-20 16:42:54 +02:00
|
|
|
if (len < 0) {
|
|
|
|
int save_errno = errno;
|
|
|
|
close(fd);
|
|
|
|
errno = save_errno;
|
2011-09-15 23:10:34 +02:00
|
|
|
return NULL;
|
2014-06-20 16:42:54 +02:00
|
|
|
}
|
|
|
|
close(fd);
|
2011-09-15 23:10:34 +02:00
|
|
|
while (len && isspace(buffer[len-1]))
|
|
|
|
len--;
|
|
|
|
buffer[len] = '\0';
|
2005-09-30 23:08:25 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Is it a symbolic ref?
|
|
|
|
*/
|
2013-11-30 21:55:40 +01:00
|
|
|
if (!starts_with(buffer, "ref:")) {
|
2013-06-19 08:36:27 +02:00
|
|
|
/*
|
|
|
|
* Please note that FETCH_HEAD has a second
|
|
|
|
* line containing other data.
|
|
|
|
*/
|
|
|
|
if (get_sha1_hex(buffer, sha1) ||
|
|
|
|
(buffer[40] != '\0' && !isspace(buffer[40]))) {
|
2014-07-15 21:59:36 +02:00
|
|
|
if (flags)
|
|
|
|
*flags |= REF_ISBROKEN;
|
2014-06-20 16:42:54 +02:00
|
|
|
errno = EINVAL;
|
2013-06-19 08:36:27 +02:00
|
|
|
return NULL;
|
|
|
|
}
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
if (bad_name) {
|
|
|
|
hashclr(sha1);
|
|
|
|
if (flags)
|
|
|
|
*flags |= REF_ISBROKEN;
|
|
|
|
}
|
2013-06-19 08:36:27 +02:00
|
|
|
return refname;
|
|
|
|
}
|
2014-07-15 21:59:36 +02:00
|
|
|
if (flags)
|
|
|
|
*flags |= REF_ISSYMREF;
|
2005-09-30 23:08:25 +02:00
|
|
|
buf = buffer + 4;
|
2011-09-15 23:10:34 +02:00
|
|
|
while (isspace(*buf))
|
|
|
|
buf++;
|
2014-09-11 03:22:48 +02:00
|
|
|
refname = strcpy(refname_buffer, buf);
|
|
|
|
if (resolve_flags & RESOLVE_REF_NO_RECURSE) {
|
|
|
|
hashclr(sha1);
|
|
|
|
return refname;
|
|
|
|
}
|
2011-09-15 23:10:36 +02:00
|
|
|
if (check_refname_format(buf, REFNAME_ALLOW_ONELEVEL)) {
|
2014-07-15 21:59:36 +02:00
|
|
|
if (flags)
|
|
|
|
*flags |= REF_ISBROKEN;
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
|
|
|
|
if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) ||
|
|
|
|
!refname_is_safe(buf)) {
|
|
|
|
errno = EINVAL;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
bad_name = 1;
|
2011-09-15 23:10:36 +02:00
|
|
|
}
|
2005-07-03 05:23:36 +02:00
|
|
|
}
|
2005-09-30 23:08:25 +02:00
|
|
|
}
|
|
|
|
|
2014-11-30 09:24:28 +01:00
|
|
|
const char *resolve_ref_unsafe(const char *refname, int resolve_flags,
|
|
|
|
unsigned char *sha1, int *flags)
|
|
|
|
{
|
|
|
|
struct strbuf sb_path = STRBUF_INIT;
|
|
|
|
const char *ret = resolve_ref_unsafe_1(refname, resolve_flags,
|
|
|
|
sha1, flags, &sb_path);
|
|
|
|
strbuf_release(&sb_path);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-07-15 21:59:36 +02:00
|
|
|
/*
 * Resolve ref via resolve_ref_unsafe() and pass the result through
 * xstrdup_or_null().  Presumably the caller receives a heap copy it
 * owns, or NULL when resolution failed -- confirm against
 * xstrdup_or_null().
 */
char *resolve_refdup(const char *ref, int resolve_flags,
		     unsigned char *sha1, int *flags)
{
	const char *resolved = resolve_ref_unsafe(ref, resolve_flags, sha1, flags);

	return xstrdup_or_null(resolved);
}
|
|
|
|
|
2010-01-20 10:48:25 +01:00
|
|
|
/* The argument to filter_refs */
|
|
|
|
struct ref_filter {
|
|
|
|
const char *pattern;
|
|
|
|
each_ref_fn *fn;
|
|
|
|
void *cb_data;
|
|
|
|
};
|
|
|
|
|
2014-07-15 21:59:36 +02:00
|
|
|
/*
 * Resolve refname with resolve_ref_unsafe().  Return 0 when that call
 * yields a non-NULL result and -1 otherwise; sha1 and flags are filled
 * in by resolve_ref_unsafe().
 */
int read_ref_full(const char *refname, int resolve_flags,
		  unsigned char *sha1, int *flags)
{
	return resolve_ref_unsafe(refname, resolve_flags, sha1, flags) ? 0 : -1;
}
|
|
|
|
|
2011-12-12 06:38:09 +01:00
|
|
|
int read_ref(const char *refname, unsigned char *sha1)
|
2011-11-13 11:22:14 +01:00
|
|
|
{
|
2014-07-15 21:59:36 +02:00
|
|
|
return read_ref_full(refname, RESOLVE_REF_READING, sha1, NULL);
|
2011-11-13 11:22:14 +01:00
|
|
|
}
|
|
|
|
|
2012-04-10 07:30:13 +02:00
|
|
|
int ref_exists(const char *refname)
|
2006-11-19 07:13:33 +01:00
|
|
|
{
|
2012-04-10 07:30:13 +02:00
|
|
|
unsigned char sha1[20];
|
2014-07-15 21:59:36 +02:00
|
|
|
return !!resolve_ref_unsafe(refname, RESOLVE_REF_READING, sha1, NULL);
|
2006-11-19 07:13:33 +01:00
|
|
|
}
|
|
|
|
|
2015-05-25 20:38:28 +02:00
|
|
|
static int filter_refs(const char *refname, const struct object_id *oid,
|
2015-05-25 20:39:21 +02:00
|
|
|
int flags, void *data)
|
2010-01-20 10:48:25 +01:00
|
|
|
{
|
|
|
|
struct ref_filter *filter = (struct ref_filter *)data;
|
2015-05-25 20:38:28 +02:00
|
|
|
|
2014-02-15 03:01:46 +01:00
|
|
|
if (wildmatch(filter->pattern, refname, 0, NULL))
|
2010-01-20 10:48:25 +01:00
|
|
|
return 0;
|
2015-05-25 20:38:28 +02:00
|
|
|
return filter->fn(refname, oid, flags, filter->cb_data);
|
2010-01-20 10:48:25 +01:00
|
|
|
}
|
|
|
|
|
2013-04-22 21:52:20 +02:00
|
|
|
/*
 * Outcome of an attempt to peel an object or a ref_entry.  Success is
 * zero; every failure mode is a distinct negative value.
 */
enum peel_status {
	/* Peeling succeeded. */
	PEEL_PEELED = 0,

	/*
	 * Peeling failed because the named object, or an object that a
	 * tag in the peel chain refers to, does not exist.
	 */
	PEEL_INVALID = -1,

	/* Nothing to peel: the object is not a tag. */
	PEEL_NON_TAG = -2,

	/* The ref_entry is a symref, so it carries no peeled value. */
	PEEL_IS_SYMREF = -3,

	/*
	 * The ref_entry is broken: the symbolic reference cannot even
	 * be resolved to an object name, so it cannot be peeled.
	 */
	PEEL_BROKEN = -4
};
|
|
|
|
|
2013-04-22 21:52:19 +02:00
|
|
|
/*
|
|
|
|
* Peel the named object; i.e., if the object is a tag, resolve the
|
2013-04-22 21:52:20 +02:00
|
|
|
* tag recursively until a non-tag is found. If successful, store the
|
|
|
|
* result to sha1 and return PEEL_PEELED. If the object is not a tag
|
|
|
|
* or is not valid, return PEEL_NON_TAG or PEEL_INVALID, respectively,
|
|
|
|
* and leave sha1 unchanged.
|
2013-04-22 21:52:19 +02:00
|
|
|
*/
|
2013-04-22 21:52:20 +02:00
|
|
|
static enum peel_status peel_object(const unsigned char *name, unsigned char *sha1)
|
2013-04-22 21:52:19 +02:00
|
|
|
{
|
|
|
|
struct object *o = lookup_unknown_object(name);
|
|
|
|
|
|
|
|
if (o->type == OBJ_NONE) {
|
|
|
|
int type = sha1_object_info(name, NULL);
|
add object_as_type helper for casting objects
When we call lookup_commit, lookup_tree, etc, the logic goes
something like:
1. Look for an existing object struct. If we don't have
one, allocate and return a new one.
2. Double check that any object we have is the expected
type (and complain and return NULL otherwise).
3. Convert an object with type OBJ_NONE (from a prior
call to lookup_unknown_object) to the expected type.
We can encapsulate steps 2 and 3 in a helper function which
checks whether we have the expected object type, converts
OBJ_NONE as appropriate, and returns the object.
Not only does this shorten the code, but it also provides
one central location for converting OBJ_NONE objects into
objects of other types. Future patches will use that to
enforce type-specific invariants.
Since this is a refactoring, we would want it to behave
exactly as the current code. It takes a little reasoning to
see that this is the case:
- for lookup_{commit,tree,etc} functions, we are just
pulling steps 2 and 3 into a function that does the same
thing.
- for the call in peel_object, we currently only do step 3
(but we want to consolidate it with the others, as
mentioned above). However, step 2 is a noop here, as the
surrounding conditional makes sure we have OBJ_NONE
(which we want to keep to avoid an extraneous call to
sha1_object_info).
- for the call in lookup_commit_reference_gently, we are
currently doing step 2 but not step 3. However, step 3
is a noop here. The object we got will have just come
from deref_tag, which must have figured out the type for
each object in order to know when to stop peeling.
Therefore the type will never be OBJ_NONE.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-07-13 08:42:03 +02:00
|
|
|
if (type < 0 || !object_as_type(o, type, 0))
|
2013-04-22 21:52:20 +02:00
|
|
|
return PEEL_INVALID;
|
2013-04-22 21:52:19 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (o->type != OBJ_TAG)
|
2013-04-22 21:52:20 +02:00
|
|
|
return PEEL_NON_TAG;
|
2013-04-22 21:52:19 +02:00
|
|
|
|
|
|
|
o = deref_tag_noverify(o);
|
|
|
|
if (!o)
|
2013-04-22 21:52:20 +02:00
|
|
|
return PEEL_INVALID;
|
2013-04-22 21:52:19 +02:00
|
|
|
|
|
|
|
hashcpy(sha1, o->sha1);
|
2013-04-22 21:52:20 +02:00
|
|
|
return PEEL_PEELED;
|
2013-04-22 21:52:19 +02:00
|
|
|
}
|
|
|
|
|
2013-04-22 21:52:22 +02:00
|
|
|
/*
|
2013-04-22 21:52:37 +02:00
|
|
|
* Peel the entry (if possible) and return its new peel_status. If
|
|
|
|
* repeel is true, re-peel the entry even if there is an old peeled
|
|
|
|
* value that is already stored in it.
|
repack_without_ref(): write peeled refs in the rewritten file
When a reference that existed in the packed-refs file is deleted, the
packed-refs file must be rewritten. Previously, the file was
rewritten without any peeled refs, even if the file contained peeled
refs when it was read. This was not a bug, because the packed-refs
file header didn't claim that the file contained peeled values. But
it had a performance cost, because the repository would lose the
benefit of having precomputed peeled references until pack-refs was
run again.
Teach repack_without_ref() to write peeled refs to the packed-refs
file (regardless of whether they were present in the old version of
the file).
This means that if the old version of the packed-refs file was not
fully peeled, then repack_without_ref() will have to peel references.
To avoid the expense of reading lots of loose references, we take two
shortcuts relative to pack-refs:
* If the peeled value of a reference is already known (i.e., because
it was read from the old version of the packed-refs file), then
output that peeled value again without any checks. This is the
usual code path and should avoid any noticeable overhead. (This is
different than pack-refs, which always re-peels references.)
* We don't verify that the packed ref is still current. It could be
that a packed reference is overridden by a loose reference, in
which case the packed ref is no longer needed and might even refer
to an object that has been garbage collected. But we don't check;
instead, we just try to peel all references. If peeling is
successful, the peeled value is written out (even though it might
not be needed any more); if not, then the reference is silently
omitted from the output.
The extra overhead of peeling references in repack_without_ref()
should only be incurred the first time the packed-refs file is written
by a version of Git that knows about the "fully-peeled" attribute.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:29 +02:00
|
|
|
*
|
|
|
|
* It is OK to call this function with a packed reference entry that
|
|
|
|
* might be stale and might even refer to an object that has since
|
|
|
|
* been garbage-collected. In such a case, if the entry has
|
|
|
|
* REF_KNOWS_PEELED then leave the status unchanged and return
|
|
|
|
* PEEL_PEELED or PEEL_NON_TAG; otherwise, return PEEL_INVALID.
|
2013-04-22 21:52:22 +02:00
|
|
|
*/
|
2013-04-22 21:52:37 +02:00
|
|
|
static enum peel_status peel_entry(struct ref_entry *entry, int repeel)
|
2013-04-22 21:52:22 +02:00
|
|
|
{
|
|
|
|
enum peel_status status;
|
|
|
|
|
2013-04-22 21:52:37 +02:00
|
|
|
if (entry->flag & REF_KNOWS_PEELED) {
|
|
|
|
if (repeel) {
|
|
|
|
entry->flag &= ~REF_KNOWS_PEELED;
|
2015-05-25 20:38:27 +02:00
|
|
|
oidclr(&entry->u.value.peeled);
|
2013-04-22 21:52:37 +02:00
|
|
|
} else {
|
2015-05-25 20:38:27 +02:00
|
|
|
return is_null_oid(&entry->u.value.peeled) ?
|
2013-04-22 21:52:37 +02:00
|
|
|
PEEL_NON_TAG : PEEL_PEELED;
|
|
|
|
}
|
|
|
|
}
|
2013-04-22 21:52:22 +02:00
|
|
|
if (entry->flag & REF_ISBROKEN)
|
|
|
|
return PEEL_BROKEN;
|
|
|
|
if (entry->flag & REF_ISSYMREF)
|
|
|
|
return PEEL_IS_SYMREF;
|
|
|
|
|
2015-05-25 20:38:27 +02:00
|
|
|
status = peel_object(entry->u.value.oid.hash, entry->u.value.peeled.hash);
|
2013-04-22 21:52:22 +02:00
|
|
|
if (status == PEEL_PEELED || status == PEEL_NON_TAG)
|
|
|
|
entry->flag |= REF_KNOWS_PEELED;
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
2011-12-12 06:38:09 +01:00
|
|
|
int peel_ref(const char *refname, unsigned char *sha1)
|
2006-11-19 22:22:44 +01:00
|
|
|
{
|
|
|
|
int flag;
|
|
|
|
unsigned char base[20];
|
|
|
|
|
2011-12-12 06:38:09 +01:00
|
|
|
if (current_ref && (current_ref->name == refname
|
2013-04-22 21:52:22 +02:00
|
|
|
|| !strcmp(current_ref->name, refname))) {
|
2013-04-22 21:52:37 +02:00
|
|
|
if (peel_entry(current_ref, 0))
|
2013-04-22 21:52:22 +02:00
|
|
|
return -1;
|
2015-05-25 20:38:27 +02:00
|
|
|
hashcpy(sha1, current_ref->u.value.peeled.hash);
|
2013-04-22 21:52:22 +02:00
|
|
|
return 0;
|
2008-02-24 09:07:22 +01:00
|
|
|
}
|
|
|
|
|
2014-07-15 21:59:36 +02:00
|
|
|
if (read_ref_full(refname, RESOLVE_REF_READING, base, &flag))
|
2006-11-19 22:22:44 +01:00
|
|
|
return -1;
|
|
|
|
|
2013-04-22 21:52:22 +02:00
|
|
|
/*
|
|
|
|
* If the reference is packed, read its ref_entry from the
|
|
|
|
* cache in the hope that we already know its peeled value.
|
|
|
|
* We only try this optimization on packed references because
|
|
|
|
* (a) forcing the filling of the loose reference cache could
|
|
|
|
* be expensive and (b) loose references anyway usually do not
|
|
|
|
* have REF_KNOWS_PEELED.
|
|
|
|
*/
|
|
|
|
if (flag & REF_ISPACKED) {
|
2013-04-22 21:52:16 +02:00
|
|
|
struct ref_entry *r = get_packed_ref(refname);
|
2013-04-22 21:52:22 +02:00
|
|
|
if (r) {
|
2013-04-22 21:52:37 +02:00
|
|
|
if (peel_entry(r, 0))
|
2013-04-22 21:52:22 +02:00
|
|
|
return -1;
|
2015-05-25 20:38:27 +02:00
|
|
|
hashcpy(sha1, r->u.value.peeled.hash);
|
2011-09-30 00:11:42 +02:00
|
|
|
return 0;
|
2006-11-19 22:22:44 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-04-22 21:52:19 +02:00
|
|
|
return peel_object(base, sha1);
|
2006-11-19 22:22:44 +01:00
|
|
|
}
|
|
|
|
|
2012-04-10 07:30:13 +02:00
|
|
|
/*
 * Callback payload for warn_if_dangling_symref().
 */
struct warn_if_dangling_data {
	/* stream the warning is written to */
	FILE *fp;
	/* single symref target to look for, or NULL to use refnames */
	const char *refname;
	/* list of targets consulted when refname is NULL */
	const struct string_list *refnames;
	/* printf-style format; receives the symref's name as its argument */
	const char *msg_fmt;
};
|
|
|
|
|
2015-05-25 20:38:28 +02:00
|
|
|
static int warn_if_dangling_symref(const char *refname, const struct object_id *oid,
|
2012-04-10 07:30:13 +02:00
|
|
|
int flags, void *cb_data)
|
|
|
|
{
|
|
|
|
struct warn_if_dangling_data *d = cb_data;
|
|
|
|
const char *resolves_to;
|
2015-05-25 20:39:21 +02:00
|
|
|
struct object_id junk;
|
2012-04-10 07:30:13 +02:00
|
|
|
|
|
|
|
if (!(flags & REF_ISSYMREF))
|
|
|
|
return 0;
|
|
|
|
|
2015-05-25 20:39:21 +02:00
|
|
|
resolves_to = resolve_ref_unsafe(refname, 0, junk.hash, NULL);
|
2014-05-23 12:30:25 +02:00
|
|
|
if (!resolves_to
|
|
|
|
|| (d->refname
|
|
|
|
? strcmp(resolves_to, d->refname)
|
|
|
|
: !string_list_has_string(d->refnames, resolves_to))) {
|
2012-04-10 07:30:13 +02:00
|
|
|
return 0;
|
2014-05-23 12:30:25 +02:00
|
|
|
}
|
2012-04-10 07:30:13 +02:00
|
|
|
|
|
|
|
fprintf(d->fp, d->msg_fmt, refname);
|
2012-05-02 22:51:35 +02:00
|
|
|
fputc('\n', d->fp);
|
2012-04-10 07:30:13 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname)
|
|
|
|
{
|
|
|
|
struct warn_if_dangling_data data;
|
|
|
|
|
|
|
|
data.fp = fp;
|
|
|
|
data.refname = refname;
|
2014-05-23 12:30:25 +02:00
|
|
|
data.refnames = NULL;
|
|
|
|
data.msg_fmt = msg_fmt;
|
|
|
|
for_each_rawref(warn_if_dangling_symref, &data);
|
|
|
|
}
|
|
|
|
|
|
|
|
void warn_dangling_symrefs(FILE *fp, const char *msg_fmt, const struct string_list *refnames)
|
|
|
|
{
|
|
|
|
struct warn_if_dangling_data data;
|
|
|
|
|
|
|
|
data.fp = fp;
|
|
|
|
data.refname = NULL;
|
|
|
|
data.refnames = refnames;
|
2012-04-10 07:30:13 +02:00
|
|
|
data.msg_fmt = msg_fmt;
|
|
|
|
for_each_rawref(warn_if_dangling_symref, &data);
|
|
|
|
}
|
|
|
|
|
2013-04-22 21:52:11 +02:00
|
|
|
/*
|
2013-04-22 21:52:40 +02:00
|
|
|
* Call fn for each reference in the specified ref_cache, omitting
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
* references not in the containing_dir of base. fn is called for all
|
|
|
|
* references, including broken ones. If fn ever returns a non-zero
|
2013-04-22 21:52:11 +02:00
|
|
|
* value, stop the iteration and return that value; otherwise, return
|
|
|
|
* 0.
|
|
|
|
*/
|
2013-04-22 21:52:40 +02:00
|
|
|
static int do_for_each_entry(struct ref_cache *refs, const char *base,
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
each_ref_entry_fn fn, void *cb_data)
|
2005-07-03 05:23:36 +02:00
|
|
|
{
|
for_each_ref: load all loose refs before packed refs
If we are iterating through the refs using for_each_ref (or
any of its sister functions), we can get into a race
condition with a simultaneous "pack-refs --prune" that looks
like this:
0. We have a large number of loose refs, and a few packed
refs. refs/heads/z/foo is loose, with no matching entry
in the packed-refs file.
1. Process A starts iterating through the refs. It loads
the packed-refs file from disk, then starts lazily
traversing through the loose ref directories.
2. Process B, running "pack-refs --prune", writes out the
new packed-refs file. It then deletes the newly packed
refs, including refs/heads/z/foo.
3. Meanwhile, process A has finally gotten to
refs/heads/z (it traverses alphabetically). It
descends, but finds nothing there. It checks its
cached view of the packed-refs file, but it does not
mention anything in "refs/heads/z/" at all (it predates
the new file written by B in step 2).
The traversal completes successfully without mentioning
refs/heads/z/foo at all (the name, of course, isn't
important; but the more refs you have and the farther down
the alphabetical list a ref is, the more likely it is to hit
the race). If refs/heads/z/foo did exist in the packed refs
file at state 0, we would see an entry for it, but it would
show whatever sha1 the ref had the last time it was packed
(which could be an arbitrarily long time ago).
This can be especially dangerous when process A is "git
prune", as it means our set of reachable tips will be
incomplete, and we may erroneously prune objects reachable
from that tip (the same thing can happen if "repack -ad" is
used, as it simply drops unreachable objects that are
packed).
This patch solves it by loading all of the loose refs for
our traversal into our in-memory cache, and then refreshing
the packed-refs cache. Because a pack-refs writer will
always put the new packed-refs file into place before
starting the prune, we know that any loose refs we fail to
see will either truly be missing, or will have already been
put in the packed-refs file by the time we refresh.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-20 10:37:53 +02:00
|
|
|
struct packed_ref_cache *packed_ref_cache;
|
|
|
|
struct ref_dir *loose_dir;
|
|
|
|
struct ref_dir *packed_dir;
|
2012-04-10 07:30:27 +02:00
|
|
|
int retval = 0;
|
|
|
|
|
for_each_ref: load all loose refs before packed refs
If we are iterating through the refs using for_each_ref (or
any of its sister functions), we can get into a race
condition with a simultaneous "pack-refs --prune" that looks
like this:
0. We have a large number of loose refs, and a few packed
refs. refs/heads/z/foo is loose, with no matching entry
in the packed-refs file.
1. Process A starts iterating through the refs. It loads
the packed-refs file from disk, then starts lazily
traversing through the loose ref directories.
2. Process B, running "pack-refs --prune", writes out the
new packed-refs file. It then deletes the newly packed
refs, including refs/heads/z/foo.
3. Meanwhile, process A has finally gotten to
refs/heads/z (it traverses alphabetically). It
descends, but finds nothing there. It checks its
cached view of the packed-refs file, but it does not
mention anything in "refs/heads/z/" at all (it predates
the new file written by B in step 2).
The traversal completes successfully without mentioning
refs/heads/z/foo at all (the name, of course, isn't
important; but the more refs you have and the farther down
the alphabetical list a ref is, the more likely it is to hit
the race). If refs/heads/z/foo did exist in the packed refs
file at state 0, we would see an entry for it, but it would
show whatever sha1 the ref had the last time it was packed
(which could be an arbitrarily long time ago).
This can be especially dangerous when process A is "git
prune", as it means our set of reachable tips will be
incomplete, and we may erroneously prune objects reachable
from that tip (the same thing can happen if "repack -ad" is
used, as it simply drops unreachable objects that are
packed).
This patch solves it by loading all of the loose refs for
our traversal into our in-memory cache, and then refreshing
the packed-refs cache. Because a pack-refs writer will
always put the new packed-refs file into place before
starting the prune, we know that any loose refs we fail to
see will either truly be missing, or will have already been
put in the packed-refs file by the time we refresh.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-20 10:37:53 +02:00
|
|
|
/*
|
|
|
|
* We must make sure that all loose refs are read before accessing the
|
|
|
|
* packed-refs file; this avoids a race condition in which loose refs
|
|
|
|
* are migrated to the packed-refs file by a simultaneous process, but
|
|
|
|
* our in-memory view is from before the migration. get_packed_ref_cache()
|
|
|
|
* takes care of making sure our view is up to date with what is on
|
|
|
|
* disk.
|
|
|
|
*/
|
|
|
|
loose_dir = get_loose_refs(refs);
|
2012-04-10 07:30:27 +02:00
|
|
|
if (base && *base) {
|
|
|
|
loose_dir = find_containing_dir(loose_dir, base, 0);
|
|
|
|
}
|
for_each_ref: load all loose refs before packed refs
If we are iterating through the refs using for_each_ref (or
any of its sister functions), we can get into a race
condition with a simultaneous "pack-refs --prune" that looks
like this:
0. We have a large number of loose refs, and a few packed
refs. refs/heads/z/foo is loose, with no matching entry
in the packed-refs file.
1. Process A starts iterating through the refs. It loads
the packed-refs file from disk, then starts lazily
traversing through the loose ref directories.
2. Process B, running "pack-refs --prune", writes out the
new packed-refs file. It then deletes the newly packed
refs, including refs/heads/z/foo.
3. Meanwhile, process A has finally gotten to
refs/heads/z (it traverses alphabetically). It
descends, but finds nothing there. It checks its
cached view of the packed-refs file, but it does not
mention anything in "refs/heads/z/" at all (it predates
the new file written by B in step 2).
The traversal completes successfully without mentioning
refs/heads/z/foo at all (the name, of course, isn't
important; but the more refs you have and the farther down
the alphabetical list a ref is, the more likely it is to hit
the race). If refs/heads/z/foo did exist in the packed refs
file at state 0, we would see an entry for it, but it would
show whatever sha1 the ref had the last time it was packed
(which could be an arbitrarily long time ago).
This can be especially dangerous when process A is "git
prune", as it means our set of reachable tips will be
incomplete, and we may erroneously prune objects reachable
from that tip (the same thing can happen if "repack -ad" is
used, as it simply drops unreachable objects that are
packed).
This patch solves it by loading all of the loose refs for
our traversal into our in-memory cache, and then refreshing
the packed-refs cache. Because a pack-refs writer will
always put the new packed-refs file into place before
starting the prune, we know that any loose refs we fail to
see will either truly be missing, or will have already been
put in the packed-refs file by the time we refresh.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-20 10:37:53 +02:00
|
|
|
if (loose_dir)
|
|
|
|
prime_ref_dir(loose_dir);
|
|
|
|
|
|
|
|
packed_ref_cache = get_packed_ref_cache(refs);
|
2013-06-20 10:37:48 +02:00
|
|
|
acquire_packed_ref_cache(packed_ref_cache);
|
for_each_ref: load all loose refs before packed refs
If we are iterating through the refs using for_each_ref (or
any of its sister functions), we can get into a race
condition with a simultaneous "pack-refs --prune" that looks
like this:
0. We have a large number of loose refs, and a few packed
refs. refs/heads/z/foo is loose, with no matching entry
in the packed-refs file.
1. Process A starts iterating through the refs. It loads
the packed-refs file from disk, then starts lazily
traversing through the loose ref directories.
2. Process B, running "pack-refs --prune", writes out the
new packed-refs file. It then deletes the newly packed
refs, including refs/heads/z/foo.
3. Meanwhile, process A has finally gotten to
refs/heads/z (it traverses alphabetically). It
descends, but finds nothing there. It checks its
cached view of the packed-refs file, but it does not
mention anything in "refs/heads/z/" at all (it predates
the new file written by B in step 2).
The traversal completes successfully without mentioning
refs/heads/z/foo at all (the name, of course, isn't
important; but the more refs you have and the farther down
the alphabetical list a ref is, the more likely it is to hit
the race). If refs/heads/z/foo did exist in the packed refs
file at state 0, we would see an entry for it, but it would
show whatever sha1 the ref had the last time it was packed
(which could be an arbitrarily long time ago).
This can be especially dangerous when process A is "git
prune", as it means our set of reachable tips will be
incomplete, and we may erroneously prune objects reachable
from that tip (the same thing can happen if "repack -ad" is
used, as it simply drops unreachable objects that are
packed).
This patch solves it by loading all of the loose refs for
our traversal into our in-memory cache, and then refreshing
the packed-refs cache. Because a pack-refs writer will
always put the new packed-refs file into place before
starting the prune, we know that any loose refs we fail to
see will either truly be missing, or will have already been
put in the packed-refs file by the time we refresh.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-20 10:37:53 +02:00
|
|
|
packed_dir = get_packed_ref_dir(packed_ref_cache);
|
2012-04-10 07:30:27 +02:00
|
|
|
if (base && *base) {
|
|
|
|
packed_dir = find_containing_dir(packed_dir, base, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (packed_dir && loose_dir) {
|
|
|
|
sort_ref_dir(packed_dir);
|
|
|
|
sort_ref_dir(loose_dir);
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
retval = do_for_each_entry_in_dirs(
|
|
|
|
packed_dir, loose_dir, fn, cb_data);
|
2012-04-10 07:30:27 +02:00
|
|
|
} else if (packed_dir) {
|
|
|
|
sort_ref_dir(packed_dir);
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
retval = do_for_each_entry_in_dir(
|
|
|
|
packed_dir, 0, fn, cb_data);
|
2012-04-10 07:30:27 +02:00
|
|
|
} else if (loose_dir) {
|
|
|
|
sort_ref_dir(loose_dir);
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
retval = do_for_each_entry_in_dir(
|
|
|
|
loose_dir, 0, fn, cb_data);
|
2012-04-10 07:30:27 +02:00
|
|
|
}
|
|
|
|
|
2013-06-20 10:37:48 +02:00
|
|
|
release_packed_ref_cache(packed_ref_cache);
|
2012-04-10 07:30:27 +02:00
|
|
|
return retval;
|
2005-07-03 05:23:36 +02:00
|
|
|
}
|
|
|
|
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
/*
|
2013-04-22 21:52:40 +02:00
|
|
|
* Call fn for each reference in the specified ref_cache for which the
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
* refname begins with base. If trim is non-zero, then trim that many
|
|
|
|
* characters off the beginning of each refname before passing the
|
|
|
|
* refname to fn. flags can be DO_FOR_EACH_INCLUDE_BROKEN to include
|
|
|
|
* broken references in the iteration. If fn ever returns a non-zero
|
|
|
|
* value, stop the iteration and return that value; otherwise, return
|
|
|
|
* 0.
|
|
|
|
*/
|
2013-04-22 21:52:40 +02:00
|
|
|
static int do_for_each_ref(struct ref_cache *refs, const char *base,
|
|
|
|
each_ref_fn fn, int trim, int flags, void *cb_data)
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
{
|
|
|
|
struct ref_entry_cb data;
|
|
|
|
data.base = base;
|
|
|
|
data.trim = trim;
|
|
|
|
data.flags = flags;
|
|
|
|
data.fn = fn;
|
|
|
|
data.cb_data = cb_data;
|
|
|
|
|
2015-03-20 19:43:06 +01:00
|
|
|
if (ref_paranoia < 0)
|
|
|
|
ref_paranoia = git_env_bool("GIT_REF_PARANOIA", 0);
|
|
|
|
if (ref_paranoia)
|
|
|
|
data.flags |= DO_FOR_EACH_INCLUDE_BROKEN;
|
|
|
|
|
2013-04-22 21:52:40 +02:00
|
|
|
return do_for_each_entry(refs, base, do_one_ref, &data);
|
refs: change the internal reference-iteration API
Establish an internal API for iterating over references, which gives
the callback functions direct access to the ref_entry structure
describing the reference. (Do not change the iteration API that is
exposed outside of the module.)
Define a new internal callback signature
int each_ref_entry_fn(struct ref_entry *entry, void *cb_data)
Change do_for_each_ref_in_dir() and do_for_each_ref_in_dirs() to
accept each_ref_entry_fn callbacks, and rename them to
do_for_each_entry_in_dir() and do_for_each_entry_in_dirs(),
respectively. Adapt their callers accordingly.
Add a new function do_for_each_entry() analogous to do_for_each_ref()
but using the new callback style.
Change do_one_ref() into an each_ref_entry_fn that does some
bookkeeping and then calls a wrapped each_ref_fn.
Reimplement do_for_each_ref() in terms of do_for_each_entry(), using
do_one_ref() as an adapter.
Please note that the responsibility for setting current_ref remains in
do_one_ref(), which means that current_ref is *not* set when iterating
over references via the new internal API. This is not a disadvantage,
because current_ref is not needed by callers of the internal API (they
receive a pointer to the current ref_entry anyway). But more
importantly, this change prevents peel_ref() from returning invalid
results in the following scenario:
When iterating via the external API, the iteration always includes
both packed and loose references, and in particular never presents a
packed ref if there is a loose ref with the same name. The internal
API, on the other hand, gives the option to iterate over only the
packed references. During such an iteration, there is no check
whether the packed ref might be hidden by a loose ref of the same
name. But until now the packed ref was recorded in current_ref during
the iteration. So if peel_ref() were called with the reference name
corresponding to current ref, it would return the peeled version of
the packed ref even though there might be a loose ref that peels to a
different value. This scenario doesn't currently occur in the code,
but fix it to prevent things from breaking in a very confusing way in
the future.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-22 21:52:23 +02:00
|
|
|
}
|
|
|
|
|
2010-07-07 15:39:11 +02:00
|
|
|
/*
 * Resolve "HEAD" and, if that succeeds, call fn on it and return fn's
 * result.  With a non-NULL submodule, HEAD is resolved through
 * resolve_gitlink_ref() and fn receives a flag value of 0; otherwise it
 * is read with read_ref_full() and fn receives the flag reported by
 * that call.  Returns 0 when HEAD cannot be resolved.
 */
static int do_head_ref(const char *submodule, each_ref_fn fn, void *cb_data)
{
	struct object_id oid;
	int flag;

	if (!submodule) {
		if (read_ref_full("HEAD", RESOLVE_REF_READING, oid.hash, &flag))
			return 0;
		return fn("HEAD", &oid, flag, cb_data);
	}

	if (resolve_gitlink_ref(submodule, "HEAD", oid.hash) != 0)
		return 0;
	return fn("HEAD", &oid, 0, cb_data);
}
|
|
|
|
|
2010-07-07 15:39:11 +02:00
|
|
|
/* Call fn on HEAD, resolved without going through a submodule. */
int head_ref(each_ref_fn fn, void *cb_data)
{
	const char *no_submodule = NULL;

	return do_head_ref(no_submodule, fn, cb_data);
}
|
|
|
|
|
2010-07-07 15:39:12 +02:00
|
|
|
/* Call fn on HEAD as resolved for the given submodule. */
int head_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data)
{
	int result = do_head_ref(submodule, fn, cb_data);

	return result;
}
|
|
|
|
|
2006-09-21 06:47:42 +02:00
|
|
|
/*
 * Call fn for every reference in the file-level ref_cache: empty base,
 * nothing trimmed from the refnames, no extra flags.
 */
int for_each_ref(each_ref_fn fn, void *cb_data)
{
	const char *base = "";
	const int trim = 0;
	const int flags = 0;

	return do_for_each_ref(&ref_cache, base, fn, trim, flags, cb_data);
}
|
|
|
|
|
2010-07-07 15:39:12 +02:00
|
|
|
/*
 * Like for_each_ref(), but iterate over the ref cache obtained from
 * get_ref_cache() for the given submodule.
 */
int for_each_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data)
{
	struct ref_cache *refs = get_ref_cache(submodule);

	return do_for_each_ref(refs, "", fn, 0, 0, cb_data);
}
|
|
|
|
|
2009-03-30 05:07:15 +02:00
|
|
|
/*
 * Call fn for each reference whose name starts with prefix.  The
 * prefix length is passed as the trim count, so fn sees the names with
 * the prefix removed.
 */
int for_each_ref_in(const char *prefix, each_ref_fn fn, void *cb_data)
{
	int trim = strlen(prefix);

	return do_for_each_ref(&ref_cache, prefix, fn, trim, 0, cb_data);
}
|
|
|
|
|
2010-07-07 15:39:12 +02:00
|
|
|
int for_each_ref_in_submodule(const char *submodule, const char *prefix,
|
|
|
|
each_ref_fn fn, void *cb_data)
|
|
|
|
{
|
2013-04-22 21:52:40 +02:00
|
|
|
return do_for_each_ref(get_ref_cache(submodule), prefix, fn, strlen(prefix), 0, cb_data);
|
2009-03-30 05:07:15 +02:00
|
|
|
}
|
|
|
|
|
2006-09-21 06:47:42 +02:00
|
|
|
/* Call fn for each reference under "refs/tags/". */
int for_each_tag_ref(each_ref_fn fn, void *cb_data)
{
	static const char tags_prefix[] = "refs/tags/";

	return for_each_ref_in(tags_prefix, fn, cb_data);
}
|
|
|
|
|
2010-07-07 15:39:12 +02:00
|
|
|
/* Call fn for each reference under "refs/tags/" in the given submodule. */
int for_each_tag_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data)
{
	static const char tags_prefix[] = "refs/tags/";

	return for_each_ref_in_submodule(submodule, tags_prefix, fn, cb_data);
}
|
|
|
|
|
2006-09-21 06:47:42 +02:00
|
|
|
/* Call fn for each reference under "refs/heads/". */
int for_each_branch_ref(each_ref_fn fn, void *cb_data)
{
	static const char heads_prefix[] = "refs/heads/";

	return for_each_ref_in(heads_prefix, fn, cb_data);
}
|
|
|
|
|
2010-07-07 15:39:12 +02:00
|
|
|
/* Call fn for each reference under "refs/heads/" in the given submodule. */
int for_each_branch_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data)
{
	static const char heads_prefix[] = "refs/heads/";

	return for_each_ref_in_submodule(submodule, heads_prefix, fn, cb_data);
}
|
|
|
|
|
2006-09-21 06:47:42 +02:00
|
|
|
/* Call fn for each reference under "refs/remotes/". */
int for_each_remote_ref(each_ref_fn fn, void *cb_data)
{
	static const char remotes_prefix[] = "refs/remotes/";

	return for_each_ref_in(remotes_prefix, fn, cb_data);
}
|
|
|
|
|
2010-07-07 15:39:12 +02:00
|
|
|
/* Call fn for each reference under "refs/remotes/" in the given submodule. */
int for_each_remote_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data)
{
	static const char remotes_prefix[] = "refs/remotes/";

	return for_each_ref_in_submodule(submodule, remotes_prefix, fn, cb_data);
}
|
|
|
|
|
2009-01-23 10:06:38 +01:00
|
|
|
/*
 * Call fn for each reference under "refs/replace/", with that prefix
 * trimmed from the refname handed to fn.
 */
int for_each_replace_ref(each_ref_fn fn, void *cb_data)
{
	static const char replace_prefix[] = "refs/replace/";
	int trim = strlen(replace_prefix);	/* 13 */

	return do_for_each_ref(&ref_cache, replace_prefix, fn, trim, 0, cb_data);
}
|
|
|
|
|
ref namespaces: infrastructure
Add support for dividing the refs of a single repository into multiple
namespaces, each of which can have its own branches, tags, and HEAD.
Git can expose each namespace as an independent repository to pull from
and push to, while sharing the object store, and exposing all the refs
to operations such as git-gc.
Storing multiple repositories as namespaces of a single repository
avoids storing duplicate copies of the same objects, such as when
storing multiple branches of the same source. The alternates mechanism
provides similar support for avoiding duplicates, but alternates do not
prevent duplication between new objects added to the repositories
without ongoing maintenance, while namespaces do.
To specify a namespace, set the GIT_NAMESPACE environment variable to
the namespace. For each ref namespace, git stores the corresponding
refs in a directory under refs/namespaces/. For example,
GIT_NAMESPACE=foo will store refs under refs/namespaces/foo/. You can
also specify namespaces via the --namespace option to git.
Note that namespaces which include a / will expand to a hierarchy of
namespaces; for example, GIT_NAMESPACE=foo/bar will store refs under
refs/namespaces/foo/refs/namespaces/bar/. This makes paths in
GIT_NAMESPACE behave hierarchically, so that cloning with
GIT_NAMESPACE=foo/bar produces the same result as cloning with
GIT_NAMESPACE=foo and cloning from that repo with GIT_NAMESPACE=bar. It
also avoids ambiguity with strange namespace paths such as
foo/refs/heads/, which could otherwise generate directory/file conflicts
within the refs directory.
Add the infrastructure for ref namespaces: handle the GIT_NAMESPACE
environment variable and --namespace option, and support iterating over
refs in a namespace.
Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Jamey Sharp <jamey@minilop.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-07-05 19:54:44 +02:00
|
|
|
/*
 * Call fn on the HEAD of the current ref namespace, i.e. the ref named
 * by get_git_namespace() followed by "HEAD".  fn receives that full
 * name.  Returns fn's result, or 0 if the ref cannot be read.
 */
int head_ref_namespaced(each_ref_fn fn, void *cb_data)
{
	struct strbuf refname = STRBUF_INIT;
	struct object_id oid;
	int flag;
	int result = 0;

	strbuf_addf(&refname, "%sHEAD", get_git_namespace());
	if (read_ref_full(refname.buf, RESOLVE_REF_READING, oid.hash, &flag) == 0)
		result = fn(refname.buf, &oid, flag, cb_data);
	strbuf_release(&refname);

	return result;
}
|
|
|
|
|
|
|
|
/*
 * Call fn for each reference whose name starts with the string returned
 * by get_git_namespace() followed by "refs/".  Nothing is trimmed from
 * the refnames handed to fn.
 */
int for_each_namespaced_ref(each_ref_fn fn, void *cb_data)
{
	struct strbuf base = STRBUF_INIT;
	int result;

	strbuf_addf(&base, "%srefs/", get_git_namespace());
	result = do_for_each_ref(&ref_cache, base.buf, fn, 0, 0, cb_data);
	strbuf_release(&base);

	return result;
}
|
|
|
|
|
2010-01-20 10:48:26 +01:00
|
|
|
/*
 * Iterate over all references with for_each_ref(), using filter_refs
 * as the callback with a glob pattern built from prefix and pattern.
 *
 * The effective pattern is prefix followed by pattern.  When prefix is
 * NULL, "refs/" is prepended unless pattern already starts with it.
 * If pattern contains no glob special characters, an implied trailing
 * "/" and "*" are appended.
 *
 * Returns whatever for_each_ref() returns.
 */
int for_each_glob_ref_in(each_ref_fn fn, const char *pattern,
	const char *prefix, void *cb_data)
{
	struct strbuf real_pattern = STRBUF_INIT;
	struct ref_filter filter;
	int ret;

	if (!prefix && !starts_with(pattern, "refs/"))
		strbuf_addstr(&real_pattern, "refs/");
	else if (prefix)
		strbuf_addstr(&real_pattern, prefix);
	strbuf_addstr(&real_pattern, pattern);

	if (!has_glob_specials(pattern)) {
		/*
		 * Append implied '/' '*' if not present.  An empty prefix
		 * combined with an empty pattern leaves the buffer empty,
		 * so check the length before peeking at the last byte to
		 * avoid reading real_pattern.buf[-1].
		 */
		if (real_pattern.len &&
		    real_pattern.buf[real_pattern.len - 1] != '/')
			strbuf_addch(&real_pattern, '/');
		/* No need to check for '*', there is none. */
		strbuf_addch(&real_pattern, '*');
	}

	filter.pattern = real_pattern.buf;
	filter.fn = fn;
	filter.cb_data = cb_data;
	ret = for_each_ref(filter_refs, &filter);

	/* The filter only borrows the buffer; release it after iterating. */
	strbuf_release(&real_pattern);
	return ret;
}
|
|
|
|
|
2010-01-20 10:48:26 +01:00
|
|
|
/* Same as for_each_glob_ref_in() with a NULL prefix. */
int for_each_glob_ref(each_ref_fn fn, const char *pattern, void *cb_data)
{
	const char *no_prefix = NULL;

	return for_each_glob_ref_in(fn, pattern, no_prefix, cb_data);
}
|
|
|
|
|
2009-02-09 08:27:10 +01:00
|
|
|
/*
 * Call fn for every reference in the file-level ref_cache, with
 * DO_FOR_EACH_INCLUDE_BROKEN set so broken references are included.
 */
int for_each_rawref(each_ref_fn fn, void *cb_data)
{
	const int flags = DO_FOR_EACH_INCLUDE_BROKEN;

	return do_for_each_ref(&ref_cache, "", fn, 0, flags, cb_data);
}
|
|
|
|
|
2009-05-13 23:22:04 +02:00
|
|
|
/*
 * Return a pointer into name that skips a leading "refs/heads/",
 * "refs/tags/" or "refs/remotes/"; if none of them matches, name
 * itself is returned.  No copy is made.
 */
const char *prettify_refname(const char *name)
{
	if (starts_with(name, "refs/heads/"))
		return name + 11;	/* strlen("refs/heads/") */
	if (starts_with(name, "refs/tags/"))
		return name + 10;	/* strlen("refs/tags/") */
	if (starts_with(name, "refs/remotes/"))
		return name + 13;	/* strlen("refs/remotes/") */
	return name;
}
|
|
|
|
|
2014-01-14 04:16:07 +01:00
|
|
|
/*
 * NULL-terminated list of printf-style patterns, each containing one
 * "%.*s" placeholder.  refname_match() walks this list in order.
 */
static const char *ref_rev_parse_rules[] = {
	"%.*s",
	"refs/%.*s",
	"refs/tags/%.*s",
	"refs/heads/%.*s",
	"refs/remotes/%.*s",
	"refs/remotes/%.*s/HEAD",
	NULL
};
|
|
|
|
|
2014-01-14 04:16:07 +01:00
|
|
|
int refname_match(const char *abbrev_name, const char *full_name)
|
add refname_match()
We use at least two rulesets for matching abbreviated refnames with
full refnames (starting with 'refs/'). git-rev-parse and git-fetch
use slightly different rules.
This commit introduces a new function refname_match
(const char *abbrev_name, const char *full_name, const char **rules).
abbrev_name is expanded using the rules and matched against full_name.
If a match is found the function returns true. rules is a NULL-terminate
list of format patterns with "%.*s", for example:
const char *ref_rev_parse_rules[] = {
"%.*s",
"refs/%.*s",
"refs/tags/%.*s",
"refs/heads/%.*s",
"refs/remotes/%.*s",
"refs/remotes/%.*s/HEAD",
NULL
};
Asterisks are included in the format strings because this is the form
required in sha1_name.c. Sharing the list with the functions there is
a good idea to avoid duplicating the rules. Hopefully this
facilitates unified matching rules in the future.
This commit makes the rules used by rev-parse for resolving refs to
sha1s available for string comparison. Before this change, the rules
were buried in get_sha1*() and dwim_ref().
A follow-up commit will refactor the rules used by fetch.
refname_match() will be used for matching refspecs in git-send-pack.
Thanks to Daniel Barkalow <barkalow@iabervon.org> for pointing
out that ref_matches_abbrev in remote.c solves a similar problem
and care should be taken to avoid confusion.
Signed-off-by: Steffen Prohaska <prohaska@zib.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-11 15:01:46 +01:00
|
|
|
{
|
|
|
|
const char **p;
|
|
|
|
const int abbrev_name_len = strlen(abbrev_name);
|
|
|
|
|
2014-01-14 04:16:07 +01:00
|
|
|
for (p = ref_rev_parse_rules; *p; p++) {
|
add refname_match()
We use at least two rulesets for matching abbreviated refnames with
full refnames (starting with 'refs/'). git-rev-parse and git-fetch
use slightly different rules.
This commit introduces a new function refname_match
(const char *abbrev_name, const char *full_name, const char **rules).
abbrev_name is expanded using the rules and matched against full_name.
If a match is found the function returns true. rules is a NULL-terminate
list of format patterns with "%.*s", for example:
const char *ref_rev_parse_rules[] = {
"%.*s",
"refs/%.*s",
"refs/tags/%.*s",
"refs/heads/%.*s",
"refs/remotes/%.*s",
"refs/remotes/%.*s/HEAD",
NULL
};
Asterisks are included in the format strings because this is the form
required in sha1_name.c. Sharing the list with the functions there is
a good idea to avoid duplicating the rules. Hopefully this
facilitates unified matching rules in the future.
This commit makes the rules used by rev-parse for resolving refs to
sha1s available for string comparison. Before this change, the rules
were buried in get_sha1*() and dwim_ref().
A follow-up commit will refactor the rules used by fetch.
refname_match() will be used for matching refspecs in git-send-pack.
Thanks to Daniel Barkalow <barkalow@iabervon.org> for pointing
out that ref_matches_abbrev in remote.c solves a similar problem
and care should be taken to avoid confusion.
Signed-off-by: Steffen Prohaska <prohaska@zib.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-11 15:01:46 +01:00
|
|
|
if (!strcmp(full_name, mkpath(*p, abbrev_name_len, abbrev_name))) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-12-12 09:57:00 +01:00
|
|
|
static void unlock_ref(struct ref_lock *lock)
|
|
|
|
{
|
|
|
|
/* Do not free lock->lk -- atexit() still looks at them */
|
|
|
|
if (lock->lk)
|
|
|
|
rollback_lock_file(lock->lk);
|
|
|
|
free(lock->ref_name);
|
|
|
|
free(lock->orig_ref_name);
|
|
|
|
free(lock);
|
|
|
|
}
|
|
|
|
|
2015-05-23 01:34:53 +02:00
|
|
|
/*
|
|
|
|
* Verify that the reference locked by lock has the value old_sha1.
|
|
|
|
* Fail if the reference doesn't exist and mustexist is set. Return 0
|
2015-05-23 01:34:55 +02:00
|
|
|
* on success. On error, write an error message to err, set errno, and
|
|
|
|
* return a negative value.
|
2015-05-23 01:34:53 +02:00
|
|
|
*/
|
|
|
|
static int verify_lock(struct ref_lock *lock,
|
2015-05-23 01:34:55 +02:00
|
|
|
const unsigned char *old_sha1, int mustexist,
|
|
|
|
struct strbuf *err)
|
2006-05-17 11:55:02 +02:00
|
|
|
{
|
2015-05-23 01:34:55 +02:00
|
|
|
assert(err);
|
|
|
|
|
2014-07-15 21:59:36 +02:00
|
|
|
if (read_ref_full(lock->ref_name,
|
|
|
|
mustexist ? RESOLVE_REF_READING : 0,
|
2015-05-25 20:39:22 +02:00
|
|
|
lock->old_oid.hash, NULL)) {
|
2014-06-20 16:42:51 +02:00
|
|
|
int save_errno = errno;
|
2015-05-23 01:34:56 +02:00
|
|
|
strbuf_addf(err, "can't verify ref %s", lock->ref_name);
|
2014-06-20 16:42:51 +02:00
|
|
|
errno = save_errno;
|
2015-05-23 01:34:53 +02:00
|
|
|
return -1;
|
2006-05-17 11:55:02 +02:00
|
|
|
}
|
2015-05-25 20:39:22 +02:00
|
|
|
if (hashcmp(lock->old_oid.hash, old_sha1)) {
|
2015-05-23 01:34:56 +02:00
|
|
|
strbuf_addf(err, "ref %s is at %s but expected %s",
|
2015-05-23 01:34:55 +02:00
|
|
|
lock->ref_name,
|
2015-06-11 18:29:54 +02:00
|
|
|
sha1_to_hex(lock->old_oid.hash),
|
2015-05-23 01:34:55 +02:00
|
|
|
sha1_to_hex(old_sha1));
|
2014-06-20 16:42:51 +02:00
|
|
|
errno = EBUSY;
|
2015-05-23 01:34:53 +02:00
|
|
|
return -1;
|
2006-05-17 11:55:02 +02:00
|
|
|
}
|
2015-05-23 01:34:53 +02:00
|
|
|
return 0;
|
2006-05-17 11:55:02 +02:00
|
|
|
}
|
|
|
|
|
2007-09-28 17:28:54 +02:00
|
|
|
static int remove_empty_directories(const char *file)
|
2006-09-30 11:25:30 +02:00
|
|
|
{
|
|
|
|
/* we want to create a file but there is a directory there;
|
|
|
|
* if that is an empty directory (or a directory that contains
|
|
|
|
* only empty directories), remove them.
|
|
|
|
*/
|
2007-09-28 17:28:54 +02:00
|
|
|
struct strbuf path;
|
2014-06-20 16:42:52 +02:00
|
|
|
int result, save_errno;
|
2006-09-30 11:25:30 +02:00
|
|
|
|
2007-09-28 17:28:54 +02:00
|
|
|
strbuf_init(&path, 20);
|
|
|
|
strbuf_addstr(&path, file);
|
|
|
|
|
2009-07-01 00:33:45 +02:00
|
|
|
result = remove_dir_recursively(&path, REMOVE_DIR_EMPTY_ONLY);
|
2014-06-20 16:42:52 +02:00
|
|
|
save_errno = errno;
|
2007-09-28 17:28:54 +02:00
|
|
|
|
|
|
|
strbuf_release(&path);
|
2014-06-20 16:42:52 +02:00
|
|
|
errno = save_errno;
|
2007-09-28 17:28:54 +02:00
|
|
|
|
|
|
|
return result;
|
2006-09-30 11:25:30 +02:00
|
|
|
}
|
|
|
|
|
2011-10-12 19:35:38 +02:00
|
|
|
/*
|
|
|
|
* *string and *len will only be substituted, and *string returned (for
|
|
|
|
* later free()ing) if the string passed in is a magic short-hand form
|
|
|
|
* to name a branch.
|
|
|
|
*/
|
|
|
|
static char *substitute_branch_name(const char **string, int *len)
|
|
|
|
{
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2013-09-02 08:34:29 +02:00
|
|
|
int ret = interpret_branch_name(*string, *len, &buf);
|
2011-10-12 19:35:38 +02:00
|
|
|
|
|
|
|
if (ret == *len) {
|
|
|
|
size_t size;
|
|
|
|
*string = strbuf_detach(&buf, &size);
|
|
|
|
*len = size;
|
|
|
|
return (char *)*string;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
int dwim_ref(const char *str, int len, unsigned char *sha1, char **ref)
|
|
|
|
{
|
|
|
|
char *last_branch = substitute_branch_name(&str, &len);
|
|
|
|
const char **p, *r;
|
|
|
|
int refs_found = 0;
|
|
|
|
|
|
|
|
*ref = NULL;
|
|
|
|
for (p = ref_rev_parse_rules; *p; p++) {
|
|
|
|
char fullref[PATH_MAX];
|
|
|
|
unsigned char sha1_from_ref[20];
|
|
|
|
unsigned char *this_result;
|
|
|
|
int flag;
|
|
|
|
|
|
|
|
this_result = refs_found ? sha1_from_ref : sha1;
|
|
|
|
mksnpath(fullref, sizeof(fullref), *p, len, str);
|
2014-07-15 21:59:36 +02:00
|
|
|
r = resolve_ref_unsafe(fullref, RESOLVE_REF_READING,
|
|
|
|
this_result, &flag);
|
2011-10-12 19:35:38 +02:00
|
|
|
if (r) {
|
|
|
|
if (!refs_found++)
|
|
|
|
*ref = xstrdup(r);
|
|
|
|
if (!warn_ambiguous_refs)
|
|
|
|
break;
|
2011-10-19 22:55:49 +02:00
|
|
|
} else if ((flag & REF_ISSYMREF) && strcmp(fullref, "HEAD")) {
|
2011-10-12 19:35:38 +02:00
|
|
|
warning("ignoring dangling symref %s.", fullref);
|
2011-10-19 22:55:49 +02:00
|
|
|
} else if ((flag & REF_ISBROKEN) && strchr(fullref, '/')) {
|
|
|
|
warning("ignoring broken ref %s.", fullref);
|
|
|
|
}
|
2011-10-12 19:35:38 +02:00
|
|
|
}
|
|
|
|
free(last_branch);
|
|
|
|
return refs_found;
|
|
|
|
}
|
|
|
|
|
|
|
|
int dwim_log(const char *str, int len, unsigned char *sha1, char **log)
|
|
|
|
{
|
|
|
|
char *last_branch = substitute_branch_name(&str, &len);
|
|
|
|
const char **p;
|
|
|
|
int logs_found = 0;
|
|
|
|
|
|
|
|
*log = NULL;
|
|
|
|
for (p = ref_rev_parse_rules; *p; p++) {
|
|
|
|
unsigned char hash[20];
|
|
|
|
char path[PATH_MAX];
|
|
|
|
const char *ref, *it;
|
|
|
|
|
|
|
|
mksnpath(path, sizeof(path), *p, len, str);
|
2014-07-15 21:59:36 +02:00
|
|
|
ref = resolve_ref_unsafe(path, RESOLVE_REF_READING,
|
|
|
|
hash, NULL);
|
2011-10-12 19:35:38 +02:00
|
|
|
if (!ref)
|
|
|
|
continue;
|
2014-05-07 00:45:52 +02:00
|
|
|
if (reflog_exists(path))
|
2011-10-12 19:35:38 +02:00
|
|
|
it = path;
|
2014-05-07 00:45:52 +02:00
|
|
|
else if (strcmp(ref, path) && reflog_exists(ref))
|
2011-10-12 19:35:38 +02:00
|
|
|
it = ref;
|
|
|
|
else
|
|
|
|
continue;
|
|
|
|
if (!logs_found++) {
|
|
|
|
*log = xstrdup(it);
|
|
|
|
hashcpy(sha1, hash);
|
|
|
|
}
|
|
|
|
if (!warn_ambiguous_refs)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
free(last_branch);
|
|
|
|
return logs_found;
|
|
|
|
}
|
|
|
|
|
2014-04-29 00:38:47 +02:00
|
|
|
/*
|
2014-10-02 16:59:02 +02:00
|
|
|
* Locks a ref returning the lock on success and NULL on failure.
|
2014-04-29 00:38:47 +02:00
|
|
|
* On failure errno is set to something meaningful.
|
|
|
|
*/
|
2011-12-12 06:38:09 +01:00
|
|
|
static struct ref_lock *lock_ref_sha1_basic(const char *refname,
|
|
|
|
const unsigned char *old_sha1,
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
const struct string_list *extras,
|
2014-05-01 20:16:07 +02:00
|
|
|
const struct string_list *skip,
|
2015-05-11 17:25:15 +02:00
|
|
|
unsigned int flags, int *type_p,
|
|
|
|
struct strbuf *err)
|
2006-05-17 11:55:02 +02:00
|
|
|
{
|
2014-11-30 09:24:27 +01:00
|
|
|
const char *ref_file;
|
2011-12-12 06:38:09 +01:00
|
|
|
const char *orig_refname = refname;
|
2006-05-17 11:55:02 +02:00
|
|
|
struct ref_lock *lock;
|
2006-09-30 23:14:31 +02:00
|
|
|
int last_errno = 0;
|
2008-10-18 00:44:39 +02:00
|
|
|
int type, lflags;
|
2006-09-27 10:09:18 +02:00
|
|
|
int mustexist = (old_sha1 && !is_null_sha1(old_sha1));
|
2014-07-15 21:59:36 +02:00
|
|
|
int resolve_flags = 0;
|
2014-01-18 23:48:54 +01:00
|
|
|
int attempts_remaining = 3;
|
2006-05-17 11:55:02 +02:00
|
|
|
|
2015-05-11 17:25:15 +02:00
|
|
|
assert(err);
|
2006-05-17 11:55:02 +02:00
|
|
|
|
|
|
|
lock = xcalloc(1, sizeof(struct ref_lock));
|
|
|
|
|
2014-07-15 21:59:36 +02:00
|
|
|
if (mustexist)
|
|
|
|
resolve_flags |= RESOLVE_REF_READING;
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
if (flags & REF_DELETING) {
|
|
|
|
resolve_flags |= RESOLVE_REF_ALLOW_BAD_NAME;
|
|
|
|
if (flags & REF_NODEREF)
|
|
|
|
resolve_flags |= RESOLVE_REF_NO_RECURSE;
|
|
|
|
}
|
2014-07-15 21:59:36 +02:00
|
|
|
|
|
|
|
refname = resolve_ref_unsafe(refname, resolve_flags,
|
2015-05-25 20:39:22 +02:00
|
|
|
lock->old_oid.hash, &type);
|
2011-12-12 06:38:09 +01:00
|
|
|
if (!refname && errno == EISDIR) {
|
2006-09-30 11:25:30 +02:00
|
|
|
/* we are trying to lock foo but we used to
|
|
|
|
* have foo/bar which now does not exist;
|
|
|
|
* it is normal for the empty directory 'foo'
|
|
|
|
* to remain.
|
|
|
|
*/
|
2011-12-12 06:38:09 +01:00
|
|
|
ref_file = git_path("%s", orig_refname);
|
2006-09-30 23:14:31 +02:00
|
|
|
if (remove_empty_directories(ref_file)) {
|
|
|
|
last_errno = errno;
|
2015-05-11 17:25:16 +02:00
|
|
|
|
|
|
|
if (!verify_refname_available(orig_refname, extras, skip,
|
|
|
|
get_loose_refs(&ref_cache), err))
|
|
|
|
strbuf_addf(err, "there are still refs under '%s'",
|
|
|
|
orig_refname);
|
|
|
|
|
2006-09-30 23:14:31 +02:00
|
|
|
goto error_return;
|
|
|
|
}
|
2014-07-15 21:59:36 +02:00
|
|
|
refname = resolve_ref_unsafe(orig_refname, resolve_flags,
|
2015-05-25 20:39:22 +02:00
|
|
|
lock->old_oid.hash, &type);
|
2006-09-30 11:25:30 +02:00
|
|
|
}
|
2007-05-09 12:33:20 +02:00
|
|
|
if (type_p)
|
|
|
|
*type_p = type;
|
2011-12-12 06:38:09 +01:00
|
|
|
if (!refname) {
|
2006-09-30 23:14:31 +02:00
|
|
|
last_errno = errno;
|
2015-05-11 17:25:16 +02:00
|
|
|
if (last_errno != ENOTDIR ||
|
|
|
|
!verify_refname_available(orig_refname, extras, skip,
|
|
|
|
get_loose_refs(&ref_cache), err))
|
|
|
|
strbuf_addf(err, "unable to resolve reference %s: %s",
|
|
|
|
orig_refname, strerror(last_errno));
|
|
|
|
|
2006-09-30 23:14:31 +02:00
|
|
|
goto error_return;
|
2006-05-17 11:55:02 +02:00
|
|
|
}
|
2015-03-02 10:29:53 +01:00
|
|
|
/*
|
|
|
|
* If the ref did not exist and we are creating it, make sure
|
|
|
|
* there is no existing packed ref whose name begins with our
|
|
|
|
* refname, nor a packed ref whose name is a proper prefix of
|
|
|
|
* our refname.
|
2006-11-28 15:47:40 +01:00
|
|
|
*/
|
2015-05-25 20:39:22 +02:00
|
|
|
if (is_null_oid(&lock->old_oid) &&
|
2015-05-11 17:25:14 +02:00
|
|
|
verify_refname_available(refname, extras, skip,
|
2015-05-11 17:25:15 +02:00
|
|
|
get_packed_refs(&ref_cache), err)) {
|
2009-05-25 12:37:15 +02:00
|
|
|
last_errno = ENOTDIR;
|
2006-11-28 15:47:40 +01:00
|
|
|
goto error_return;
|
2009-05-25 12:37:15 +02:00
|
|
|
}
|
2006-09-30 23:19:25 +02:00
|
|
|
|
2006-06-06 22:54:14 +02:00
|
|
|
lock->lk = xcalloc(1, sizeof(struct lock_file));
|
2006-05-17 11:55:02 +02:00
|
|
|
|
2014-01-18 23:48:55 +01:00
|
|
|
lflags = 0;
|
2008-10-18 00:44:39 +02:00
|
|
|
if (flags & REF_NODEREF) {
|
2011-12-12 06:38:09 +01:00
|
|
|
refname = orig_refname;
|
2014-10-01 12:28:37 +02:00
|
|
|
lflags |= LOCK_NO_DEREF;
|
2008-10-18 00:44:39 +02:00
|
|
|
}
|
2011-12-12 06:38:09 +01:00
|
|
|
lock->ref_name = xstrdup(refname);
|
|
|
|
lock->orig_ref_name = xstrdup(orig_refname);
|
|
|
|
ref_file = git_path("%s", refname);
|
2006-05-17 11:55:02 +02:00
|
|
|
|
2014-01-18 23:48:54 +01:00
|
|
|
retry:
|
2014-11-30 09:24:27 +01:00
|
|
|
switch (safe_create_leading_directories_const(ref_file)) {
|
2014-01-18 23:48:54 +01:00
|
|
|
case SCLD_OK:
|
|
|
|
break; /* success */
|
|
|
|
case SCLD_VANISHED:
|
|
|
|
if (--attempts_remaining > 0)
|
|
|
|
goto retry;
|
|
|
|
/* fall through */
|
|
|
|
default:
|
2006-09-30 23:14:31 +02:00
|
|
|
last_errno = errno;
|
2015-05-11 17:25:15 +02:00
|
|
|
strbuf_addf(err, "unable to create directory for %s", ref_file);
|
2006-09-30 23:14:31 +02:00
|
|
|
goto error_return;
|
|
|
|
}
|
2006-05-17 11:55:02 +02:00
|
|
|
|
2015-04-17 01:17:37 +02:00
|
|
|
if (hold_lock_file_for_update(lock->lk, ref_file, lflags) < 0) {
|
lock_ref_sha1_basic: do not die on locking errors
lock_ref_sha1_basic is inconsistent about when it calls
die() and when it returns NULL to signal an error. This is
annoying to any callers that want to recover from a locking
error.
This seems to be mostly historical accident. It was added in
4bd18c4 (Improve abstraction of ref lock/write.,
2006-05-17), which returned an error in all cases except
calling safe_create_leading_directories, in which case it
died. Later, 40aaae8 (Better error message when we are
unable to lock the index file, 2006-08-12) asked
hold_lock_file_for_update to die for us, leaving the
resolve_ref code-path the only one which returned NULL.
We tried to correct that in 5cc3cef (lock_ref_sha1(): do not
sometimes error() and sometimes die()., 2006-09-30),
by converting all of the die() calls into returns. But we
missed the "die" flag passed to the lock code, leaving us
inconsistent. This state persisted until e5c223e
(lock_ref_sha1_basic(): if locking fails with ENOENT, retry,
2014-01-18). Because of its retry scheme, it does not ask
the lock code to die, but instead manually dies with
unable_to_lock_die().
We can make this consistent with the other return paths by
converting this to use unable_to_lock_message(), and
returning NULL. This is safe to do because all callers
already needed to check the return value of the function,
since it could fail (and return NULL) for other reasons.
[jk: Added excessive history explanation]
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-19 23:28:52 +01:00
|
|
|
last_errno = errno;
|
2014-01-18 23:48:55 +01:00
|
|
|
if (errno == ENOENT && --attempts_remaining > 0)
|
|
|
|
/*
|
|
|
|
* Maybe somebody just deleted one of the
|
|
|
|
* directories leading to ref_file. Try
|
|
|
|
* again:
|
|
|
|
*/
|
|
|
|
goto retry;
|
lock_ref_sha1_basic: do not die on locking errors
lock_ref_sha1_basic is inconsistent about when it calls
die() and when it returns NULL to signal an error. This is
annoying to any callers that want to recover from a locking
error.
This seems to be mostly historical accident. It was added in
4bd18c4 (Improve abstraction of ref lock/write.,
2006-05-17), which returned an error in all cases except
calling safe_create_leading_directories, in which case it
died. Later, 40aaae8 (Better error message when we are
unable to lock the index file, 2006-08-12) asked
hold_lock_file_for_update to die for us, leaving the
resolve_ref code-path the only one which returned NULL.
We tried to correct that in 5cc3cef (lock_ref_sha1(): do not
sometimes error() and sometimes die()., 2006-09-30),
by converting all of the die() calls into returns. But we
missed the "die" flag passed to the lock code, leaving us
inconsistent. This state persisted until e5c223e
(lock_ref_sha1_basic(): if locking fails with ENOENT, retry,
2014-01-18). Because of its retry scheme, it does not ask
the lock code to die, but instead manually dies with
unable_to_lock_die().
We can make this consistent with the other return paths by
converting this to use unable_to_lock_message(), and
returning NULL. This is safe to do because all callers
already needed to check the return value of the function,
since it could fail (and return NULL) for other reasons.
[jk: Added excessive history explanation]
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-19 23:28:52 +01:00
|
|
|
else {
|
2015-05-11 17:25:15 +02:00
|
|
|
unable_to_lock_message(ref_file, errno, err);
|
lock_ref_sha1_basic: do not die on locking errors
lock_ref_sha1_basic is inconsistent about when it calls
die() and when it returns NULL to signal an error. This is
annoying to any callers that want to recover from a locking
error.
This seems to be mostly historical accident. It was added in
4bd18c4 (Improve abstraction of ref lock/write.,
2006-05-17), which returned an error in all cases except
calling safe_create_leading_directories, in which case it
died. Later, 40aaae8 (Better error message when we are
unable to lock the index file, 2006-08-12) asked
hold_lock_file_for_update to die for us, leaving the
resolve_ref code-path the only one which returned NULL.
We tried to correct that in 5cc3cef (lock_ref_sha1(): do not
sometimes error() and sometimes die()., 2006-09-30),
by converting all of the die() calls into returns. But we
missed the "die" flag passed to the lock code, leaving us
inconsistent. This state persisted until e5c223e
(lock_ref_sha1_basic(): if locking fails with ENOENT, retry,
2014-01-18). Because of its retry scheme, it does not ask
the lock code to die, but instead manually dies with
unable_to_lock_die().
We can make this consistent with the other return paths by
converting this to use unable_to_lock_message(), and
returning NULL. This is safe to do because all callers
already needed to check the return value of the function,
since it could fail (and return NULL) for other reasons.
[jk: Added excessive history explanation]
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-19 23:28:52 +01:00
|
|
|
goto error_return;
|
|
|
|
}
|
2014-01-18 23:48:55 +01:00
|
|
|
}
|
2015-05-23 01:34:55 +02:00
|
|
|
if (old_sha1 && verify_lock(lock, old_sha1, mustexist, err)) {
|
2015-05-23 01:34:54 +02:00
|
|
|
last_errno = errno;
|
|
|
|
goto error_return;
|
|
|
|
}
|
2015-05-23 01:34:53 +02:00
|
|
|
return lock;
|
2006-09-30 23:14:31 +02:00
|
|
|
|
|
|
|
error_return:
|
|
|
|
unlock_ref(lock);
|
|
|
|
errno = last_errno;
|
|
|
|
return NULL;
|
2006-05-17 11:55:02 +02:00
|
|
|
}
|
|
|
|
|
2013-04-22 21:52:30 +02:00
|
|
|
/*
 * Write an entry to the packed-refs file for the specified refname.
 * The entry is one "<hex sha1> <refname>" line.  If peeled is
 * non-NULL, it is written as the entry's peeled value on a following
 * "^<hex peeled>" line.
 */
static void write_packed_entry(FILE *fh, char *refname, unsigned char *sha1,
			       unsigned char *peeled)
{
	fprintf_or_die(fh, "%s %s\n", sha1_to_hex(sha1), refname);

	if (!peeled)
		return;

	fprintf_or_die(fh, "^%s\n", sha1_to_hex(peeled));
}
|
|
|
|
|
2013-06-20 10:37:43 +02:00
|
|
|
/*
|
|
|
|
* An each_ref_entry_fn that writes the entry to a packed-refs file.
|
|
|
|
*/
|
|
|
|
static int write_packed_entry_fn(struct ref_entry *entry, void *cb_data)
|
|
|
|
{
|
|
|
|
enum peel_status peel_status = peel_entry(entry, 0);
|
|
|
|
|
|
|
|
if (peel_status != PEEL_PEELED && peel_status != PEEL_NON_TAG)
|
|
|
|
error("internal error: %s is not a valid packed reference!",
|
|
|
|
entry->name);
|
2015-05-25 20:38:27 +02:00
|
|
|
write_packed_entry(cb_data, entry->name, entry->u.value.oid.hash,
|
2013-06-20 10:37:43 +02:00
|
|
|
peel_status == PEEL_PEELED ?
|
2015-05-25 20:38:27 +02:00
|
|
|
entry->u.value.peeled.hash : NULL);
|
2013-06-20 10:37:43 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-06-20 16:42:48 +02:00
|
|
|
/* This should return a meaningful errno on failure */
|
2013-06-20 10:37:46 +02:00
|
|
|
int lock_packed_refs(int flags)
|
|
|
|
{
|
lock_packed_refs(): allow retries when acquiring the packed-refs lock
Currently, there is only one attempt to acquire any lockfile, and if
the lock is held by another process, the locking attempt fails
immediately.
This is not such a limitation for loose reference files. First, they
don't take long to rewrite. Second, most reference updates have a
known "old" value, so if another process is updating a reference at
the same moment that we are trying to lock it, then probably the
expected "old" value will not longer be valid, and the update will
fail anyway.
But these arguments do not hold for packed-refs:
* The packed-refs file can be large and take significant time to
rewrite.
* Many references are stored in a single packed-refs file, so it could
be that the other process was changing a different reference than
the one that we are interested in.
Therefore, it is much more likely for there to be spurious lock
conflicts in connection to the packed-refs file, resulting in
unnecessary command failures.
So, if the first attempt to lock the packed-refs file fails, continue
retrying for a configurable length of time before giving up. The
default timeout is 1 second.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-05-11 12:35:26 +02:00
|
|
|
static int timeout_configured = 0;
|
|
|
|
static int timeout_value = 1000;
|
|
|
|
|
2013-06-20 10:37:46 +02:00
|
|
|
struct packed_ref_cache *packed_ref_cache;
|
|
|
|
|
lock_packed_refs(): allow retries when acquiring the packed-refs lock
Currently, there is only one attempt to acquire any lockfile, and if
the lock is held by another process, the locking attempt fails
immediately.
This is not such a limitation for loose reference files. First, they
don't take long to rewrite. Second, most reference updates have a
known "old" value, so if another process is updating a reference at
the same moment that we are trying to lock it, then probably the
expected "old" value will not longer be valid, and the update will
fail anyway.
But these arguments do not hold for packed-refs:
* The packed-refs file can be large and take significant time to
rewrite.
* Many references are stored in a single packed-refs file, so it could
be that the other process was changing a different reference than
the one that we are interested in.
Therefore, it is much more likely for there to be spurious lock
conflicts in connection to the packed-refs file, resulting in
unnecessary command failures.
So, if the first attempt to lock the packed-refs file fails, continue
retrying for a configurable length of time before giving up. The
default timeout is 1 second.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-05-11 12:35:26 +02:00
|
|
|
if (!timeout_configured) {
|
|
|
|
git_config_get_int("core.packedrefstimeout", &timeout_value);
|
|
|
|
timeout_configured = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hold_lock_file_for_update_timeout(
|
|
|
|
&packlock, git_path("packed-refs"),
|
|
|
|
flags, timeout_value) < 0)
|
2013-06-20 10:37:46 +02:00
|
|
|
return -1;
|
2013-06-20 10:37:54 +02:00
|
|
|
/*
|
|
|
|
* Get the current packed-refs while holding the lock. If the
|
|
|
|
* packed-refs file has been modified since we last read it,
|
|
|
|
* this will automatically invalidate the cache and re-read
|
|
|
|
* the packed-refs file.
|
|
|
|
*/
|
2013-06-20 10:37:46 +02:00
|
|
|
packed_ref_cache = get_packed_ref_cache(&ref_cache);
|
|
|
|
packed_ref_cache->lock = &packlock;
|
2013-06-20 10:37:49 +02:00
|
|
|
/* Increment the reference count to prevent it from being freed: */
|
|
|
|
acquire_packed_ref_cache(packed_ref_cache);
|
2013-06-20 10:37:46 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-06-20 16:42:53 +02:00
|
|
|
/*
|
|
|
|
* Commit the packed refs changes.
|
|
|
|
* On error we must make sure that errno contains a meaningful value.
|
|
|
|
*/
|
2013-06-20 10:37:46 +02:00
|
|
|
int commit_packed_refs(void)
|
|
|
|
{
|
|
|
|
struct packed_ref_cache *packed_ref_cache =
|
|
|
|
get_packed_ref_cache(&ref_cache);
|
|
|
|
int error = 0;
|
2014-06-20 16:42:53 +02:00
|
|
|
int save_errno = 0;
|
2014-09-10 12:03:52 +02:00
|
|
|
FILE *out;
|
2013-06-20 10:37:46 +02:00
|
|
|
|
|
|
|
if (!packed_ref_cache->lock)
|
|
|
|
die("internal error: packed-refs not locked");
|
|
|
|
|
2014-10-01 13:14:49 +02:00
|
|
|
out = fdopen_lock_file(packed_ref_cache->lock, "w");
|
2014-09-10 12:03:52 +02:00
|
|
|
if (!out)
|
|
|
|
die_errno("unable to fdopen packed-refs descriptor");
|
|
|
|
|
|
|
|
fprintf_or_die(out, "%s", PACKED_REFS_HEADER);
|
2013-06-20 10:37:46 +02:00
|
|
|
do_for_each_entry_in_dir(get_packed_ref_dir(packed_ref_cache),
|
2014-09-10 12:03:52 +02:00
|
|
|
0, write_packed_entry_fn, out);
|
|
|
|
|
2014-06-20 16:42:53 +02:00
|
|
|
if (commit_lock_file(packed_ref_cache->lock)) {
|
|
|
|
save_errno = errno;
|
2013-06-20 10:37:46 +02:00
|
|
|
error = -1;
|
2014-06-20 16:42:53 +02:00
|
|
|
}
|
2013-06-20 10:37:46 +02:00
|
|
|
packed_ref_cache->lock = NULL;
|
2013-06-20 10:37:49 +02:00
|
|
|
release_packed_ref_cache(packed_ref_cache);
|
2014-06-20 16:42:53 +02:00
|
|
|
errno = save_errno;
|
2013-06-20 10:37:46 +02:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
void rollback_packed_refs(void)
|
|
|
|
{
|
|
|
|
struct packed_ref_cache *packed_ref_cache =
|
|
|
|
get_packed_ref_cache(&ref_cache);
|
|
|
|
|
|
|
|
if (!packed_ref_cache->lock)
|
|
|
|
die("internal error: packed-refs not locked");
|
|
|
|
rollback_lock_file(packed_ref_cache->lock);
|
|
|
|
packed_ref_cache->lock = NULL;
|
2013-06-20 10:37:49 +02:00
|
|
|
release_packed_ref_cache(packed_ref_cache);
|
2013-06-20 10:37:46 +02:00
|
|
|
clear_packed_ref_cache(&ref_cache);
|
|
|
|
}
|
|
|
|
|
2013-04-22 21:52:32 +02:00
|
|
|
struct ref_to_prune {
|
|
|
|
struct ref_to_prune *next;
|
|
|
|
unsigned char sha1[20];
|
|
|
|
char name[FLEX_ARRAY];
|
|
|
|
};
|
|
|
|
|
|
|
|
/* State shared between pack_refs() and its per-entry callback. */
struct pack_refs_cb_data {
	/* PACK_REFS_* bits (PACK_REFS_ALL, PACK_REFS_PRUNE are tested) */
	unsigned int flags;
	/* packed ref directory that receives the packed entries */
	struct ref_dir *packed_refs;
	/* head of the list of loose refs to delete afterwards */
	struct ref_to_prune *ref_to_prune;
};
|
|
|
|
|
2013-06-20 10:37:44 +02:00
|
|
|
/*
|
|
|
|
* An each_ref_entry_fn that is run over loose references only. If
|
|
|
|
* the loose reference can be packed, add an entry in the packed ref
|
|
|
|
* cache. If the reference should be pruned, also add it to
|
|
|
|
* ref_to_prune in the pack_refs_cb_data.
|
|
|
|
*/
|
|
|
|
static int pack_if_possible_fn(struct ref_entry *entry, void *cb_data)
|
2013-04-22 21:52:32 +02:00
|
|
|
{
|
|
|
|
struct pack_refs_cb_data *cb = cb_data;
|
2013-04-22 21:52:37 +02:00
|
|
|
enum peel_status peel_status;
|
2013-06-20 10:37:44 +02:00
|
|
|
struct ref_entry *packed_entry;
|
2013-11-30 21:55:40 +01:00
|
|
|
int is_tag_ref = starts_with(entry->name, "refs/tags/");
|
2013-04-22 21:52:32 +02:00
|
|
|
|
2013-06-20 10:37:44 +02:00
|
|
|
/* ALWAYS pack tags */
|
|
|
|
if (!(cb->flags & PACK_REFS_ALL) && !is_tag_ref)
|
2013-04-22 21:52:32 +02:00
|
|
|
return 0;
|
|
|
|
|
2013-04-22 21:52:39 +02:00
|
|
|
/* Do not pack symbolic or broken refs: */
|
|
|
|
if ((entry->flag & REF_ISSYMREF) || !ref_resolves_to_object(entry))
|
|
|
|
return 0;
|
|
|
|
|
2013-06-20 10:37:44 +02:00
|
|
|
/* Add a packed ref cache entry equivalent to the loose entry. */
|
2013-04-22 21:52:37 +02:00
|
|
|
peel_status = peel_entry(entry, 1);
|
2013-04-22 21:52:38 +02:00
|
|
|
if (peel_status != PEEL_PEELED && peel_status != PEEL_NON_TAG)
|
2013-04-22 21:52:37 +02:00
|
|
|
die("internal error peeling reference %s (%s)",
|
2015-05-25 20:38:27 +02:00
|
|
|
entry->name, oid_to_hex(&entry->u.value.oid));
|
2013-06-20 10:37:44 +02:00
|
|
|
packed_entry = find_ref(cb->packed_refs, entry->name);
|
|
|
|
if (packed_entry) {
|
|
|
|
/* Overwrite existing packed entry with info from loose entry */
|
|
|
|
packed_entry->flag = REF_ISPACKED | REF_KNOWS_PEELED;
|
2015-05-25 20:38:27 +02:00
|
|
|
oidcpy(&packed_entry->u.value.oid, &entry->u.value.oid);
|
2013-06-20 10:37:44 +02:00
|
|
|
} else {
|
2015-05-25 20:38:27 +02:00
|
|
|
packed_entry = create_ref_entry(entry->name, entry->u.value.oid.hash,
|
2013-06-20 10:37:44 +02:00
|
|
|
REF_ISPACKED | REF_KNOWS_PEELED, 0);
|
|
|
|
add_ref(cb->packed_refs, packed_entry);
|
|
|
|
}
|
2015-05-25 20:38:27 +02:00
|
|
|
oidcpy(&packed_entry->u.value.peeled, &entry->u.value.peeled);
|
2013-04-22 21:52:32 +02:00
|
|
|
|
2013-06-20 10:37:44 +02:00
|
|
|
/* Schedule the loose reference for pruning if requested. */
|
|
|
|
if ((cb->flags & PACK_REFS_PRUNE)) {
|
2013-04-22 21:52:35 +02:00
|
|
|
int namelen = strlen(entry->name) + 1;
|
2013-04-22 21:52:32 +02:00
|
|
|
struct ref_to_prune *n = xcalloc(1, sizeof(*n) + namelen);
|
2015-05-25 20:38:27 +02:00
|
|
|
hashcpy(n->sha1, entry->u.value.oid.hash);
|
2013-04-22 21:52:35 +02:00
|
|
|
strcpy(n->name, entry->name);
|
2013-04-22 21:52:32 +02:00
|
|
|
n->next = cb->ref_to_prune;
|
|
|
|
cb->ref_to_prune = n;
|
|
|
|
}
|
2012-04-10 07:30:17 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-04-22 21:52:32 +02:00
|
|
|
/*
 * Remove the parent directories of the given refname, innermost
 * first, stopping at the first rmdir() that fails. The first two path
 * components (refs/ and its immediate subdirectory) are never
 * touched.
 *
 * Note: munges *name (it is truncated in place at each level tried).
 */
static void try_remove_empty_parents(char *name)
{
	char *keep = name;
	char *end;
	int level;

	/* Step over the protected prefix: refs/{heads,tags,...}/ */
	for (level = 0; level < 2; level++) {
		keep += strcspn(keep, "/");
		/* tolerate duplicate slashes; see check_refname_format() */
		keep += strspn(keep, "/");
	}

	end = keep + strlen(keep);
	for (;;) {
		/* Back up to the slash in front of the last component ... */
		while (end > keep && *end != '/')
			end--;
		/* ... and on to the first slash of a run of duplicates. */
		while (end > keep && end[-1] == '/')
			end--;
		if (end == keep)
			return;

		/* Cut the name down to the parent directory and try it. */
		*end = '\0';
		if (rmdir(git_path("%s", name)) != 0)
			return;
	}
}
|
|
|
|
|
|
|
|
/* make sure nobody touched the ref, and unlink */
|
|
|
|
static void prune_ref(struct ref_to_prune *r)
|
|
|
|
{
|
2014-04-30 18:03:36 +02:00
|
|
|
struct ref_transaction *transaction;
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
2013-04-22 21:52:32 +02:00
|
|
|
|
2014-09-11 19:33:33 +02:00
|
|
|
if (check_refname_format(r->name, 0))
|
2014-04-30 00:45:52 +02:00
|
|
|
return;
|
2013-04-22 21:52:32 +02:00
|
|
|
|
2014-04-30 18:03:36 +02:00
|
|
|
transaction = ref_transaction_begin(&err);
|
|
|
|
if (!transaction ||
|
|
|
|
ref_transaction_delete(transaction, r->name, r->sha1,
|
2015-02-17 18:00:16 +01:00
|
|
|
REF_ISPRUNING, NULL, &err) ||
|
2014-04-30 21:22:42 +02:00
|
|
|
ref_transaction_commit(transaction, &err)) {
|
2014-04-30 18:03:36 +02:00
|
|
|
ref_transaction_free(transaction);
|
|
|
|
error("%s", err.buf);
|
|
|
|
strbuf_release(&err);
|
|
|
|
return;
|
2013-04-22 21:52:32 +02:00
|
|
|
}
|
2014-04-30 18:03:36 +02:00
|
|
|
ref_transaction_free(transaction);
|
|
|
|
strbuf_release(&err);
|
|
|
|
try_remove_empty_parents(r->name);
|
2013-04-22 21:52:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void prune_refs(struct ref_to_prune *r)
|
|
|
|
{
|
|
|
|
while (r) {
|
|
|
|
prune_ref(r);
|
|
|
|
r = r->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int pack_refs(unsigned int flags)
|
|
|
|
{
|
|
|
|
struct pack_refs_cb_data cbdata;
|
|
|
|
|
|
|
|
memset(&cbdata, 0, sizeof(cbdata));
|
|
|
|
cbdata.flags = flags;
|
|
|
|
|
2013-06-20 10:37:46 +02:00
|
|
|
lock_packed_refs(LOCK_DIE_ON_ERROR);
|
2013-06-20 10:37:44 +02:00
|
|
|
cbdata.packed_refs = get_packed_refs(&ref_cache);
|
2013-04-22 21:52:32 +02:00
|
|
|
|
2013-06-20 10:37:44 +02:00
|
|
|
do_for_each_entry_in_dir(get_loose_refs(&ref_cache), 0,
|
|
|
|
pack_if_possible_fn, &cbdata);
|
2013-04-22 21:52:32 +02:00
|
|
|
|
2013-06-20 10:37:46 +02:00
|
|
|
if (commit_packed_refs())
|
2013-04-22 21:52:32 +02:00
|
|
|
die_errno("unable to overwrite old ref-pack file");
|
2013-06-20 10:37:46 +02:00
|
|
|
|
2013-04-22 21:52:32 +02:00
|
|
|
prune_refs(cbdata.ref_to_prune);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-11-25 09:02:32 +01:00
|
|
|
int repack_without_refs(struct string_list *refnames, struct strbuf *err)
|
2006-10-01 00:02:00 +02:00
|
|
|
{
|
2013-04-22 21:52:17 +02:00
|
|
|
struct ref_dir *packed;
|
refs.c: drop curate_packed_refs
When we delete a ref, we have to rewrite the entire
packed-refs file. We take this opportunity to "curate" the
packed-refs file and drop any entries that are crufty or
broken.
Dropping broken entries (e.g., with bogus names, or ones
that point to missing objects) is actively a bad idea, as it
means that we lose any notion that the data was there in the
first place. Aside from the general hackiness that we might
lose any information about ref "foo" while deleting an
unrelated ref "bar", this may seriously hamper any attempts
by the user at recovering from the corruption in "foo".
They will lose the sha1 and name of "foo"; the exact pointer
may still be useful even if they recover missing objects
from a different copy of the repository. But worse, once the
ref is gone, there is no trace of the corruption. A
follow-up "git prune" may delete objects, even though it
would otherwise bail when seeing corruption.
We could just drop the "broken" bits from
curate_packed_refs, and continue to drop the "crufty" bits:
refs whose loose counterpart exists in the filesystem. This
is not wrong to do, and it does have the advantage that we
may write out a slightly smaller packed-refs file. But it
has two disadvantages:
1. It is a potential source of races or mistakes with
respect to these refs that are otherwise unrelated to
the operation. To my knowledge, there aren't any active
problems in this area, but it seems like an unnecessary
risk.
2. We have to spend time looking up the matching loose
refs for every item in the packed-refs file. If you
have a large number of packed refs that do not change,
that outweighs the benefit from writing out a smaller
packed-refs file (it doesn't get smaller, and you do a
bunch of directory traversal to find that out).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-20 19:43:17 +01:00
|
|
|
struct string_list_item *refname;
|
2014-11-25 09:02:32 +01:00
|
|
|
int ret, needs_repacking = 0, removed = 0;
|
2013-09-04 17:22:42 +02:00
|
|
|
|
2014-08-29 01:42:37 +02:00
|
|
|
assert(err);
|
|
|
|
|
2013-09-04 17:22:42 +02:00
|
|
|
/* Look for a packed ref */
|
2014-11-25 09:02:32 +01:00
|
|
|
for_each_string_list_item(refname, refnames) {
|
|
|
|
if (get_packed_ref(refname->string)) {
|
|
|
|
needs_repacking = 1;
|
2013-09-04 17:22:42 +02:00
|
|
|
break;
|
2014-11-25 09:02:32 +01:00
|
|
|
}
|
|
|
|
}
|
2013-04-22 21:52:17 +02:00
|
|
|
|
2013-09-04 17:22:42 +02:00
|
|
|
/* Avoid locking if we have nothing to do */
|
2014-11-25 09:02:32 +01:00
|
|
|
if (!needs_repacking)
|
2013-09-04 17:22:42 +02:00
|
|
|
return 0; /* no refname exists in packed refs */
|
2013-04-22 21:52:17 +02:00
|
|
|
|
2013-06-20 10:37:46 +02:00
|
|
|
if (lock_packed_refs(0)) {
|
2014-08-29 01:42:37 +02:00
|
|
|
unable_to_lock_message(git_path("packed-refs"), errno, err);
|
|
|
|
return -1;
|
2009-09-27 01:15:09 +02:00
|
|
|
}
|
2013-04-22 21:52:41 +02:00
|
|
|
packed = get_packed_refs(&ref_cache);
|
2013-06-20 10:37:43 +02:00
|
|
|
|
2013-09-04 17:22:42 +02:00
|
|
|
/* Remove refnames from the cache */
|
2014-11-25 09:02:32 +01:00
|
|
|
for_each_string_list_item(refname, refnames)
|
|
|
|
if (remove_entry(packed, refname->string) != -1)
|
2013-09-04 17:22:42 +02:00
|
|
|
removed = 1;
|
|
|
|
if (!removed) {
|
2013-04-22 21:52:27 +02:00
|
|
|
/*
|
2013-09-04 17:22:42 +02:00
|
|
|
* All packed entries disappeared while we were
|
2013-04-22 21:52:27 +02:00
|
|
|
* acquiring the lock.
|
|
|
|
*/
|
2013-06-20 10:37:46 +02:00
|
|
|
rollback_packed_refs();
|
2013-04-22 21:52:27 +02:00
|
|
|
return 0;
|
|
|
|
}
|
2013-06-20 10:37:43 +02:00
|
|
|
|
2013-09-04 17:22:42 +02:00
|
|
|
/* Write what remains */
|
2014-06-20 16:42:49 +02:00
|
|
|
ret = commit_packed_refs();
|
2014-08-29 01:42:37 +02:00
|
|
|
if (ret)
|
2014-06-20 16:42:49 +02:00
|
|
|
strbuf_addf(err, "unable to overwrite old ref-pack file: %s",
|
|
|
|
strerror(errno));
|
|
|
|
return ret;
|
2006-10-01 00:02:00 +02:00
|
|
|
}
|
|
|
|
|
2014-05-15 17:25:23 +02:00
|
|
|
static int delete_ref_loose(struct ref_lock *lock, int flag, struct strbuf *err)
|
2013-09-04 17:22:41 +02:00
|
|
|
{
|
2014-08-29 01:42:37 +02:00
|
|
|
assert(err);
|
|
|
|
|
2008-11-01 00:25:44 +01:00
|
|
|
if (!(flag & REF_ISPACKED) || flag & REF_ISSYMREF) {
|
2014-10-01 12:28:16 +02:00
|
|
|
/*
|
|
|
|
* loose. The loose file name is the same as the
|
|
|
|
* lockfile name, minus ".lock":
|
|
|
|
*/
|
2014-10-01 12:28:39 +02:00
|
|
|
char *loose_filename = get_locked_file_path(lock->lk);
|
2014-05-15 17:25:23 +02:00
|
|
|
int res = unlink_or_msg(loose_filename, err);
|
2014-10-01 12:28:16 +02:00
|
|
|
free(loose_filename);
|
2014-05-15 17:25:23 +02:00
|
|
|
if (res)
|
2013-09-04 17:22:41 +02:00
|
|
|
return 1;
|
2006-10-01 00:02:00 +02:00
|
|
|
}
|
2013-09-04 17:22:41 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-02-17 18:00:13 +01:00
|
|
|
int delete_ref(const char *refname, const unsigned char *sha1, unsigned int flags)
|
2006-10-01 00:02:00 +02:00
|
|
|
{
|
2014-04-30 18:22:45 +02:00
|
|
|
struct ref_transaction *transaction;
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
2006-10-01 00:02:00 +02:00
|
|
|
|
2014-04-30 18:22:45 +02:00
|
|
|
transaction = ref_transaction_begin(&err);
|
|
|
|
if (!transaction ||
|
2015-02-17 18:00:16 +01:00
|
|
|
ref_transaction_delete(transaction, refname,
|
|
|
|
(sha1 && !is_null_sha1(sha1)) ? sha1 : NULL,
|
|
|
|
flags, NULL, &err) ||
|
2014-04-30 21:22:42 +02:00
|
|
|
ref_transaction_commit(transaction, &err)) {
|
2014-04-30 18:22:45 +02:00
|
|
|
error("%s", err.buf);
|
|
|
|
ref_transaction_free(transaction);
|
|
|
|
strbuf_release(&err);
|
2006-10-01 00:02:00 +02:00
|
|
|
return 1;
|
2014-04-30 18:22:45 +02:00
|
|
|
}
|
|
|
|
ref_transaction_free(transaction);
|
|
|
|
strbuf_release(&err);
|
|
|
|
return 0;
|
2006-05-17 11:55:02 +02:00
|
|
|
}
|
|
|
|
|
2010-07-07 09:47:20 +02:00
|
|
|
/*
|
|
|
|
* People using contrib's git-new-workdir have .git/logs/refs ->
|
|
|
|
* /some/other/path/.git/logs/refs, and that may live on another device.
|
|
|
|
*
|
|
|
|
* IOW, to avoid cross device rename errors, the temporary renamed log must
|
|
|
|
* live in logs/refs.
|
|
|
|
*/
|
|
|
|
#define TMP_RENAMED_LOG "logs/refs/.tmp-renamed-log"
|
|
|
|
|
2014-01-18 23:48:58 +01:00
|
|
|
static int rename_tmp_log(const char *newrefname)
|
|
|
|
{
|
2014-01-18 23:49:00 +01:00
|
|
|
int attempts_remaining = 4;
|
2014-01-18 23:48:59 +01:00
|
|
|
|
|
|
|
retry:
|
2014-11-30 09:24:27 +01:00
|
|
|
switch (safe_create_leading_directories_const(git_path("logs/%s", newrefname))) {
|
2014-01-18 23:49:01 +01:00
|
|
|
case SCLD_OK:
|
|
|
|
break; /* success */
|
|
|
|
case SCLD_VANISHED:
|
|
|
|
if (--attempts_remaining > 0)
|
|
|
|
goto retry;
|
|
|
|
/* fall through */
|
|
|
|
default:
|
2014-01-18 23:48:58 +01:00
|
|
|
error("unable to create directory for %s", newrefname);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rename(git_path(TMP_RENAMED_LOG), git_path("logs/%s", newrefname))) {
|
2014-01-18 23:49:00 +01:00
|
|
|
if ((errno==EISDIR || errno==ENOTDIR) && --attempts_remaining > 0) {
|
2014-01-18 23:48:58 +01:00
|
|
|
/*
|
|
|
|
* rename(a, b) when b is an existing
|
|
|
|
* directory ought to result in ISDIR, but
|
|
|
|
* Solaris 5.8 gives ENOTDIR. Sheesh.
|
|
|
|
*/
|
|
|
|
if (remove_empty_directories(git_path("logs/%s", newrefname))) {
|
|
|
|
error("Directory not empty: logs/%s", newrefname);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
goto retry;
|
2014-01-18 23:48:59 +01:00
|
|
|
} else if (errno == ENOENT && --attempts_remaining > 0) {
|
|
|
|
/*
|
|
|
|
* Maybe another process just deleted one of
|
|
|
|
* the directories in the path to newrefname.
|
|
|
|
* Try again from the beginning.
|
|
|
|
*/
|
|
|
|
goto retry;
|
2014-01-18 23:48:58 +01:00
|
|
|
} else {
|
|
|
|
error("unable to move logfile "TMP_RENAMED_LOG" to logs/%s: %s",
|
|
|
|
newrefname, strerror(errno));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-05-01 20:16:07 +02:00
|
|
|
static int rename_ref_available(const char *oldname, const char *newname)
|
|
|
|
{
|
|
|
|
struct string_list skip = STRING_LIST_INIT_NODUP;
|
2015-05-11 17:25:14 +02:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
2014-05-01 20:16:07 +02:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
string_list_insert(&skip, oldname);
|
2015-05-11 17:25:13 +02:00
|
|
|
ret = !verify_refname_available(newname, NULL, &skip,
|
2015-05-11 17:25:14 +02:00
|
|
|
get_packed_refs(&ref_cache), &err)
|
2015-05-11 17:25:13 +02:00
|
|
|
&& !verify_refname_available(newname, NULL, &skip,
|
2015-05-11 17:25:14 +02:00
|
|
|
get_loose_refs(&ref_cache), &err);
|
|
|
|
if (!ret)
|
|
|
|
error("%s", err.buf);
|
|
|
|
|
2014-05-01 20:16:07 +02:00
|
|
|
string_list_clear(&skip, 0);
|
2015-05-11 17:25:14 +02:00
|
|
|
strbuf_release(&err);
|
2014-05-01 20:16:07 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-05-09 17:20:39 +02:00
|
|
|
static int write_ref_to_lockfile(struct ref_lock *lock, const unsigned char *sha1);
|
|
|
|
static int commit_ref_update(struct ref_lock *lock,
|
|
|
|
const unsigned char *sha1, const char *logmsg);
|
2014-04-29 00:36:58 +02:00
|
|
|
|
2011-12-12 06:38:09 +01:00
|
|
|
int rename_ref(const char *oldrefname, const char *newrefname, const char *logmsg)
|
2006-11-28 15:47:40 +01:00
|
|
|
{
|
|
|
|
unsigned char sha1[20], orig_sha1[20];
|
|
|
|
int flag = 0, logmoved = 0;
|
|
|
|
struct ref_lock *lock;
|
|
|
|
struct stat loginfo;
|
2011-12-12 06:38:09 +01:00
|
|
|
int log = !lstat(git_path("logs/%s", oldrefname), &loginfo);
|
2008-10-26 03:33:56 +01:00
|
|
|
const char *symref = NULL;
|
2015-05-11 17:25:15 +02:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
2006-11-28 15:47:40 +01:00
|
|
|
|
2008-10-26 03:33:57 +01:00
|
|
|
if (log && S_ISLNK(loginfo.st_mode))
|
2011-12-12 06:38:09 +01:00
|
|
|
return error("reflog for %s is a symlink", oldrefname);
|
2006-11-28 15:47:40 +01:00
|
|
|
|
2014-07-15 21:59:36 +02:00
|
|
|
symref = resolve_ref_unsafe(oldrefname, RESOLVE_REF_READING,
|
|
|
|
orig_sha1, &flag);
|
2008-10-26 03:33:56 +01:00
|
|
|
if (flag & REF_ISSYMREF)
|
2008-10-29 01:05:27 +01:00
|
|
|
return error("refname %s is a symbolic ref, renaming it is not supported",
|
2011-12-12 06:38:09 +01:00
|
|
|
oldrefname);
|
2008-10-26 03:33:56 +01:00
|
|
|
if (!symref)
|
2011-12-12 06:38:09 +01:00
|
|
|
return error("refname %s not found", oldrefname);
|
2006-11-28 15:47:40 +01:00
|
|
|
|
2014-05-01 20:16:07 +02:00
|
|
|
if (!rename_ref_available(oldrefname, newrefname))
|
2006-11-28 15:47:40 +01:00
|
|
|
return 1;
|
|
|
|
|
2011-12-12 06:38:09 +01:00
|
|
|
if (log && rename(git_path("logs/%s", oldrefname), git_path(TMP_RENAMED_LOG)))
|
2010-07-07 09:47:20 +02:00
|
|
|
return error("unable to move logfile logs/%s to "TMP_RENAMED_LOG": %s",
|
2011-12-12 06:38:09 +01:00
|
|
|
oldrefname, strerror(errno));
|
2006-11-28 15:47:40 +01:00
|
|
|
|
2011-12-12 06:38:09 +01:00
|
|
|
if (delete_ref(oldrefname, orig_sha1, REF_NODEREF)) {
|
|
|
|
error("unable to delete old %s", oldrefname);
|
2006-11-28 15:47:40 +01:00
|
|
|
goto rollback;
|
|
|
|
}
|
|
|
|
|
2014-07-15 21:59:36 +02:00
|
|
|
if (!read_ref_full(newrefname, RESOLVE_REF_READING, sha1, NULL) &&
|
2011-12-12 06:38:09 +01:00
|
|
|
delete_ref(newrefname, sha1, REF_NODEREF)) {
|
2006-11-28 15:47:40 +01:00
|
|
|
if (errno==EISDIR) {
|
2011-12-12 06:38:09 +01:00
|
|
|
if (remove_empty_directories(git_path("%s", newrefname))) {
|
|
|
|
error("Directory not empty: %s", newrefname);
|
2006-11-28 15:47:40 +01:00
|
|
|
goto rollback;
|
|
|
|
}
|
|
|
|
} else {
|
2011-12-12 06:38:09 +01:00
|
|
|
error("unable to delete existing %s", newrefname);
|
2006-11-28 15:47:40 +01:00
|
|
|
goto rollback;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-18 23:48:58 +01:00
|
|
|
if (log && rename_tmp_log(newrefname))
|
2006-11-28 15:47:40 +01:00
|
|
|
goto rollback;
|
|
|
|
|
|
|
|
logmoved = log;
|
|
|
|
|
2015-05-11 17:25:15 +02:00
|
|
|
lock = lock_ref_sha1_basic(newrefname, NULL, NULL, NULL, 0, NULL, &err);
|
2006-11-28 15:47:40 +01:00
|
|
|
if (!lock) {
|
2015-05-11 17:25:17 +02:00
|
|
|
error("unable to rename '%s' to '%s': %s", oldrefname, newrefname, err.buf);
|
2015-05-11 17:25:15 +02:00
|
|
|
strbuf_release(&err);
|
2006-11-28 15:47:40 +01:00
|
|
|
goto rollback;
|
|
|
|
}
|
2015-05-25 20:39:22 +02:00
|
|
|
hashcpy(lock->old_oid.hash, orig_sha1);
|
2015-05-09 17:20:39 +02:00
|
|
|
|
|
|
|
if (write_ref_to_lockfile(lock, orig_sha1) ||
|
|
|
|
commit_ref_update(lock, orig_sha1, logmsg)) {
|
2011-12-12 06:38:09 +01:00
|
|
|
error("unable to write current sha1 into %s", newrefname);
|
2006-11-28 15:47:40 +01:00
|
|
|
goto rollback;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
rollback:
|
2015-05-11 17:25:15 +02:00
|
|
|
lock = lock_ref_sha1_basic(oldrefname, NULL, NULL, NULL, 0, NULL, &err);
|
2006-11-28 15:47:40 +01:00
|
|
|
if (!lock) {
|
2015-05-11 17:25:17 +02:00
|
|
|
error("unable to lock %s for rollback: %s", oldrefname, err.buf);
|
2015-05-11 17:25:15 +02:00
|
|
|
strbuf_release(&err);
|
2006-11-28 15:47:40 +01:00
|
|
|
goto rollbacklog;
|
|
|
|
}
|
|
|
|
|
|
|
|
flag = log_all_ref_updates;
|
|
|
|
log_all_ref_updates = 0;
|
2015-05-09 17:20:39 +02:00
|
|
|
if (write_ref_to_lockfile(lock, orig_sha1) ||
|
|
|
|
commit_ref_update(lock, orig_sha1, NULL))
|
2011-12-12 06:38:09 +01:00
|
|
|
error("unable to write current sha1 into %s", oldrefname);
|
2006-11-28 15:47:40 +01:00
|
|
|
log_all_ref_updates = flag;
|
|
|
|
|
|
|
|
rollbacklog:
|
2011-12-12 06:38:09 +01:00
|
|
|
if (logmoved && rename(git_path("logs/%s", newrefname), git_path("logs/%s", oldrefname)))
|
2006-11-28 15:47:40 +01:00
|
|
|
error("unable to restore logfile %s from %s: %s",
|
2011-12-12 06:38:09 +01:00
|
|
|
oldrefname, newrefname, strerror(errno));
|
2006-11-28 15:47:40 +01:00
|
|
|
if (!logmoved && log &&
|
2011-12-12 06:38:09 +01:00
|
|
|
rename(git_path(TMP_RENAMED_LOG), git_path("logs/%s", oldrefname)))
|
2010-07-07 09:47:20 +02:00
|
|
|
error("unable to restore logfile %s from "TMP_RENAMED_LOG": %s",
|
2011-12-12 06:38:09 +01:00
|
|
|
oldrefname, strerror(errno));
|
2006-11-28 15:47:40 +01:00
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2014-12-12 09:57:00 +01:00
|
|
|
static int close_ref(struct ref_lock *lock)
|
2008-01-16 20:14:30 +01:00
|
|
|
{
|
|
|
|
if (close_lock_file(lock->lk))
|
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-12-12 09:57:00 +01:00
|
|
|
static int commit_ref(struct ref_lock *lock)
|
2008-01-16 20:14:30 +01:00
|
|
|
{
|
|
|
|
if (commit_lock_file(lock->lk))
|
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-07-29 02:17:17 +02:00
|
|
|
/*
 * Copy the reflog message msg into buf as "\t<message>\n", cleaning
 * up whitespace on the way: leading and trailing whitespace is
 * dropped and every inner run of whitespace (LF included, because the
 * reflog file is one line per entry) becomes a single space. If
 * nothing but whitespace remains, the tab is dropped too and only
 * "\n" is stored.
 *
 * buf must have been allocated sufficiently large (strlen(msg) + 2
 * bytes are enough); no NUL terminator is written. Returns the number
 * of bytes stored in buf.
 */
static int copy_msg(char *buf, const char *msg)
{
	char *out = buf;
	const char *in;
	int prev_space = 1;	/* treat the start as "after a space" */

	*out++ = '\t';
	for (in = msg; *in; in++) {
		int space = isspace(*in);

		if (!space)
			*out++ = *in;
		else if (!prev_space)
			*out++ = ' ';	/* first of a run: emit one blank */
		prev_space = space;
	}

	/* Trim trailing whitespace; this may eat the leading tab as well. */
	while (buf < out && isspace(out[-1]))
		out--;
	*out++ = '\n';

	return out - buf;
}
|
|
|
|
|
2014-06-20 16:42:50 +02:00
|
|
|
/* This function must set a meaningful errno on failure */
|
2014-11-30 09:24:28 +01:00
|
|
|
int log_ref_setup(const char *refname, struct strbuf *sb_logfile)
|
2006-05-17 11:55:40 +02:00
|
|
|
{
|
2010-05-22 02:28:36 +02:00
|
|
|
int logfd, oflags = O_APPEND | O_WRONLY;
|
2014-11-30 09:24:28 +01:00
|
|
|
char *logfile;
|
2007-01-26 23:26:05 +01:00
|
|
|
|
2014-11-30 09:24:28 +01:00
|
|
|
strbuf_git_path(sb_logfile, "logs/%s", refname);
|
|
|
|
logfile = sb_logfile->buf;
|
|
|
|
/* make sure the rest of the function can't change "logfile" */
|
|
|
|
sb_logfile = NULL;
|
2006-10-08 10:35:18 +02:00
|
|
|
if (log_all_ref_updates &&
|
2013-11-30 21:55:40 +01:00
|
|
|
(starts_with(refname, "refs/heads/") ||
|
|
|
|
starts_with(refname, "refs/remotes/") ||
|
|
|
|
starts_with(refname, "refs/notes/") ||
|
2011-12-12 06:38:09 +01:00
|
|
|
!strcmp(refname, "HEAD"))) {
|
2014-06-20 16:42:50 +02:00
|
|
|
if (safe_create_leading_directories(logfile) < 0) {
|
|
|
|
int save_errno = errno;
|
|
|
|
error("unable to create directory for %s", logfile);
|
|
|
|
errno = save_errno;
|
|
|
|
return -1;
|
|
|
|
}
|
2006-05-17 11:55:40 +02:00
|
|
|
oflags |= O_CREAT;
|
|
|
|
}
|
|
|
|
|
2010-06-10 14:54:03 +02:00
|
|
|
logfd = open(logfile, oflags, 0666);
|
2006-05-17 11:55:40 +02:00
|
|
|
if (logfd < 0) {
|
ignore stale directories when checking reflog existence
When we update a ref, we have two rules for whether or not
we actually update the reflog:
1. If the reflog already exists, we will always append to
it.
2. If log_all_ref_updates is set, we will create a new
reflog file if necessary.
We do the existence check by trying to open the reflog file,
either with or without O_CREAT (depending on log_all_ref_updates).
If it fails, then we check errno to see what happened.
If we were not using O_CREAT and we got ENOENT, the file
doesn't exist, and we return success (there isn't a reflog
already, and we were not told to make a new one).
If we get EISDIR, then there is likely a stale directory
that needs to be removed (e.g., there used to be "foo/bar",
it was deleted, and the directory "foo" was left. Now we
want to create the ref "foo"). If O_CREAT is set, then we
catch this case, try to remove the directory, and retry our
open. So far so good.
But if we get EISDIR and O_CREAT is not set, then we treat
this as any other error, which is not right. Like ENOENT,
EISDIR is an indication that we do not have a reflog, and we
should silently return success (we were not told to create
it). Instead, the current code reports this as an error, and
we fail to update the ref at all.
Note that this is relatively unlikely to happen, as you
would have to have had reflogs turned on, and then later
turned them off (it could also happen due to a bug in fetch,
but that was fixed in the previous commit). However, it's
quite easy to fix: we just need to treat EISDIR like ENOENT
for the non-O_CREAT case, and silently return (note that
this early return means we can also simplify the O_CREAT
case).
Our new tests cover both cases (O_CREAT and non-O_CREAT).
The first one already worked, of course.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-04 14:24:53 +01:00
|
|
|
if (!(oflags & O_CREAT) && (errno == ENOENT || errno == EISDIR))
|
2006-05-17 11:55:40 +02:00
|
|
|
return 0;
|
2006-10-19 10:28:47 +02:00
|
|
|
|
ignore stale directories when checking reflog existence
When we update a ref, we have two rules for whether or not
we actually update the reflog:
1. If the reflog already exists, we will always append to
it.
2. If log_all_ref_updates is set, we will create a new
reflog file if necessary.
We do the existence check by trying to open the reflog file,
either with or without O_CREAT (depending on log_all_ref_updates).
If it fails, then we check errno to see what happened.
If we were not using O_CREAT and we got ENOENT, the file
doesn't exist, and we return success (there isn't a reflog
already, and we were not told to make a new one).
If we get EISDIR, then there is likely a stale directory
that needs to be removed (e.g., there used to be "foo/bar",
it was deleted, and the directory "foo" was left. Now we
want to create the ref "foo"). If O_CREAT is set, then we
catch this case, try to remove the directory, and retry our
open. So far so good.
But if we get EISDIR and O_CREAT is not set, then we treat
this as any other error, which is not right. Like ENOENT,
EISDIR is an indication that we do not have a reflog, and we
should silently return success (we were not told to create
it). Instead, the current code reports this as an error, and
we fail to update the ref at all.
Note that this is relatively unlikely to happen, as you
would have to have had reflogs turned on, and then later
turned them off (it could also happen due to a bug in fetch,
but that was fixed in the previous commit). However, it's
quite easy to fix: we just need to treat EISDIR like ENOENT
for the non-O_CREAT case, and silently return (note that
this early return means we can also simplify the O_CREAT
case).
Our new tests cover both cases (O_CREAT and non-O_CREAT).
The first one already worked, of course.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-04 14:24:53 +01:00
|
|
|
if (errno == EISDIR) {
|
2010-06-10 14:54:03 +02:00
|
|
|
if (remove_empty_directories(logfile)) {
|
2014-06-20 16:42:50 +02:00
|
|
|
int save_errno = errno;
|
|
|
|
error("There are still logs under '%s'",
|
|
|
|
logfile);
|
|
|
|
errno = save_errno;
|
|
|
|
return -1;
|
2006-10-19 10:28:47 +02:00
|
|
|
}
|
2010-06-10 14:54:03 +02:00
|
|
|
logfd = open(logfile, oflags, 0666);
|
2006-10-19 10:28:47 +02:00
|
|
|
}
|
|
|
|
|
2014-06-20 16:42:50 +02:00
|
|
|
if (logfd < 0) {
|
|
|
|
int save_errno = errno;
|
|
|
|
error("Unable to append to %s: %s", logfile,
|
|
|
|
strerror(errno));
|
|
|
|
errno = save_errno;
|
|
|
|
return -1;
|
|
|
|
}
|
2006-05-17 11:55:40 +02:00
|
|
|
}
|
|
|
|
|
2010-06-10 14:54:03 +02:00
|
|
|
adjust_shared_perm(logfile);
|
2010-05-22 02:28:36 +02:00
|
|
|
close(logfd);
|
|
|
|
return 0;
|
|
|
|
}
|
2007-03-09 23:38:57 +01:00
|
|
|
|
2014-12-12 09:56:42 +01:00
|
|
|
/*
 * Format one reflog record, "<old> <new> <committer>" followed by the
 * cleaned-up message (see copy_msg()) if msg is non-empty, and write
 * it to fd in a single write_in_full() call.
 *
 * msg may be NULL; committer must not be. Returns 0 if the whole
 * record was written, -1 otherwise.
 */
static int log_ref_write_fd(int fd, const unsigned char *old_sha1,
			    const unsigned char *new_sha1,
			    const char *committer, const char *msg)
{
	int msglen = msg ? strlen(msg) : 0;
	/* 100 bytes of slack cover the two hex names and separators */
	unsigned maxlen = strlen(committer) + msglen + 100;
	char *record = xmalloc(maxlen);
	unsigned len;
	int written;

	len = sprintf(record, "%s %s %s\n",
		      sha1_to_hex(old_sha1),
		      sha1_to_hex(new_sha1),
		      committer);

	/*
	 * copy_msg() starts on top of the newline written above and
	 * supplies its own, hence the two "- 1" adjustments.
	 */
	if (msglen)
		len += copy_msg(record + len - 1, msg) - 1;

	if (len <= maxlen)
		written = write_in_full(fd, record, len);
	else
		written = -1;
	free(record);

	return written != len ? -1 : 0;
}
|
|
|
|
|
2014-11-30 09:24:28 +01:00
|
|
|
static int log_ref_write_1(const char *refname, const unsigned char *old_sha1,
|
|
|
|
const unsigned char *new_sha1, const char *msg,
|
|
|
|
struct strbuf *sb_log_file)
|
2010-05-22 02:28:36 +02:00
|
|
|
{
|
2014-12-12 09:56:42 +01:00
|
|
|
int logfd, result, oflags = O_APPEND | O_WRONLY;
|
2015-05-11 23:23:39 +02:00
|
|
|
char *log_file;
|
2010-05-22 02:28:36 +02:00
|
|
|
|
|
|
|
if (log_all_ref_updates < 0)
|
|
|
|
log_all_ref_updates = !is_bare_repository();
|
|
|
|
|
2014-11-30 09:24:28 +01:00
|
|
|
result = log_ref_setup(refname, sb_log_file);
|
2010-05-22 02:28:36 +02:00
|
|
|
if (result)
|
|
|
|
return result;
|
2014-11-30 09:24:28 +01:00
|
|
|
log_file = sb_log_file->buf;
|
|
|
|
/* make sure the rest of the function can't change "log_file" */
|
|
|
|
sb_log_file = NULL;
|
2010-05-22 02:28:36 +02:00
|
|
|
|
|
|
|
logfd = open(log_file, oflags);
|
|
|
|
if (logfd < 0)
|
|
|
|
return 0;
|
2014-12-12 09:56:42 +01:00
|
|
|
result = log_ref_write_fd(logfd, old_sha1, new_sha1,
|
|
|
|
git_committer_info(0), msg);
|
|
|
|
if (result) {
|
2014-06-20 16:42:55 +02:00
|
|
|
int save_errno = errno;
|
|
|
|
close(logfd);
|
|
|
|
error("Unable to append to %s", log_file);
|
|
|
|
errno = save_errno;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (close(logfd)) {
|
|
|
|
int save_errno = errno;
|
|
|
|
error("Unable to append to %s", log_file);
|
|
|
|
errno = save_errno;
|
|
|
|
return -1;
|
|
|
|
}
|
2006-05-17 11:55:40 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-11-30 09:24:28 +01:00
|
|
|
static int log_ref_write(const char *refname, const unsigned char *old_sha1,
|
|
|
|
const unsigned char *new_sha1, const char *msg)
|
|
|
|
{
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
int ret = log_ref_write_1(refname, old_sha1, new_sha1, msg, &sb);
|
|
|
|
strbuf_release(&sb);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-07-16 01:02:38 +02:00
|
|
|
/*
 * Return 1 if refname is "HEAD" or starts with "refs/heads/",
 * 0 otherwise.
 */
int is_branch(const char *refname)
{
	if (starts_with(refname, "refs/heads/"))
		return 1;
	return !strcmp(refname, "HEAD");
}
|
|
|
|
|
2014-04-29 00:36:58 +02:00
|
|
|
/*
|
2015-04-24 13:35:45 +02:00
|
|
|
* Write sha1 into the open lockfile, then close the lockfile. On
|
|
|
|
* errors, rollback the lockfile and set errno to reflect the problem.
|
2014-04-29 00:36:58 +02:00
|
|
|
*/
|
2015-04-24 13:35:45 +02:00
|
|
|
static int write_ref_to_lockfile(struct ref_lock *lock,
|
|
|
|
const unsigned char *sha1)
|
2006-05-17 11:55:02 +02:00
|
|
|
{
|
|
|
|
static char term = '\n';
|
2008-01-16 00:50:17 +01:00
|
|
|
struct object *o;
|
2006-05-17 11:55:02 +02:00
|
|
|
|
2008-01-16 00:50:17 +01:00
|
|
|
o = parse_object(sha1);
|
|
|
|
if (!o) {
|
2011-06-16 15:42:48 +02:00
|
|
|
error("Trying to write ref %s with nonexistent object %s",
|
2008-01-16 00:50:17 +01:00
|
|
|
lock->ref_name, sha1_to_hex(sha1));
|
|
|
|
unlock_ref(lock);
|
2014-06-20 16:42:55 +02:00
|
|
|
errno = EINVAL;
|
2008-01-16 00:50:17 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (o->type != OBJ_COMMIT && is_branch(lock->ref_name)) {
|
|
|
|
error("Trying to write non-commit object %s to branch %s",
|
|
|
|
sha1_to_hex(sha1), lock->ref_name);
|
|
|
|
unlock_ref(lock);
|
2014-06-20 16:42:55 +02:00
|
|
|
errno = EINVAL;
|
2008-01-16 00:50:17 +01:00
|
|
|
return -1;
|
|
|
|
}
|
2015-04-17 01:17:37 +02:00
|
|
|
if (write_in_full(lock->lk->fd, sha1_to_hex(sha1), 40) != 40 ||
|
|
|
|
write_in_full(lock->lk->fd, &term, 1) != 1 ||
|
2014-06-20 16:42:55 +02:00
|
|
|
close_ref(lock) < 0) {
|
|
|
|
int save_errno = errno;
|
2014-10-01 12:28:32 +02:00
|
|
|
error("Couldn't write %s", lock->lk->filename.buf);
|
2006-05-17 11:55:02 +02:00
|
|
|
unlock_ref(lock);
|
2014-06-20 16:42:55 +02:00
|
|
|
errno = save_errno;
|
2006-05-17 11:55:02 +02:00
|
|
|
return -1;
|
|
|
|
}
|
2015-04-24 13:35:45 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2015-05-09 17:18:36 +02:00
|
|
|
* Commit a change to a loose reference that has already been written
|
|
|
|
* to the loose reference lockfile. Also update the reflogs if
|
|
|
|
* necessary, using the specified lockmsg (which can be NULL).
|
2015-04-24 13:35:45 +02:00
|
|
|
*/
|
2015-05-09 17:18:36 +02:00
|
|
|
static int commit_ref_update(struct ref_lock *lock,
|
|
|
|
const unsigned char *sha1, const char *logmsg)
|
2015-04-24 13:35:45 +02:00
|
|
|
{
|
2013-04-22 21:52:41 +02:00
|
|
|
clear_loose_ref_cache(&ref_cache);
|
2015-05-25 20:39:22 +02:00
|
|
|
if (log_ref_write(lock->ref_name, lock->old_oid.hash, sha1, logmsg) < 0 ||
|
2007-01-26 23:26:07 +01:00
|
|
|
(strcmp(lock->ref_name, lock->orig_ref_name) &&
|
2015-05-25 20:39:22 +02:00
|
|
|
log_ref_write(lock->orig_ref_name, lock->old_oid.hash, sha1, logmsg) < 0)) {
|
2006-05-17 11:55:40 +02:00
|
|
|
unlock_ref(lock);
|
|
|
|
return -1;
|
|
|
|
}
|
2007-03-21 22:11:44 +01:00
|
|
|
if (strcmp(lock->orig_ref_name, "HEAD") != 0) {
|
|
|
|
/*
|
|
|
|
* Special hack: If a branch is updated directly and HEAD
|
|
|
|
* points to it (may happen on the remote side of a push
|
|
|
|
* for example) then logically the HEAD reflog should be
|
|
|
|
* updated too.
|
|
|
|
* A generic solution implies reverse symref information,
|
|
|
|
* but finding all symrefs pointing to the given branch
|
|
|
|
* would be rather costly for this rare event (the direct
|
|
|
|
* update of a branch) to be worth it. So let's cheat and
|
|
|
|
* check with HEAD only which should cover 99% of all usage
|
|
|
|
* scenarios (even 100% of the default ones).
|
|
|
|
*/
|
|
|
|
unsigned char head_sha1[20];
|
|
|
|
int head_flag;
|
|
|
|
const char *head_ref;
|
2014-07-15 21:59:36 +02:00
|
|
|
head_ref = resolve_ref_unsafe("HEAD", RESOLVE_REF_READING,
|
|
|
|
head_sha1, &head_flag);
|
2007-03-21 22:11:44 +01:00
|
|
|
if (head_ref && (head_flag & REF_ISSYMREF) &&
|
|
|
|
!strcmp(head_ref, lock->ref_name))
|
2015-05-25 20:39:22 +02:00
|
|
|
log_ref_write("HEAD", lock->old_oid.hash, sha1, logmsg);
|
2007-03-21 22:11:44 +01:00
|
|
|
}
|
2008-01-16 20:14:30 +01:00
|
|
|
if (commit_ref(lock)) {
|
Enable the packed refs file format
This actually "turns on" the packed ref file format, now that the
infrastructure to do so sanely exists (ie notably the change to make the
reference reading logic take refnames rather than pathnames to the loose
objects that no longer necessarily even exist).
In particular, when the ref lookup hits a refname that has no loose file
associated with it, it falls back on the packed-ref information. Also, the
ref-locking code, while still using a loose file for the locking itself
(and _creating_ a loose file for the new ref) no longer requires that the
old ref be in such an unpacked state.
Finally, this does a minimal hack to git-checkout.sh to rather than check
the ref-file directly, do a "git-rev-parse" on the "heads/$refname".
That's not really wonderful - we should rather really have a special
routine to verify the names as proper branch head names, but it is a
workable solution for now.
With this, I can literally do something like
git pack-refs
find .git/refs -type f -print0 | xargs -0 rm -f --
and the end result is a largely working repository (ie I've done two
commits - which creates _one_ unpacked ref file - done things like run
"gitk" and "git log" etc, and it all looks ok).
There are probably things missing, but I'm hoping that the missing things
are now of the "small and obvious" kind, and that somebody else might want
to start looking at this too. Hint hint ;)
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-09-14 19:14:47 +02:00
|
|
|
error("Couldn't set %s", lock->ref_name);
|
2006-05-17 11:55:02 +02:00
|
|
|
unlock_ref(lock);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
unlock_ref(lock);
|
|
|
|
return 0;
|
2005-06-06 22:31:29 +02:00
|
|
|
}
|
2006-05-17 11:56:09 +02:00
|
|
|
|
2007-01-26 23:26:10 +01:00
|
|
|
int create_symref(const char *ref_target, const char *refs_heads_master,
|
|
|
|
const char *logmsg)
|
2007-01-26 23:26:09 +01:00
|
|
|
{
|
|
|
|
const char *lockpath;
|
|
|
|
char ref[1000];
|
|
|
|
int fd, len, written;
|
2008-10-27 11:22:09 +01:00
|
|
|
char *git_HEAD = git_pathdup("%s", ref_target);
|
2007-01-26 23:26:10 +01:00
|
|
|
unsigned char old_sha1[20], new_sha1[20];
|
|
|
|
|
|
|
|
if (logmsg && read_ref(ref_target, old_sha1))
|
|
|
|
hashclr(old_sha1);
|
2007-01-26 23:26:09 +01:00
|
|
|
|
2007-02-08 08:41:43 +01:00
|
|
|
if (safe_create_leading_directories(git_HEAD) < 0)
|
|
|
|
return error("unable to create directory for %s", git_HEAD);
|
|
|
|
|
2007-01-26 23:26:09 +01:00
|
|
|
#ifndef NO_SYMLINK_HEAD
|
|
|
|
if (prefer_symlink_refs) {
|
|
|
|
unlink(git_HEAD);
|
|
|
|
if (!symlink(refs_heads_master, git_HEAD))
|
2007-01-26 23:26:10 +01:00
|
|
|
goto done;
|
2007-01-26 23:26:09 +01:00
|
|
|
fprintf(stderr, "no symlink - falling back to symbolic ref\n");
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
len = snprintf(ref, sizeof(ref), "ref: %s\n", refs_heads_master);
|
|
|
|
if (sizeof(ref) <= len) {
|
|
|
|
error("refname too long: %s", refs_heads_master);
|
2007-01-27 02:49:00 +01:00
|
|
|
goto error_free_return;
|
2007-01-26 23:26:09 +01:00
|
|
|
}
|
|
|
|
lockpath = mkpath("%s.lock", git_HEAD);
|
|
|
|
fd = open(lockpath, O_CREAT | O_EXCL | O_WRONLY, 0666);
|
|
|
|
if (fd < 0) {
|
|
|
|
error("Unable to open %s for writing", lockpath);
|
2007-01-27 02:49:00 +01:00
|
|
|
goto error_free_return;
|
2007-01-26 23:26:09 +01:00
|
|
|
}
|
|
|
|
written = write_in_full(fd, ref, len);
|
2007-06-24 21:20:41 +02:00
|
|
|
if (close(fd) != 0 || written != len) {
|
2007-01-26 23:26:09 +01:00
|
|
|
error("Unable to write to %s", lockpath);
|
2007-01-27 02:49:00 +01:00
|
|
|
goto error_unlink_return;
|
2007-01-26 23:26:09 +01:00
|
|
|
}
|
|
|
|
if (rename(lockpath, git_HEAD) < 0) {
|
|
|
|
error("Unable to create %s", git_HEAD);
|
2007-01-27 02:49:00 +01:00
|
|
|
goto error_unlink_return;
|
2007-01-26 23:26:09 +01:00
|
|
|
}
|
|
|
|
if (adjust_shared_perm(git_HEAD)) {
|
|
|
|
error("Unable to fix permissions on %s", lockpath);
|
2007-01-27 02:49:00 +01:00
|
|
|
error_unlink_return:
|
2009-04-29 23:22:56 +02:00
|
|
|
unlink_or_warn(lockpath);
|
2007-01-27 02:49:00 +01:00
|
|
|
error_free_return:
|
|
|
|
free(git_HEAD);
|
|
|
|
return -1;
|
2007-01-26 23:26:09 +01:00
|
|
|
}
|
2007-01-26 23:26:10 +01:00
|
|
|
|
2007-03-03 19:28:46 +01:00
|
|
|
#ifndef NO_SYMLINK_HEAD
|
2007-01-26 23:26:10 +01:00
|
|
|
done:
|
2007-03-03 19:28:46 +01:00
|
|
|
#endif
|
2007-01-26 23:26:10 +01:00
|
|
|
if (logmsg && !read_ref(refs_heads_master, new_sha1))
|
|
|
|
log_ref_write(ref_target, old_sha1, new_sha1, logmsg);
|
|
|
|
|
2007-01-27 02:49:00 +01:00
|
|
|
free(git_HEAD);
|
2007-01-26 23:26:09 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-06-03 18:09:59 +02:00
|
|
|
/*
 * State shared between read_ref_at() and its reflog callbacks.
 */
struct read_ref_at_cb {
	/* Inputs, set up by read_ref_at() */
	const char *refname;		/* used in warning messages */
	unsigned long at_time;		/* entries at or before this match */
	int cnt;			/* counted down per skipped entry; 0 matches */
	int reccnt;			/* entries seen so far by read_ref_at_ent() */
	unsigned char *sha1;		/* caller's result buffer */
	int found_it;			/* set once read_ref_at_ent() accepts an entry */

	/* Values remembered from the most recently visited entry */
	unsigned char osha1[20];
	unsigned char nsha1[20];
	int tz;
	unsigned long date;

	/* Optional outputs; each is filled in only when non-NULL */
	char **msg;
	unsigned long *cutoff_time;
	int *cutoff_tz;
	int *cutoff_cnt;
};
|
|
|
|
|
|
|
|
static int read_ref_at_ent(unsigned char *osha1, unsigned char *nsha1,
|
|
|
|
const char *email, unsigned long timestamp, int tz,
|
|
|
|
const char *message, void *cb_data)
|
|
|
|
{
|
|
|
|
struct read_ref_at_cb *cb = cb_data;
|
|
|
|
|
|
|
|
cb->reccnt++;
|
|
|
|
cb->tz = tz;
|
|
|
|
cb->date = timestamp;
|
|
|
|
|
|
|
|
if (timestamp <= cb->at_time || cb->cnt == 0) {
|
|
|
|
if (cb->msg)
|
|
|
|
*cb->msg = xstrdup(message);
|
|
|
|
if (cb->cutoff_time)
|
|
|
|
*cb->cutoff_time = timestamp;
|
|
|
|
if (cb->cutoff_tz)
|
|
|
|
*cb->cutoff_tz = tz;
|
|
|
|
if (cb->cutoff_cnt)
|
|
|
|
*cb->cutoff_cnt = cb->reccnt - 1;
|
|
|
|
/*
|
|
|
|
* we have not yet updated cb->[n|o]sha1 so they still
|
|
|
|
* hold the values for the previous record.
|
|
|
|
*/
|
|
|
|
if (!is_null_sha1(cb->osha1)) {
|
|
|
|
hashcpy(cb->sha1, nsha1);
|
|
|
|
if (hashcmp(cb->osha1, nsha1))
|
|
|
|
warning("Log for ref %s has gap after %s.",
|
|
|
|
cb->refname, show_date(cb->date, cb->tz, DATE_RFC2822));
|
|
|
|
}
|
|
|
|
else if (cb->date == cb->at_time)
|
|
|
|
hashcpy(cb->sha1, nsha1);
|
|
|
|
else if (hashcmp(nsha1, cb->sha1))
|
|
|
|
warning("Log for ref %s unexpectedly ended on %s.",
|
|
|
|
cb->refname, show_date(cb->date, cb->tz,
|
|
|
|
DATE_RFC2822));
|
|
|
|
hashcpy(cb->osha1, osha1);
|
|
|
|
hashcpy(cb->nsha1, nsha1);
|
|
|
|
cb->found_it = 1;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
hashcpy(cb->osha1, osha1);
|
|
|
|
hashcpy(cb->nsha1, nsha1);
|
|
|
|
if (cb->cnt > 0)
|
|
|
|
cb->cnt--;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int read_ref_at_ent_oldest(unsigned char *osha1, unsigned char *nsha1,
|
|
|
|
const char *email, unsigned long timestamp,
|
|
|
|
int tz, const char *message, void *cb_data)
|
|
|
|
{
|
|
|
|
struct read_ref_at_cb *cb = cb_data;
|
|
|
|
|
|
|
|
if (cb->msg)
|
|
|
|
*cb->msg = xstrdup(message);
|
|
|
|
if (cb->cutoff_time)
|
|
|
|
*cb->cutoff_time = timestamp;
|
|
|
|
if (cb->cutoff_tz)
|
|
|
|
*cb->cutoff_tz = tz;
|
|
|
|
if (cb->cutoff_cnt)
|
|
|
|
*cb->cutoff_cnt = cb->reccnt;
|
|
|
|
hashcpy(cb->sha1, osha1);
|
|
|
|
if (is_null_sha1(cb->sha1))
|
|
|
|
hashcpy(cb->sha1, nsha1);
|
|
|
|
/* We just want the first entry */
|
|
|
|
return 1;
|
2007-01-19 10:19:05 +01:00
|
|
|
}
|
|
|
|
|
2014-09-19 05:45:37 +02:00
|
|
|
int read_ref_at(const char *refname, unsigned int flags, unsigned long at_time, int cnt,
|
2011-12-12 06:38:09 +01:00
|
|
|
unsigned char *sha1, char **msg,
|
|
|
|
unsigned long *cutoff_time, int *cutoff_tz, int *cutoff_cnt)
|
2006-05-17 11:56:09 +02:00
|
|
|
{
|
2014-06-03 18:09:59 +02:00
|
|
|
struct read_ref_at_cb cb;
|
2006-05-17 11:56:09 +02:00
|
|
|
|
2014-06-03 18:09:59 +02:00
|
|
|
memset(&cb, 0, sizeof(cb));
|
|
|
|
cb.refname = refname;
|
|
|
|
cb.at_time = at_time;
|
|
|
|
cb.cnt = cnt;
|
|
|
|
cb.msg = msg;
|
|
|
|
cb.cutoff_time = cutoff_time;
|
|
|
|
cb.cutoff_tz = cutoff_tz;
|
|
|
|
cb.cutoff_cnt = cutoff_cnt;
|
|
|
|
cb.sha1 = sha1;
|
|
|
|
|
|
|
|
for_each_reflog_ent_reverse(refname, read_ref_at_ent, &cb);
|
|
|
|
|
2014-09-19 05:45:37 +02:00
|
|
|
if (!cb.reccnt) {
|
|
|
|
if (flags & GET_SHA1_QUIETLY)
|
|
|
|
exit(128);
|
|
|
|
else
|
|
|
|
die("Log for %s is empty.", refname);
|
|
|
|
}
|
2014-06-03 18:09:59 +02:00
|
|
|
if (cb.found_it)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
for_each_reflog_ent(refname, read_ref_at_ent_oldest, &cb);
|
2006-05-17 11:56:09 +02:00
|
|
|
|
2007-01-19 10:19:05 +01:00
|
|
|
return 1;
|
2006-05-17 11:56:09 +02:00
|
|
|
}
|
2006-12-18 10:18:16 +01:00
|
|
|
|
2014-05-07 00:45:52 +02:00
|
|
|
/*
 * Return 1 if "logs/<refname>" exists and is a regular file according
 * to lstat(), 0 otherwise.
 */
int reflog_exists(const char *refname)
{
	struct stat st;

	if (lstat(git_path("logs/%s", refname), &st))
		return 0;
	return S_ISREG(st.st_mode) ? 1 : 0;
}
|
|
|
|
|
|
|
|
/*
 * Remove "logs/<refname>" via remove_path() and return its result.
 */
int delete_reflog(const char *refname)
{
	const char *log_path = git_path("logs/%s", refname);

	return remove_path(log_path);
}
|
|
|
|
|
2013-03-08 19:36:43 +01:00
|
|
|
/*
 * Parse the single reflog line held in sb and hand its fields to fn.
 * The line is modified in place: a NUL is written after the '>' that
 * closes the email so that the identity becomes a separate string.
 *
 * A line that does not match the expected layout is skipped and 0 is
 * returned; otherwise the return value of fn is passed through.
 */
static int show_one_reflog_ent(struct strbuf *sb, each_reflog_ent_fn fn, void *cb_data)
{
	unsigned char osha1[20], nsha1[20];
	char *line = sb->buf;
	char *email_end, *message;
	unsigned long timestamp;
	int tz;

	/* old SP new SP name <email> SP time TAB msg LF */
	if (sb->len < 83 || line[sb->len - 1] != '\n')
		return 0; /* corrupt? */
	if (get_sha1_hex(line, osha1) || line[40] != ' ')
		return 0;
	if (get_sha1_hex(line + 41, nsha1) || line[81] != ' ')
		return 0;

	email_end = strchr(line + 82, '>');
	if (!email_end || email_end[1] != ' ')
		return 0;

	/* A timestamp of zero is treated like a parse failure */
	timestamp = strtoul(email_end + 2, &message, 10);
	if (!timestamp || !message || message[0] != ' ')
		return 0;

	/* The timezone is a sign followed by four digits */
	if (message[1] != '+' && message[1] != '-')
		return 0;
	if (!isdigit(message[2]) || !isdigit(message[3]) ||
	    !isdigit(message[4]) || !isdigit(message[5]))
		return 0;

	email_end[1] = '\0';
	tz = strtol(message + 1, NULL, 10);
	/* Skip the TAB separating the timezone from the message, if any */
	message += (message[6] != '\t') ? 6 : 7;
	return fn(osha1, nsha1, line + 82, timestamp, tz, message, cb_data);
}
|
|
|
|
|
2013-03-08 22:27:37 +01:00
|
|
|
/*
 * Scan backwards from just before scan, going no further than bob
 * (the beginning of the buffer).  Returns a pointer to the nearest LF
 * found, i.e. the terminator of the previous line, or bob if there is
 * none.  Returns scan unchanged when scan is not after bob.
 */
static char *find_beginning_of_line(char *bob, char *scan)
{
	while (scan > bob) {
		scan--;
		if (*scan == '\n')
			break;
	}
	return scan;
}
|
|
|
|
|
|
|
|
/*
 * Call fn for every entry in the reflog of refname, newest entry
 * first.  The file "logs/<refname>" is read backwards in blocks of
 * BUFSIZ bytes; a line that straddles a block boundary is collected
 * in a strbuf until its beginning has been read.
 *
 * Iteration stops as soon as fn (via show_one_reflog_ent()) returns
 * nonzero, and that value is returned.  Returns -1 if the reflog
 * cannot be opened, or if seeking or reading fails (after reporting
 * with error()).  The file and the scratch buffer are released on
 * every path once the file has been opened.
 */
int for_each_reflog_ent_reverse(const char *refname, each_reflog_ent_fn fn, void *cb_data)
{
	struct strbuf sb = STRBUF_INIT;
	FILE *logfp;
	long pos;
	int ret = 0, at_tail = 1;

	logfp = fopen(git_path("logs/%s", refname), "r");
	if (!logfp)
		return -1;

	/* Jump to the end */
	if (fseek(logfp, 0, SEEK_END) < 0)
		ret = error("cannot seek back reflog for %s: %s",
			    refname, strerror(errno));
	pos = ftell(logfp);
	while (!ret && 0 < pos) {
		int cnt;
		size_t nread;
		char buf[BUFSIZ];
		char *endp, *scanp;

		/* Fill next block from the end */
		cnt = (sizeof(buf) < pos) ? sizeof(buf) : pos;
		if (fseek(logfp, pos - cnt, SEEK_SET)) {
			ret = error("cannot seek back reflog for %s: %s",
				    refname, strerror(errno));
			break;
		}
		nread = fread(buf, cnt, 1, logfp);
		if (nread != 1) {
			ret = error("cannot read %d bytes from reflog for %s: %s",
				    cnt, refname, strerror(errno));
			break;
		}
		pos -= cnt;

		scanp = endp = buf + cnt;
		if (at_tail && scanp[-1] == '\n')
			/* Looking at the final LF at the end of the file */
			scanp--;
		at_tail = 0;

		while (buf < scanp) {
			/*
			 * terminating LF of the previous line, or the beginning
			 * of the buffer.
			 */
			char *bp;

			bp = find_beginning_of_line(buf, scanp);

			if (*bp == '\n') {
				/*
				 * The newline is the end of the previous line,
				 * so we know we have complete line starting
				 * at (bp + 1). Prefix it onto any prior data
				 * we collected for the line and process it.
				 */
				strbuf_splice(&sb, 0, 0, bp + 1, endp - (bp + 1));
				scanp = bp;
				endp = bp + 1;
				ret = show_one_reflog_ent(&sb, fn, cb_data);
				strbuf_reset(&sb);
				if (ret)
					break;
			} else if (!pos) {
				/*
				 * We are at the start of the buffer, and the
				 * start of the file; there is no previous
				 * line, and we have everything for this one.
				 * Process it, and we can end the loop.
				 */
				strbuf_splice(&sb, 0, 0, buf, endp - buf);
				ret = show_one_reflog_ent(&sb, fn, cb_data);
				strbuf_reset(&sb);
				break;
			}

			if (bp == buf) {
				/*
				 * We are at the start of the buffer, and there
				 * is more file to read backwards. Which means
				 * we are in the middle of a line. Note that we
				 * may get here even if *bp was a newline; that
				 * just means we are at the exact end of the
				 * previous line, rather than some spot in the
				 * middle.
				 *
				 * Save away what we have to be combined with
				 * the data from the next read.
				 */
				strbuf_splice(&sb, 0, 0, buf, endp - buf);
				break;
			}
		}

	}
	if (!ret && sb.len)
		die("BUG: reverse reflog parser had leftover data");

	fclose(logfp);
	strbuf_release(&sb);
	return ret;
}
|
2006-12-19 07:07:45 +01:00
|
|
|
|
2011-12-12 06:38:09 +01:00
|
|
|
/*
 * Call fn for every entry in the reflog of refname, in file order.
 * Iteration stops at end of file or as soon as fn (via
 * show_one_reflog_ent()) returns nonzero; that value is returned.
 * Returns -1 if "logs/<refname>" cannot be opened.
 */
int for_each_reflog_ent(const char *refname, each_reflog_ent_fn fn, void *cb_data)
{
	struct strbuf line = STRBUF_INIT;
	int ret = 0;
	FILE *logfp = fopen(git_path("logs/%s", refname), "r");

	if (!logfp)
		return -1;

	for (;;) {
		if (strbuf_getwholeline(&line, logfp, '\n'))
			break;
		ret = show_one_reflog_ent(&line, fn, cb_data);
		if (ret)
			break;
	}

	fclose(logfp);
	strbuf_release(&line);
	return ret;
}
|
2012-04-25 00:45:14 +02:00
|
|
|
/*
|
|
|
|
* Call fn for each reflog in the namespace indicated by name. name
|
|
|
|
* must be empty or end with '/'. Name will be used as a scratch
|
|
|
|
* space, but its contents will be restored before return.
|
|
|
|
*/
|
|
|
|
static int do_for_each_reflog(struct strbuf *name, each_ref_fn fn, void *cb_data)
|
2007-02-03 19:25:43 +01:00
|
|
|
{
|
2012-04-25 00:45:14 +02:00
|
|
|
DIR *d = opendir(git_path("logs/%s", name->buf));
|
2007-02-07 18:18:57 +01:00
|
|
|
int retval = 0;
|
2012-04-25 00:45:13 +02:00
|
|
|
struct dirent *de;
|
2012-04-25 00:45:14 +02:00
|
|
|
int oldlen = name->len;
|
2007-02-03 19:25:43 +01:00
|
|
|
|
2012-04-25 00:45:13 +02:00
|
|
|
if (!d)
|
2012-04-25 00:45:14 +02:00
|
|
|
return name->len ? errno : 0;
|
2007-02-03 19:25:43 +01:00
|
|
|
|
2012-04-25 00:45:13 +02:00
|
|
|
while ((de = readdir(d)) != NULL) {
|
|
|
|
struct stat st;
|
2007-02-03 19:25:43 +01:00
|
|
|
|
2012-04-25 00:45:13 +02:00
|
|
|
if (de->d_name[0] == '.')
|
|
|
|
continue;
|
2014-06-30 18:58:25 +02:00
|
|
|
if (ends_with(de->d_name, ".lock"))
|
2012-04-25 00:45:13 +02:00
|
|
|
continue;
|
2012-04-25 00:45:14 +02:00
|
|
|
strbuf_addstr(name, de->d_name);
|
|
|
|
if (stat(git_path("logs/%s", name->buf), &st) < 0) {
|
|
|
|
; /* silently ignore */
|
2012-04-25 00:45:13 +02:00
|
|
|
} else {
|
2007-02-03 19:25:43 +01:00
|
|
|
if (S_ISDIR(st.st_mode)) {
|
2012-04-25 00:45:14 +02:00
|
|
|
strbuf_addch(name, '/');
|
|
|
|
retval = do_for_each_reflog(name, fn, cb_data);
|
2007-02-03 19:25:43 +01:00
|
|
|
} else {
|
2015-05-25 20:38:28 +02:00
|
|
|
struct object_id oid;
|
|
|
|
|
|
|
|
if (read_ref_full(name->buf, 0, oid.hash, NULL))
|
2012-04-25 00:45:14 +02:00
|
|
|
retval = error("bad ref for %s", name->buf);
|
2007-02-03 19:25:43 +01:00
|
|
|
else
|
2015-05-25 20:38:28 +02:00
|
|
|
retval = fn(name->buf, &oid, 0, cb_data);
|
2007-02-03 19:25:43 +01:00
|
|
|
}
|
|
|
|
if (retval)
|
|
|
|
break;
|
|
|
|
}
|
2012-04-25 00:45:14 +02:00
|
|
|
strbuf_setlen(name, oldlen);
|
2007-02-03 19:25:43 +01:00
|
|
|
}
|
2012-04-25 00:45:13 +02:00
|
|
|
closedir(d);
|
2007-02-03 19:25:43 +01:00
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Call fn for each reflog, starting from the top of the "logs/"
 * hierarchy, and return the result of do_for_each_reflog().
 */
int for_each_reflog(each_ref_fn fn, void *cb_data)
{
	struct strbuf name;
	int retval;

	/* Scratch buffer in which the name of each reflog is built up */
	strbuf_init(&name, PATH_MAX);
	retval = do_for_each_reflog(&name, fn, cb_data);
	strbuf_release(&name);

	return retval;
}
|
2007-09-05 03:38:24 +02:00
|
|
|
|
2014-04-07 15:48:12 +02:00
|
|
|
/**
|
2015-02-17 18:00:14 +01:00
|
|
|
* Information needed for a single ref update. Set new_sha1 to the new
|
|
|
|
* value or to null_sha1 to delete the ref. To check the old value
|
|
|
|
* while the ref is locked, set (flags & REF_HAVE_OLD) and set
|
|
|
|
* old_sha1 to the old value, or to null_sha1 to ensure the ref does
|
|
|
|
* not exist before update.
|
2014-04-07 15:48:12 +02:00
|
|
|
*/
|
|
|
|
struct ref_update {
|
2015-02-17 18:00:21 +01:00
|
|
|
/*
|
|
|
|
* If (flags & REF_HAVE_NEW), set the reference to this value:
|
|
|
|
*/
|
2014-04-07 15:48:12 +02:00
|
|
|
unsigned char new_sha1[20];
|
2015-02-17 18:00:21 +01:00
|
|
|
/*
|
|
|
|
* If (flags & REF_HAVE_OLD), check that the reference
|
|
|
|
* previously had this value:
|
|
|
|
*/
|
2014-04-07 15:48:12 +02:00
|
|
|
unsigned char old_sha1[20];
|
2015-02-17 18:00:14 +01:00
|
|
|
/*
|
2015-02-17 18:00:21 +01:00
|
|
|
* One or more of REF_HAVE_NEW, REF_HAVE_OLD, REF_NODEREF,
|
2015-02-17 18:00:14 +01:00
|
|
|
* REF_DELETING, and REF_ISPRUNING:
|
|
|
|
*/
|
|
|
|
unsigned int flags;
|
2014-04-07 15:48:16 +02:00
|
|
|
struct ref_lock *lock;
|
2014-04-07 15:48:17 +02:00
|
|
|
int type;
|
2014-04-30 21:22:42 +02:00
|
|
|
char *msg;
|
2014-04-07 15:48:14 +02:00
|
|
|
const char refname[FLEX_ARRAY];
|
2014-04-07 15:48:12 +02:00
|
|
|
};
|
|
|
|
|
2014-04-29 21:06:19 +02:00
|
|
|
/*
 * Transaction states.
 *
 * OPEN:   The transaction is in a valid state and can accept new
 *         updates.  An OPEN transaction can be committed.
 *
 * CLOSED: A closed transaction is no longer active and no other
 *         operations than free can be used on it in this state.
 *         A transaction can either become closed by successfully
 *         committing an active transaction or if there is a failure
 *         while building the transaction thus rendering it
 *         failed/inactive.
 */
enum ref_transaction_state {
	REF_TRANSACTION_OPEN   = 0,
	REF_TRANSACTION_CLOSED = 1
};
|
|
|
|
|
2014-04-07 15:48:10 +02:00
|
|
|
/*
|
|
|
|
* Data structure for holding a reference transaction, which can
|
|
|
|
* consist of checks and updates to multiple references, carried out
|
|
|
|
* as atomically as possible. This structure is opaque to callers.
|
|
|
|
*/
|
|
|
|
struct ref_transaction {
|
|
|
|
struct ref_update **updates;
|
|
|
|
size_t alloc;
|
|
|
|
size_t nr;
|
2014-04-29 21:06:19 +02:00
|
|
|
enum ref_transaction_state state;
|
2014-04-07 15:48:10 +02:00
|
|
|
};
|
|
|
|
|
2014-05-19 19:42:34 +02:00
|
|
|
struct ref_transaction *ref_transaction_begin(struct strbuf *err)
|
2014-04-07 15:48:10 +02:00
|
|
|
{
|
2014-08-29 01:42:37 +02:00
|
|
|
assert(err);
|
|
|
|
|
2014-04-07 15:48:10 +02:00
|
|
|
return xcalloc(1, sizeof(struct ref_transaction));
|
|
|
|
}
|
|
|
|
|
2014-06-20 16:42:42 +02:00
|
|
|
void ref_transaction_free(struct ref_transaction *transaction)
|
2014-04-07 15:48:10 +02:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2014-06-20 16:42:45 +02:00
|
|
|
if (!transaction)
|
|
|
|
return;
|
|
|
|
|
2014-04-30 21:22:42 +02:00
|
|
|
for (i = 0; i < transaction->nr; i++) {
|
|
|
|
free(transaction->updates[i]->msg);
|
2014-04-07 15:48:14 +02:00
|
|
|
free(transaction->updates[i]);
|
2014-04-30 21:22:42 +02:00
|
|
|
}
|
2014-04-07 15:48:10 +02:00
|
|
|
free(transaction->updates);
|
|
|
|
free(transaction);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ref_update *add_update(struct ref_transaction *transaction,
|
|
|
|
const char *refname)
|
|
|
|
{
|
2014-04-07 15:48:14 +02:00
|
|
|
size_t len = strlen(refname);
|
|
|
|
struct ref_update *update = xcalloc(1, sizeof(*update) + len + 1);
|
2014-04-07 15:48:10 +02:00
|
|
|
|
2014-04-07 15:48:14 +02:00
|
|
|
strcpy((char *)update->refname, refname);
|
2014-04-07 15:48:10 +02:00
|
|
|
ALLOC_GROW(transaction->updates, transaction->nr + 1, transaction->alloc);
|
|
|
|
transaction->updates[transaction->nr++] = update;
|
|
|
|
return update;
|
|
|
|
}
|
|
|
|
|
2014-06-20 16:43:00 +02:00
|
|
|
int ref_transaction_update(struct ref_transaction *transaction,
|
|
|
|
const char *refname,
|
|
|
|
const unsigned char *new_sha1,
|
|
|
|
const unsigned char *old_sha1,
|
2015-02-17 18:00:15 +01:00
|
|
|
unsigned int flags, const char *msg,
|
2014-06-20 16:43:00 +02:00
|
|
|
struct strbuf *err)
|
2014-04-07 15:48:10 +02:00
|
|
|
{
|
2014-06-20 16:43:00 +02:00
|
|
|
struct ref_update *update;
|
2014-04-07 15:48:10 +02:00
|
|
|
|
2014-08-29 01:42:37 +02:00
|
|
|
assert(err);
|
|
|
|
|
2014-04-29 21:06:19 +02:00
|
|
|
if (transaction->state != REF_TRANSACTION_OPEN)
|
|
|
|
die("BUG: update called for transaction that is not open");
|
|
|
|
|
2015-02-17 18:00:21 +01:00
|
|
|
if (new_sha1 && !is_null_sha1(new_sha1) &&
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
|
|
|
|
strbuf_addf(err, "refusing to update ref with bad name %s",
|
|
|
|
refname);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2014-06-20 16:43:00 +02:00
|
|
|
update = add_update(transaction, refname);
|
2015-02-17 18:00:21 +01:00
|
|
|
if (new_sha1) {
|
|
|
|
hashcpy(update->new_sha1, new_sha1);
|
|
|
|
flags |= REF_HAVE_NEW;
|
|
|
|
}
|
2015-02-17 18:00:15 +01:00
|
|
|
if (old_sha1) {
|
2014-04-07 15:48:10 +02:00
|
|
|
hashcpy(update->old_sha1, old_sha1);
|
2015-02-17 18:00:14 +01:00
|
|
|
flags |= REF_HAVE_OLD;
|
|
|
|
}
|
|
|
|
update->flags = flags;
|
2014-04-30 21:22:42 +02:00
|
|
|
if (msg)
|
|
|
|
update->msg = xstrdup(msg);
|
2014-06-20 16:43:00 +02:00
|
|
|
return 0;
|
2014-04-07 15:48:10 +02:00
|
|
|
}
|
|
|
|
|
2014-04-17 00:26:44 +02:00
|
|
|
int ref_transaction_create(struct ref_transaction *transaction,
|
|
|
|
const char *refname,
|
|
|
|
const unsigned char *new_sha1,
|
2015-02-17 18:00:13 +01:00
|
|
|
unsigned int flags, const char *msg,
|
2014-04-17 00:26:44 +02:00
|
|
|
struct strbuf *err)
|
2014-04-07 15:48:10 +02:00
|
|
|
{
|
2015-02-17 18:00:19 +01:00
|
|
|
if (!new_sha1 || is_null_sha1(new_sha1))
|
|
|
|
die("BUG: create called without valid new_sha1");
|
2014-12-05 00:08:13 +01:00
|
|
|
return ref_transaction_update(transaction, refname, new_sha1,
|
2015-02-17 18:00:15 +01:00
|
|
|
null_sha1, flags, msg, err);
|
2014-04-07 15:48:10 +02:00
|
|
|
}
|
|
|
|
|
2014-04-17 00:27:45 +02:00
|
|
|
int ref_transaction_delete(struct ref_transaction *transaction,
|
|
|
|
const char *refname,
|
|
|
|
const unsigned char *old_sha1,
|
2015-02-17 18:00:16 +01:00
|
|
|
unsigned int flags, const char *msg,
|
2014-04-17 00:27:45 +02:00
|
|
|
struct strbuf *err)
|
2014-04-07 15:48:10 +02:00
|
|
|
{
|
2015-02-17 18:00:20 +01:00
|
|
|
if (old_sha1 && is_null_sha1(old_sha1))
|
|
|
|
die("BUG: delete called with old_sha1 set to zeros");
|
2015-02-17 18:00:15 +01:00
|
|
|
return ref_transaction_update(transaction, refname,
|
2015-02-17 18:00:16 +01:00
|
|
|
null_sha1, old_sha1,
|
2015-02-17 18:00:15 +01:00
|
|
|
flags, msg, err);
|
2014-04-07 15:48:10 +02:00
|
|
|
}
|
|
|
|
|
2015-02-17 18:00:21 +01:00
|
|
|
/*
 * Queue a check that "refname" has the value "old_sha1" without
 * supplying a new value.
 *
 * "old_sha1" is mandatory; NULL is treated as a caller bug and dies.
 * The request is forwarded to ref_transaction_update() with no new
 * value and no message, and that function's return value is passed
 * through.
 */
int ref_transaction_verify(struct ref_transaction *transaction,
			   const char *refname,
			   const unsigned char *old_sha1,
			   unsigned int flags,
			   struct strbuf *err)
{
	if (old_sha1 == NULL)
		die("BUG: verify called with old_sha1 set to NULL");

	return ref_transaction_update(transaction, refname,
				      NULL, old_sha1,
				      flags, NULL, err);
}
|
|
|
|
|
2015-02-17 18:00:22 +01:00
|
|
|
int update_ref(const char *msg, const char *refname,
|
|
|
|
const unsigned char *new_sha1, const unsigned char *old_sha1,
|
2015-02-17 18:00:13 +01:00
|
|
|
unsigned int flags, enum action_on_err onerr)
|
2013-09-04 17:22:40 +02:00
|
|
|
{
|
2014-04-25 01:36:55 +02:00
|
|
|
struct ref_transaction *t;
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
|
|
|
|
t = ref_transaction_begin(&err);
|
|
|
|
if (!t ||
|
2015-02-17 18:00:22 +01:00
|
|
|
ref_transaction_update(t, refname, new_sha1, old_sha1,
|
|
|
|
flags, msg, &err) ||
|
2014-04-30 21:22:42 +02:00
|
|
|
ref_transaction_commit(t, &err)) {
|
2014-04-25 01:36:55 +02:00
|
|
|
const char *str = "update_ref failed for ref '%s': %s";
|
|
|
|
|
|
|
|
ref_transaction_free(t);
|
|
|
|
switch (onerr) {
|
|
|
|
case UPDATE_REFS_MSG_ON_ERR:
|
|
|
|
error(str, refname, err.buf);
|
|
|
|
break;
|
|
|
|
case UPDATE_REFS_DIE_ON_ERR:
|
|
|
|
die(str, refname, err.buf);
|
|
|
|
break;
|
|
|
|
case UPDATE_REFS_QUIET_ON_ERR:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
strbuf_release(&err);
|
2013-09-04 17:22:40 +02:00
|
|
|
return 1;
|
2014-04-25 01:36:55 +02:00
|
|
|
}
|
|
|
|
strbuf_release(&err);
|
|
|
|
ref_transaction_free(t);
|
|
|
|
return 0;
|
2013-09-04 17:22:40 +02:00
|
|
|
}
|
|
|
|
|
2015-05-11 17:25:11 +02:00
|
|
|
static int ref_update_reject_duplicates(struct string_list *refnames,
|
2014-06-20 16:42:59 +02:00
|
|
|
struct strbuf *err)
|
2013-09-04 17:22:43 +02:00
|
|
|
{
|
2015-05-11 17:25:11 +02:00
|
|
|
int i, n = refnames->nr;
|
2014-08-29 01:42:37 +02:00
|
|
|
|
|
|
|
assert(err);
|
|
|
|
|
2013-09-04 17:22:43 +02:00
|
|
|
for (i = 1; i < n; i++)
|
2015-05-11 17:25:11 +02:00
|
|
|
if (!strcmp(refnames->items[i - 1].string, refnames->items[i].string)) {
|
2014-08-29 01:42:37 +02:00
|
|
|
strbuf_addf(err,
|
|
|
|
"Multiple updates for ref '%s' not allowed.",
|
2015-05-11 17:25:11 +02:00
|
|
|
refnames->items[i].string);
|
2013-09-04 17:22:43 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-04-07 15:48:12 +02:00
|
|
|
int ref_transaction_commit(struct ref_transaction *transaction,
|
2014-04-30 21:22:42 +02:00
|
|
|
struct strbuf *err)
|
2013-09-04 17:22:43 +02:00
|
|
|
{
|
2014-11-25 09:02:32 +01:00
|
|
|
int ret = 0, i;
|
2014-04-07 15:48:12 +02:00
|
|
|
int n = transaction->nr;
|
2014-04-07 15:48:18 +02:00
|
|
|
struct ref_update **updates = transaction->updates;
|
2014-11-25 09:02:32 +01:00
|
|
|
struct string_list refs_to_delete = STRING_LIST_INIT_NODUP;
|
|
|
|
struct string_list_item *ref_to_delete;
|
2015-05-11 17:25:11 +02:00
|
|
|
struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
|
2013-09-04 17:22:43 +02:00
|
|
|
|
2014-08-29 01:42:37 +02:00
|
|
|
assert(err);
|
|
|
|
|
2014-04-29 21:06:19 +02:00
|
|
|
if (transaction->state != REF_TRANSACTION_OPEN)
|
|
|
|
die("BUG: commit called for transaction that is not open");
|
|
|
|
|
|
|
|
if (!n) {
|
|
|
|
transaction->state = REF_TRANSACTION_CLOSED;
|
2013-09-04 17:22:43 +02:00
|
|
|
return 0;
|
2014-04-29 21:06:19 +02:00
|
|
|
}
|
2013-09-04 17:22:43 +02:00
|
|
|
|
2015-05-11 17:25:11 +02:00
|
|
|
/* Fail if a refname appears more than once in the transaction: */
|
|
|
|
for (i = 0; i < n; i++)
|
|
|
|
string_list_append(&affected_refnames, updates[i]->refname);
|
|
|
|
string_list_sort(&affected_refnames);
|
|
|
|
if (ref_update_reject_duplicates(&affected_refnames, err)) {
|
2014-05-16 23:14:38 +02:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
2013-09-04 17:22:43 +02:00
|
|
|
goto cleanup;
|
2014-05-16 23:14:38 +02:00
|
|
|
}
|
2013-09-04 17:22:43 +02:00
|
|
|
|
ref_transaction_commit(): fix atomicity and avoid fd exhaustion
The old code was roughly
for update in updates:
acquire locks and check old_sha
for update in updates:
if changing value:
write_ref_to_lockfile()
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This has two problems.
Non-atomic updates
==================
The atomicity of the reference transaction depends on all pre-checks
being done in the first loop, before any changes have started being
committed in the second loop. The problem is that
write_ref_to_lockfile() (previously part of write_ref_sha1()), which
is called from the second loop, contains two more checks:
* It verifies that new_sha1 is a valid object
* If the reference being updated is a branch, it verifies that
new_sha1 points at a commit object (as opposed to a tag, tree, or
blob).
If either of these checks fails, the "transaction" is aborted during
the second loop. But this might happen after some reference updates
have already been permanently committed. In other words, the
all-or-nothing promise of "git update-ref --stdin" could be violated.
So these checks have to be moved to the first loop.
File descriptor exhaustion
==========================
The old code locked all of the references in the first loop, leaving
all of the lockfiles open until later loops. Since we might be
updating a lot of references, this could result in file descriptor
exhaustion.
The solution
============
After this patch, the code looks like
for update in updates:
acquire locks and check old_sha
if changing value:
write_ref_to_lockfile()
else:
close_ref()
for update in updates:
if changing value:
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This fixes both problems:
1. The pre-checks in write_ref_to_lockfile() are now done in the first
loop, before any changes have been committed. If any of the checks
fails, the whole transaction can now be rolled back correctly.
2. All lockfiles are closed in the first loop immediately after they
are created (either by write_ref_to_lockfile() or by close_ref()).
This means that there is never more than one open lockfile at a
time, preventing file descriptor exhaustion.
To simplify the bookkeeping across loops, add a new REF_NEEDS_COMMIT
bit to update->flags, which keeps track of whether the corresponding
lockfile needs to be committed, as opposed to just unlocked. (Since
"struct ref_update" is internal to the refs module, this change is not
visible to external callers.)
This change fixes two tests in t1400.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-24 13:35:49 +02:00
|
|
|
/*
|
|
|
|
* Acquire all locks, verify old values if provided, check
|
|
|
|
* that new values are valid, and write new values to the
|
|
|
|
* lockfiles, ready to be activated. Only keep one lockfile
|
|
|
|
* open at a time to avoid running out of file descriptors.
|
|
|
|
*/
|
2013-09-04 17:22:43 +02:00
|
|
|
for (i = 0; i < n; i++) {
|
2014-04-07 15:48:15 +02:00
|
|
|
struct ref_update *update = updates[i];
|
|
|
|
|
2015-04-24 13:35:48 +02:00
|
|
|
if ((update->flags & REF_HAVE_NEW) &&
|
|
|
|
is_null_sha1(update->new_sha1))
|
|
|
|
update->flags |= REF_DELETING;
|
2015-02-17 18:00:14 +01:00
|
|
|
update->lock = lock_ref_sha1_basic(
|
|
|
|
update->refname,
|
|
|
|
((update->flags & REF_HAVE_OLD) ?
|
|
|
|
update->old_sha1 : NULL),
|
refs: check for D/F conflicts among refs created in a transaction
If two references that D/F conflict (e.g., "refs/foo" and
"refs/foo/bar") are created in a single transaction, the old code
discovered the problem only after the "commit" phase of
ref_transaction_commit() had already begun. This could leave some
references updated and others not, which violates the promise of
atomicity.
Instead, check for such conflicts during the "locking" phase:
* Teach is_refname_available() to take an "extras" parameter that can
contain extra reference names with which the specified refname must
not conflict.
* Change lock_ref_sha1_basic() to take an "extras" parameter, which it
passes through to is_refname_available().
* Change ref_transaction_commit() to pass "affected_refnames" to
lock_ref_sha1_basic() as its "extras" argument.
This change fixes a test case in t1404.
This code is a bit stricter than it needs to be. We could conceivably
allow reference "refs/foo/bar" to be created in the same transaction
as "refs/foo" is deleted (or vice versa). But that would be
complicated to implement, because it is not possible to lock
"refs/foo/bar" while "refs/foo" exists as a loose reference, but on
the other hand we don't want to delete some references before adding
others (because that could leave a gap during which required objects
are unreachable). There is also a complication that reflog files'
paths can conflict.
Any less-strict implementation would probably require tricks like the
packing of all references before the start of the real transaction, or
the use of temporary intermediate reference names.
So for now let's accept too-strict checks. Some reference update
transactions will be rejected unnecessarily, but they will be rejected
in their entirety rather than leaving the repository in an
intermediate state, as would happen now.
Please note that there is still one kind of D/F conflict that is *not*
handled correctly. If two processes are running at the same time, and
one tries to create "refs/foo" at the same time that the other tries
to create "refs/foo/bar", then they can race with each other. Both
processes can obtain their respective locks ("refs/foo.lock" and
"refs/foo/bar.lock"), proceed to the "commit" phase of
ref_transaction_commit(), and then the slower process will discover
that it cannot rename its lockfile into place (after possibly having
committed changes to other references). There appears to be no way to
fix this race without changing the locking policy, which in turn would
require a change to *all* Git clients.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2015-05-11 17:25:12 +02:00
|
|
|
&affected_refnames, NULL,
|
2015-04-24 13:35:48 +02:00
|
|
|
update->flags,
|
2015-05-11 17:25:15 +02:00
|
|
|
&update->type,
|
|
|
|
err);
|
2014-04-07 15:48:16 +02:00
|
|
|
if (!update->lock) {
|
2015-05-11 17:25:18 +02:00
|
|
|
char *reason;
|
|
|
|
|
2014-05-16 23:14:38 +02:00
|
|
|
ret = (errno == ENOTDIR)
|
|
|
|
? TRANSACTION_NAME_CONFLICT
|
|
|
|
: TRANSACTION_GENERIC_ERROR;
|
2015-05-11 17:25:18 +02:00
|
|
|
reason = strbuf_detach(err, NULL);
|
2015-05-23 01:34:57 +02:00
|
|
|
strbuf_addf(err, "cannot lock ref '%s': %s",
|
2015-05-11 17:25:18 +02:00
|
|
|
update->refname, reason);
|
|
|
|
free(reason);
|
2013-09-04 17:22:43 +02:00
|
|
|
goto cleanup;
|
|
|
|
}
|
ref_transaction_commit(): fix atomicity and avoid fd exhaustion
The old code was roughly
for update in updates:
acquire locks and check old_sha
for update in updates:
if changing value:
write_ref_to_lockfile()
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This has two problems.
Non-atomic updates
==================
The atomicity of the reference transaction depends on all pre-checks
being done in the first loop, before any changes have started being
committed in the second loop. The problem is that
write_ref_to_lockfile() (previously part of write_ref_sha1()), which
is called from the second loop, contains two more checks:
* It verifies that new_sha1 is a valid object
* If the reference being updated is a branch, it verifies that
new_sha1 points at a commit object (as opposed to a tag, tree, or
blob).
If either of these checks fails, the "transaction" is aborted during
the second loop. But this might happen after some reference updates
have already been permanently committed. In other words, the
all-or-nothing promise of "git update-ref --stdin" could be violated.
So these checks have to be moved to the first loop.
File descriptor exhaustion
==========================
The old code locked all of the references in the first loop, leaving
all of the lockfiles open until later loops. Since we might be
updating a lot of references, this could result in file descriptor
exhaustion.
The solution
============
After this patch, the code looks like
for update in updates:
acquire locks and check old_sha
if changing value:
write_ref_to_lockfile()
else:
close_ref()
for update in updates:
if changing value:
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This fixes both problems:
1. The pre-checks in write_ref_to_lockfile() are now done in the first
loop, before any changes have been committed. If any of the checks
fails, the whole transaction can now be rolled back correctly.
2. All lockfiles are closed in the first loop immediately after they
are created (either by write_ref_to_lockfile() or by close_ref()).
This means that there is never more than one open lockfile at a
time, preventing file descriptor exhaustion.
To simplify the bookkeeping across loops, add a new REF_NEEDS_COMMIT
bit to update->flags, which keeps track of whether the corresponding
lockfile needs to be committed, as opposed to just unlocked. (Since
"struct ref_update" is internal to the refs module, this change is not
visible to external callers.)
This change fixes two tests in t1400.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-24 13:35:49 +02:00
|
|
|
if ((update->flags & REF_HAVE_NEW) &&
|
|
|
|
!(update->flags & REF_DELETING)) {
|
2015-03-03 12:43:14 +01:00
|
|
|
int overwriting_symref = ((update->type & REF_ISSYMREF) &&
|
|
|
|
(update->flags & REF_NODEREF));
|
|
|
|
|
ref_transaction_commit(): fix atomicity and avoid fd exhaustion
The old code was roughly
for update in updates:
acquire locks and check old_sha
for update in updates:
if changing value:
write_ref_to_lockfile()
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This has two problems.
Non-atomic updates
==================
The atomicity of the reference transaction depends on all pre-checks
being done in the first loop, before any changes have started being
committed in the second loop. The problem is that
write_ref_to_lockfile() (previously part of write_ref_sha1()), which
is called from the second loop, contains two more checks:
* It verifies that new_sha1 is a valid object
* If the reference being updated is a branch, it verifies that
new_sha1 points at a commit object (as opposed to a tag, tree, or
blob).
If either of these checks fails, the "transaction" is aborted during
the second loop. But this might happen after some reference updates
have already been permanently committed. In other words, the
all-or-nothing promise of "git update-ref --stdin" could be violated.
So these checks have to be moved to the first loop.
File descriptor exhaustion
==========================
The old code locked all of the references in the first loop, leaving
all of the lockfiles open until later loops. Since we might be
updating a lot of references, this could result in file descriptor
exhaustion.
The solution
============
After this patch, the code looks like
for update in updates:
acquire locks and check old_sha
if changing value:
write_ref_to_lockfile()
else:
close_ref()
for update in updates:
if changing value:
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This fixes both problems:
1. The pre-checks in write_ref_to_lockfile() are now done in the first
loop, before any changes have been committed. If any of the checks
fails, the whole transaction can now be rolled back correctly.
2. All lockfiles are closed in the first loop immediately after they
are created (either by write_ref_to_lockfile() or by close_ref()).
This means that there is never more than one open lockfile at a
time, preventing file descriptor exhaustion.
To simplify the bookkeeping across loops, add a new REF_NEEDS_COMMIT
bit to update->flags, which keeps track of whether the corresponding
lockfile needs to be committed, as opposed to just unlocked. (Since
"struct ref_update" is internal to the refs module, this change is not
visible to external callers.)
This change fixes two tests in t1400.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-24 13:35:49 +02:00
|
|
|
if (!overwriting_symref &&
|
2015-05-25 20:39:22 +02:00
|
|
|
!hashcmp(update->lock->old_oid.hash, update->new_sha1)) {
|
2015-03-03 12:43:14 +01:00
|
|
|
/*
|
|
|
|
* The reference already has the desired
|
|
|
|
* value, so we don't need to write it.
|
|
|
|
*/
|
2015-05-09 17:29:20 +02:00
|
|
|
} else if (write_ref_to_lockfile(update->lock,
|
ref_transaction_commit(): fix atomicity and avoid fd exhaustion
The old code was roughly
for update in updates:
acquire locks and check old_sha
for update in updates:
if changing value:
write_ref_to_lockfile()
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This has two problems.
Non-atomic updates
==================
The atomicity of the reference transaction depends on all pre-checks
being done in the first loop, before any changes have started being
committed in the second loop. The problem is that
write_ref_to_lockfile() (previously part of write_ref_sha1()), which
is called from the second loop, contains two more checks:
* It verifies that new_sha1 is a valid object
* If the reference being updated is a branch, it verifies that
new_sha1 points at a commit object (as opposed to a tag, tree, or
blob).
If either of these checks fails, the "transaction" is aborted during
the second loop. But this might happen after some reference updates
have already been permanently committed. In other words, the
all-or-nothing promise of "git update-ref --stdin" could be violated.
So these checks have to be moved to the first loop.
File descriptor exhaustion
==========================
The old code locked all of the references in the first loop, leaving
all of the lockfiles open until later loops. Since we might be
updating a lot of references, this could result in file descriptor
exhaustion.
The solution
============
After this patch, the code looks like
for update in updates:
acquire locks and check old_sha
if changing value:
write_ref_to_lockfile()
else:
close_ref()
for update in updates:
if changing value:
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This fixes both problems:
1. The pre-checks in write_ref_to_lockfile() are now done in the first
loop, before any changes have been committed. If any of the checks
fails, the whole transaction can now be rolled back correctly.
2. All lockfiles are closed in the first loop immediately after they
are created (either by write_ref_to_lockfile() or by close_ref()).
This means that there is never more than one open lockfile at a
time, preventing file descriptor exhaustion.
To simplify the bookkeeping across loops, add a new REF_NEEDS_COMMIT
bit to update->flags, which keeps track of whether the corresponding
lockfile needs to be committed, as opposed to just unlocked. (Since
"struct ref_update" is internal to the refs module, this change is not
visible to external callers.)
This change fixes two tests in t1400.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-24 13:35:49 +02:00
|
|
|
update->new_sha1)) {
|
|
|
|
/*
|
|
|
|
* The lock was freed upon failure of
|
|
|
|
* write_ref_to_lockfile():
|
|
|
|
*/
|
|
|
|
update->lock = NULL;
|
2015-05-23 01:34:57 +02:00
|
|
|
strbuf_addf(err, "cannot update the ref '%s'.",
|
ref_transaction_commit(): fix atomicity and avoid fd exhaustion
The old code was roughly
for update in updates:
acquire locks and check old_sha
for update in updates:
if changing value:
write_ref_to_lockfile()
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This has two problems.
Non-atomic updates
==================
The atomicity of the reference transaction depends on all pre-checks
being done in the first loop, before any changes have started being
committed in the second loop. The problem is that
write_ref_to_lockfile() (previously part of write_ref_sha1()), which
is called from the second loop, contains two more checks:
* It verifies that new_sha1 is a valid object
* If the reference being updated is a branch, it verifies that
new_sha1 points at a commit object (as opposed to a tag, tree, or
blob).
If either of these checks fails, the "transaction" is aborted during
the second loop. But this might happen after some reference updates
have already been permanently committed. In other words, the
all-or-nothing promise of "git update-ref --stdin" could be violated.
So these checks have to be moved to the first loop.
File descriptor exhaustion
==========================
The old code locked all of the references in the first loop, leaving
all of the lockfiles open until later loops. Since we might be
updating a lot of references, this could result in file descriptor
exhaustion.
The solution
============
After this patch, the code looks like
for update in updates:
acquire locks and check old_sha
if changing value:
write_ref_to_lockfile()
else:
close_ref()
for update in updates:
if changing value:
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This fixes both problems:
1. The pre-checks in write_ref_to_lockfile() are now done in the first
loop, before any changes have been committed. If any of the checks
fails, the whole transaction can now be rolled back correctly.
2. All lockfiles are closed in the first loop immediately after they
are created (either by write_ref_to_lockfile() or by close_ref()).
This means that there is never more than one open lockfile at a
time, preventing file descriptor exhaustion.
To simplify the bookkeeping across loops, add a new REF_NEEDS_COMMIT
bit to update->flags, which keeps track of whether the corresponding
lockfile needs to be committed, as opposed to just unlocked. (Since
"struct ref_update" is internal to the refs module, this change is not
visible to external callers.)
This change fixes two tests in t1400.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-24 13:35:49 +02:00
|
|
|
update->refname);
|
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
} else {
|
|
|
|
update->flags |= REF_NEEDS_COMMIT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!(update->flags & REF_NEEDS_COMMIT)) {
|
|
|
|
/*
|
|
|
|
* We didn't have to write anything to the lockfile.
|
|
|
|
* Close it to free up the file descriptor:
|
|
|
|
*/
|
|
|
|
if (close_ref(update->lock)) {
|
|
|
|
strbuf_addf(err, "Couldn't close %s.lock",
|
|
|
|
update->refname);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Perform updates first so live commits remain referenced */
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
struct ref_update *update = updates[i];
|
|
|
|
|
|
|
|
if (update->flags & REF_NEEDS_COMMIT) {
|
|
|
|
if (commit_ref_update(update->lock,
|
|
|
|
update->new_sha1, update->msg)) {
|
|
|
|
/* freed by commit_ref_update(): */
|
2015-03-02 10:29:52 +01:00
|
|
|
update->lock = NULL;
|
2014-08-29 01:42:37 +02:00
|
|
|
strbuf_addf(err, "Cannot update the ref '%s'.",
|
|
|
|
update->refname);
|
2014-05-16 23:14:38 +02:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
2013-09-04 17:22:43 +02:00
|
|
|
goto cleanup;
|
2015-03-02 10:29:52 +01:00
|
|
|
} else {
|
ref_transaction_commit(): fix atomicity and avoid fd exhaustion
The old code was roughly
for update in updates:
acquire locks and check old_sha
for update in updates:
if changing value:
write_ref_to_lockfile()
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This has two problems.
Non-atomic updates
==================
The atomicity of the reference transaction depends on all pre-checks
being done in the first loop, before any changes have started being
committed in the second loop. The problem is that
write_ref_to_lockfile() (previously part of write_ref_sha1()), which
is called from the second loop, contains two more checks:
* It verifies that new_sha1 is a valid object
* If the reference being updated is a branch, it verifies that
new_sha1 points at a commit object (as opposed to a tag, tree, or
blob).
If either of these checks fails, the "transaction" is aborted during
the second loop. But this might happen after some reference updates
have already been permanently committed. In other words, the
all-or-nothing promise of "git update-ref --stdin" could be violated.
So these checks have to be moved to the first loop.
File descriptor exhaustion
==========================
The old code locked all of the references in the first loop, leaving
all of the lockfiles open until later loops. Since we might be
updating a lot of references, this could result in file descriptor
exhaustion.
The solution
============
After this patch, the code looks like
for update in updates:
acquire locks and check old_sha
if changing value:
write_ref_to_lockfile()
else:
close_ref()
for update in updates:
if changing value:
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This fixes both problems:
1. The pre-checks in write_ref_to_lockfile() are now done in the first
loop, before any changes have been committed. If any of the checks
fails, the whole transaction can now be rolled back correctly.
2. All lockfiles are closed in the first loop immediately after they
are created (either by write_ref_to_lockfile() or by close_ref()).
This means that there is never more than one open lockfile at a
time, preventing file descriptor exhaustion.
To simplify the bookkeeping across loops, add a new REF_NEEDS_COMMIT
bit to update->flags, which keeps track of whether the corresponding
lockfile needs to be committed, as opposed to just unlocked. (Since
"struct ref_update" is internal to the refs module, this change is not
visible to external callers.)
This change fixes two tests in t1400.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-24 13:35:49 +02:00
|
|
|
/* freed by commit_ref_update(): */
|
2015-03-02 10:29:52 +01:00
|
|
|
update->lock = NULL;
|
2014-04-29 22:42:07 +02:00
|
|
|
}
|
2013-09-04 17:22:43 +02:00
|
|
|
}
|
2014-04-07 15:48:15 +02:00
|
|
|
}
|
2013-09-04 17:22:43 +02:00
|
|
|
|
|
|
|
/* Perform deletes now that updates are safely completed */
|
2014-04-07 15:48:16 +02:00
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
struct ref_update *update = updates[i];
|
|
|
|
|
ref_transaction_commit(): fix atomicity and avoid fd exhaustion
The old code was roughly
for update in updates:
acquire locks and check old_sha
for update in updates:
if changing value:
write_ref_to_lockfile()
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This has two problems.
Non-atomic updates
==================
The atomicity of the reference transaction depends on all pre-checks
being done in the first loop, before any changes have started being
committed in the second loop. The problem is that
write_ref_to_lockfile() (previously part of write_ref_sha1()), which
is called from the second loop, contains two more checks:
* It verifies that new_sha1 is a valid object
* If the reference being updated is a branch, it verifies that
new_sha1 points at a commit object (as opposed to a tag, tree, or
blob).
If either of these checks fails, the "transaction" is aborted during
the second loop. But this might happen after some reference updates
have already been permanently committed. In other words, the
all-or-nothing promise of "git update-ref --stdin" could be violated.
So these checks have to be moved to the first loop.
File descriptor exhaustion
==========================
The old code locked all of the references in the first loop, leaving
all of the lockfiles open until later loops. Since we might be
updating a lot of references, this could result in file descriptor
exhaustion.
The solution
============
After this patch, the code looks like
for update in updates:
acquire locks and check old_sha
if changing value:
write_ref_to_lockfile()
else:
close_ref()
for update in updates:
if changing value:
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()
This fixes both problems:
1. The pre-checks in write_ref_to_lockfile() are now done in the first
loop, before any changes have been committed. If any of the checks
fails, the whole transaction can now be rolled back correctly.
2. All lockfiles are closed in the first loop immediately after they
are created (either by write_ref_to_lockfile() or by close_ref()).
This means that there is never more than one open lockfile at a
time, preventing file descriptor exhaustion.
To simplify the bookkeeping across loops, add a new REF_NEEDS_COMMIT
bit to update->flags, which keeps track of whether the corresponding
lockfile needs to be committed, as opposed to just unlocked. (Since
"struct ref_update" is internal to the refs module, this change is not
visible to external callers.)
This change fixes two tests in t1400.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-24 13:35:49 +02:00
|
|
|
if (update->flags & REF_DELETING) {
|
2014-08-29 02:01:35 +02:00
|
|
|
if (delete_ref_loose(update->lock, update->type, err)) {
|
2014-05-16 23:14:38 +02:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
2014-08-29 02:01:35 +02:00
|
|
|
goto cleanup;
|
|
|
|
}
|
2014-05-16 23:14:38 +02:00
|
|
|
|
2015-04-24 13:35:48 +02:00
|
|
|
if (!(update->flags & REF_ISPRUNING))
|
2014-11-25 09:02:32 +01:00
|
|
|
string_list_append(&refs_to_delete,
|
|
|
|
update->lock->ref_name);
|
2013-09-04 17:22:43 +02:00
|
|
|
}
|
2014-04-07 15:48:16 +02:00
|
|
|
}
|
|
|
|
|
2014-11-25 09:02:32 +01:00
|
|
|
if (repack_without_refs(&refs_to_delete, err)) {
|
2014-05-16 23:14:38 +02:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
2014-08-29 02:01:35 +02:00
|
|
|
goto cleanup;
|
|
|
|
}
|
2014-11-25 09:02:32 +01:00
|
|
|
for_each_string_list_item(ref_to_delete, &refs_to_delete)
|
|
|
|
unlink_or_warn(git_path("logs/%s", ref_to_delete->string));
|
2013-09-04 17:22:43 +02:00
|
|
|
clear_loose_ref_cache(&ref_cache);
|
|
|
|
|
|
|
|
cleanup:
|
2014-04-29 21:06:19 +02:00
|
|
|
transaction->state = REF_TRANSACTION_CLOSED;
|
|
|
|
|
2013-09-04 17:22:43 +02:00
|
|
|
for (i = 0; i < n; i++)
|
2014-04-07 15:48:16 +02:00
|
|
|
if (updates[i]->lock)
|
|
|
|
unlock_ref(updates[i]->lock);
|
2014-11-25 09:02:32 +01:00
|
|
|
string_list_clear(&refs_to_delete, 0);
|
2015-05-11 17:25:11 +02:00
|
|
|
string_list_clear(&affected_refnames, 0);
|
2014-04-07 15:48:10 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-12-12 06:38:09 +01:00
|
|
|
char *shorten_unambiguous_ref(const char *refname, int strict)
|
2009-04-07 09:14:20 +02:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
static char **scanf_fmts;
|
|
|
|
static int nr_rules;
|
|
|
|
char *short_name;
|
|
|
|
|
|
|
|
if (!nr_rules) {
|
2014-01-08 15:43:39 +01:00
|
|
|
/*
|
|
|
|
* Pre-generate scanf formats from ref_rev_parse_rules[].
|
|
|
|
* Generate a format suitable for scanf from a
|
|
|
|
* ref_rev_parse_rules rule by interpolating "%s" at the
|
|
|
|
* location of the "%.*s".
|
|
|
|
*/
|
2009-04-07 09:14:20 +02:00
|
|
|
size_t total_len = 0;
|
2014-01-08 15:43:38 +01:00
|
|
|
size_t offset = 0;
|
2009-04-07 09:14:20 +02:00
|
|
|
|
|
|
|
/* the rule list is NULL terminated, count them first */
|
2013-10-24 10:45:13 +02:00
|
|
|
for (nr_rules = 0; ref_rev_parse_rules[nr_rules]; nr_rules++)
|
2014-01-08 15:43:40 +01:00
|
|
|
/* -2 for strlen("%.*s") - strlen("%s"); +1 for NUL */
|
|
|
|
total_len += strlen(ref_rev_parse_rules[nr_rules]) - 2 + 1;
|
2009-04-07 09:14:20 +02:00
|
|
|
|
|
|
|
scanf_fmts = xmalloc(nr_rules * sizeof(char *) + total_len);
|
|
|
|
|
2014-01-08 15:43:38 +01:00
|
|
|
offset = 0;
|
2009-04-07 09:14:20 +02:00
|
|
|
for (i = 0; i < nr_rules; i++) {
|
2014-01-08 15:43:39 +01:00
|
|
|
assert(offset < total_len);
|
2014-01-08 15:43:38 +01:00
|
|
|
scanf_fmts[i] = (char *)&scanf_fmts[nr_rules] + offset;
|
2014-01-08 15:43:39 +01:00
|
|
|
offset += snprintf(scanf_fmts[i], total_len - offset,
|
|
|
|
ref_rev_parse_rules[i], 2, "%s") + 1;
|
2009-04-07 09:14:20 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* bail out if there are no rules */
|
|
|
|
if (!nr_rules)
|
2011-12-12 06:38:09 +01:00
|
|
|
return xstrdup(refname);
|
2009-04-07 09:14:20 +02:00
|
|
|
|
2011-12-12 06:38:09 +01:00
|
|
|
/* buffer for scanf result, at most refname must fit */
|
|
|
|
short_name = xstrdup(refname);
|
2009-04-07 09:14:20 +02:00
|
|
|
|
|
|
|
/* skip first rule, it will always match */
|
|
|
|
for (i = nr_rules - 1; i > 0 ; --i) {
|
|
|
|
int j;
|
2009-04-13 12:25:46 +02:00
|
|
|
int rules_to_fail = i;
|
2009-04-07 09:14:20 +02:00
|
|
|
int short_name_len;
|
|
|
|
|
2011-12-12 06:38:09 +01:00
|
|
|
if (1 != sscanf(refname, scanf_fmts[i], short_name))
|
2009-04-07 09:14:20 +02:00
|
|
|
continue;
|
|
|
|
|
|
|
|
short_name_len = strlen(short_name);
|
|
|
|
|
2009-04-13 12:25:46 +02:00
|
|
|
/*
|
|
|
|
* in strict mode, all (except the matched one) rules
|
|
|
|
* must fail to resolve to a valid non-ambiguous ref
|
|
|
|
*/
|
|
|
|
if (strict)
|
|
|
|
rules_to_fail = nr_rules;
|
|
|
|
|
2009-04-07 09:14:20 +02:00
|
|
|
/*
|
|
|
|
* check if the short name resolves to a valid ref,
|
|
|
|
* but use only rules prior to the matched one
|
|
|
|
*/
|
2009-04-13 12:25:46 +02:00
|
|
|
for (j = 0; j < rules_to_fail; j++) {
|
2009-04-07 09:14:20 +02:00
|
|
|
const char *rule = ref_rev_parse_rules[j];
|
|
|
|
char refname[PATH_MAX];
|
|
|
|
|
2009-04-13 12:25:46 +02:00
|
|
|
/* skip matched rule */
|
|
|
|
if (i == j)
|
|
|
|
continue;
|
|
|
|
|
2009-04-07 09:14:20 +02:00
|
|
|
/*
|
|
|
|
* the short name is ambiguous, if it resolves
|
|
|
|
* (with this previous rule) to a valid ref
|
|
|
|
* read_ref() returns 0 on success
|
|
|
|
*/
|
|
|
|
mksnpath(refname, sizeof(refname),
|
|
|
|
rule, short_name_len, short_name);
|
2011-11-13 11:22:14 +01:00
|
|
|
if (ref_exists(refname))
|
2009-04-07 09:14:20 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* short name is non-ambiguous if all previous rules
|
|
|
|
* haven't resolved to a valid ref
|
|
|
|
*/
|
2009-04-13 12:25:46 +02:00
|
|
|
if (j == rules_to_fail)
|
2009-04-07 09:14:20 +02:00
|
|
|
return short_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
free(short_name);
|
2011-12-12 06:38:09 +01:00
|
|
|
return xstrdup(refname);
|
2009-04-07 09:14:20 +02:00
|
|
|
}
|
upload/receive-pack: allow hiding ref hierarchies
A repository may have refs that are only used for its internal
bookkeeping purposes that should not be exposed to the others that
come over the network.
Teach upload-pack to omit some refs from its initial advertisement
by paying attention to the uploadpack.hiderefs multi-valued
configuration variable. Do the same to receive-pack via the
receive.hiderefs variable. As a convenient short-hand, allow using
transfer.hiderefs to set the value to both of these variables.
Any ref that is under the hierarchies listed in the values of these
variables is excluded from responses to requests made by "ls-remote",
"fetch", etc. (for upload-pack) and "push" (for receive-pack).
Because these hidden refs do not count as OUR_REF, an attempt to
fetch objects at the tip of them will be rejected, and because these
refs do not get advertised, "git push :" will not see local branches
that have the same name as them as "matching" ones to be sent.
An attempt to update/delete these hidden refs with an explicit
refspec, e.g. "git push origin :refs/hidden/22", is rejected. This
is not a new restriction. To the pusher, it would appear that there
is no such ref, so its push request will conclude with "Now that I
sent you all the data, it is time for you to update the refs. I saw
that the ref did not exist when I started pushing, and I want the
result to point at this commit". The receiving end will apply the
compare-and-swap rule to this request and rejects the push with
"Well, your update request conflicts with somebody else; I see there
is such a ref.", which is the right thing to do. Otherwise a push to
a hidden ref will always be "the last one wins", which is not a good
default.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-19 01:08:30 +01:00
|
|
|
|
|
|
|
/*
 * Ref hierarchies to hide, as collected by parse_hide_refs_config().
 * Stays NULL until the first hiderefs value has been parsed;
 * ref_is_hidden() treats NULL as "nothing is hidden".
 */
static struct string_list *hide_refs;
|
|
|
|
|
|
|
|
int parse_hide_refs_config(const char *var, const char *value, const char *section)
|
|
|
|
{
|
|
|
|
if (!strcmp("transfer.hiderefs", var) ||
|
|
|
|
/* NEEDSWORK: use parse_config_key() once both are merged */
|
2013-11-30 21:55:40 +01:00
|
|
|
(starts_with(var, section) && var[strlen(section)] == '.' &&
|
upload/receive-pack: allow hiding ref hierarchies
A repository may have refs that are only used for its internal
bookkeeping purposes that should not be exposed to the others that
come over the network.
Teach upload-pack to omit some refs from its initial advertisement
by paying attention to the uploadpack.hiderefs multi-valued
configuration variable. Do the same to receive-pack via the
receive.hiderefs variable. As a convenient short-hand, allow using
transfer.hiderefs to set the value to both of these variables.
Any ref that is under the hierarchies listed on the value of these
variable is excluded from responses to requests made by "ls-remote",
"fetch", etc. (for upload-pack) and "push" (for receive-pack).
Because these hidden refs do not count as OUR_REF, an attempt to
fetch objects at the tip of them will be rejected, and because these
refs do not get advertised, "git push :" will not see local branches
that have the same name as them as "matching" ones to be sent.
An attempt to update/delete these hidden refs with an explicit
refspec, e.g. "git push origin :refs/hidden/22", is rejected. This
is not a new restriction. To the pusher, it would appear that there
is no such ref, so its push request will conclude with "Now that I
sent you all the data, it is time for you to update the refs. I saw
that the ref did not exist when I started pushing, and I want the
result to point at this commit". The receiving end will apply the
compare-and-swap rule to this request and rejects the push with
"Well, your update request conflicts with somebody else; I see there
is such a ref.", which is the right thing to do. Otherwise a push to
a hidden ref will always be "the last one wins", which is not a good
default.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-19 01:08:30 +01:00
|
|
|
!strcmp(var + strlen(section), ".hiderefs"))) {
|
|
|
|
char *ref;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
if (!value)
|
|
|
|
return config_error_nonbool(var);
|
|
|
|
ref = xstrdup(value);
|
|
|
|
len = strlen(ref);
|
|
|
|
while (len && ref[len - 1] == '/')
|
|
|
|
ref[--len] = '\0';
|
|
|
|
if (!hide_refs) {
|
|
|
|
hide_refs = xcalloc(1, sizeof(*hide_refs));
|
|
|
|
hide_refs->strdup_strings = 1;
|
|
|
|
}
|
|
|
|
string_list_append(hide_refs, ref);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int ref_is_hidden(const char *refname)
|
|
|
|
{
|
|
|
|
struct string_list_item *item;
|
|
|
|
|
|
|
|
if (!hide_refs)
|
|
|
|
return 0;
|
|
|
|
for_each_string_list_item(item, hide_refs) {
|
|
|
|
int len;
|
2013-11-30 21:55:40 +01:00
|
|
|
if (!starts_with(refname, item->string))
|
upload/receive-pack: allow hiding ref hierarchies
A repository may have refs that are only used for its internal
bookkeeping purposes that should not be exposed to the others that
come over the network.
Teach upload-pack to omit some refs from its initial advertisement
by paying attention to the uploadpack.hiderefs multi-valued
configuration variable. Do the same to receive-pack via the
receive.hiderefs variable. As a convenient short-hand, allow using
transfer.hiderefs to set the value to both of these variables.
Any ref that is under the hierarchies listed on the value of these
variable is excluded from responses to requests made by "ls-remote",
"fetch", etc. (for upload-pack) and "push" (for receive-pack).
Because these hidden refs do not count as OUR_REF, an attempt to
fetch objects at the tip of them will be rejected, and because these
refs do not get advertised, "git push :" will not see local branches
that have the same name as them as "matching" ones to be sent.
An attempt to update/delete these hidden refs with an explicit
refspec, e.g. "git push origin :refs/hidden/22", is rejected. This
is not a new restriction. To the pusher, it would appear that there
is no such ref, so its push request will conclude with "Now that I
sent you all the data, it is time for you to update the refs. I saw
that the ref did not exist when I started pushing, and I want the
result to point at this commit". The receiving end will apply the
compare-and-swap rule to this request and rejects the push with
"Well, your update request conflicts with somebody else; I see there
is such a ref.", which is the right thing to do. Otherwise a push to
a hidden ref will always be "the last one wins", which is not a good
default.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-19 01:08:30 +01:00
|
|
|
continue;
|
|
|
|
len = strlen(item->string);
|
|
|
|
if (!refname[len] || refname[len] == '/')
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2014-12-12 09:56:59 +01:00
|
|
|
|
|
|
|
struct expire_reflog_cb {
|
|
|
|
unsigned int flags;
|
|
|
|
reflog_expiry_should_prune_fn *should_prune_fn;
|
|
|
|
void *policy_cb;
|
|
|
|
FILE *newlog;
|
|
|
|
unsigned char last_kept_sha1[20];
|
|
|
|
};
|
|
|
|
|
|
|
|
static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
|
|
|
|
const char *email, unsigned long timestamp, int tz,
|
|
|
|
const char *message, void *cb_data)
|
|
|
|
{
|
|
|
|
struct expire_reflog_cb *cb = cb_data;
|
|
|
|
struct expire_reflog_policy_cb *policy_cb = cb->policy_cb;
|
|
|
|
|
|
|
|
if (cb->flags & EXPIRE_REFLOGS_REWRITE)
|
|
|
|
osha1 = cb->last_kept_sha1;
|
|
|
|
|
|
|
|
if ((*cb->should_prune_fn)(osha1, nsha1, email, timestamp, tz,
|
|
|
|
message, policy_cb)) {
|
|
|
|
if (!cb->newlog)
|
|
|
|
printf("would prune %s", message);
|
|
|
|
else if (cb->flags & EXPIRE_REFLOGS_VERBOSE)
|
|
|
|
printf("prune %s", message);
|
|
|
|
} else {
|
|
|
|
if (cb->newlog) {
|
2014-12-12 09:57:03 +01:00
|
|
|
fprintf(cb->newlog, "%s %s %s %lu %+05d\t%s",
|
2014-12-12 09:56:59 +01:00
|
|
|
sha1_to_hex(osha1), sha1_to_hex(nsha1),
|
2014-12-12 09:57:03 +01:00
|
|
|
email, timestamp, tz, message);
|
2014-12-12 09:56:59 +01:00
|
|
|
hashcpy(cb->last_kept_sha1, nsha1);
|
|
|
|
}
|
|
|
|
if (cb->flags & EXPIRE_REFLOGS_VERBOSE)
|
|
|
|
printf("keep %s", message);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int reflog_expire(const char *refname, const unsigned char *sha1,
|
|
|
|
unsigned int flags,
|
|
|
|
reflog_expiry_prepare_fn prepare_fn,
|
|
|
|
reflog_expiry_should_prune_fn should_prune_fn,
|
|
|
|
reflog_expiry_cleanup_fn cleanup_fn,
|
|
|
|
void *policy_cb_data)
|
|
|
|
{
|
|
|
|
static struct lock_file reflog_lock;
|
|
|
|
struct expire_reflog_cb cb;
|
|
|
|
struct ref_lock *lock;
|
|
|
|
char *log_file;
|
|
|
|
int status = 0;
|
2015-03-03 12:43:16 +01:00
|
|
|
int type;
|
2015-05-11 17:25:15 +02:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
2014-12-12 09:56:59 +01:00
|
|
|
|
|
|
|
memset(&cb, 0, sizeof(cb));
|
|
|
|
cb.flags = flags;
|
|
|
|
cb.policy_cb = policy_cb_data;
|
|
|
|
cb.should_prune_fn = should_prune_fn;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The reflog file is locked by holding the lock on the
|
|
|
|
* reference itself, plus we might need to update the
|
|
|
|
* reference if --updateref was specified:
|
|
|
|
*/
|
2015-05-11 17:25:15 +02:00
|
|
|
lock = lock_ref_sha1_basic(refname, sha1, NULL, NULL, 0, &type, &err);
|
|
|
|
if (!lock) {
|
2015-05-11 17:25:20 +02:00
|
|
|
error("cannot lock ref '%s': %s", refname, err.buf);
|
2015-05-11 17:25:15 +02:00
|
|
|
strbuf_release(&err);
|
2015-05-11 17:25:20 +02:00
|
|
|
return -1;
|
2015-05-11 17:25:15 +02:00
|
|
|
}
|
2014-12-12 09:56:59 +01:00
|
|
|
if (!reflog_exists(refname)) {
|
|
|
|
unlock_ref(lock);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
log_file = git_pathdup("logs/%s", refname);
|
|
|
|
if (!(flags & EXPIRE_REFLOGS_DRY_RUN)) {
|
|
|
|
/*
|
|
|
|
* Even though holding $GIT_DIR/logs/$reflog.lock has
|
|
|
|
* no locking implications, we use the lock_file
|
|
|
|
* machinery here anyway because it does a lot of the
|
|
|
|
* work we need, including cleaning up if the program
|
|
|
|
* exits unexpectedly.
|
|
|
|
*/
|
|
|
|
if (hold_lock_file_for_update(&reflog_lock, log_file, 0) < 0) {
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
unable_to_lock_message(log_file, errno, &err);
|
|
|
|
error("%s", err.buf);
|
|
|
|
strbuf_release(&err);
|
|
|
|
goto failure;
|
|
|
|
}
|
|
|
|
cb.newlog = fdopen_lock_file(&reflog_lock, "w");
|
|
|
|
if (!cb.newlog) {
|
|
|
|
error("cannot fdopen %s (%s)",
|
|
|
|
reflog_lock.filename.buf, strerror(errno));
|
|
|
|
goto failure;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
(*prepare_fn)(refname, sha1, cb.policy_cb);
|
|
|
|
for_each_reflog_ent(refname, expire_reflog_ent, &cb);
|
|
|
|
(*cleanup_fn)(cb.policy_cb);
|
|
|
|
|
|
|
|
if (!(flags & EXPIRE_REFLOGS_DRY_RUN)) {
|
2015-03-03 12:43:16 +01:00
|
|
|
/*
|
|
|
|
* It doesn't make sense to adjust a reference pointed
|
|
|
|
* to by a symbolic ref based on expiring entries in
|
reflog_expire(): never update a reference to null_sha1
Currently, if --updateref is specified and the very last reflog entry
is expired or deleted, the reference's value is set to 0{40}. This is
an invalid state of the repository, and breaks, for example, "git
fsck" and "git for-each-ref".
The only place we use --updateref in our own code is when dropping
stash entries. In that code, the very next step is to check if the
reflog has been made empty, and if so, delete the "refs/stash"
reference entirely. Thus that code path ultimately leaves the
repository in a valid state.
But we don't want to the repository in an invalid state even
temporarily, and we don't want to leave an invalid state if other
callers of "git reflog expire|delete --updateref" don't think to do
the extra cleanup step.
So, if "git reflog expire|delete" leaves no more entries in the
reflog, just leave the reference unchanged.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Reviewed-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-03 12:43:17 +01:00
|
|
|
* the symbolic reference's reflog. Nor can we update
|
|
|
|
* a reference if there are no remaining reflog
|
|
|
|
* entries.
|
2015-03-03 12:43:16 +01:00
|
|
|
*/
|
|
|
|
int update = (flags & EXPIRE_REFLOGS_UPDATE_REF) &&
|
reflog_expire(): never update a reference to null_sha1
Currently, if --updateref is specified and the very last reflog entry
is expired or deleted, the reference's value is set to 0{40}. This is
an invalid state of the repository, and breaks, for example, "git
fsck" and "git for-each-ref".
The only place we use --updateref in our own code is when dropping
stash entries. In that code, the very next step is to check if the
reflog has been made empty, and if so, delete the "refs/stash"
reference entirely. Thus that code path ultimately leaves the
repository in a valid state.
But we don't want to the repository in an invalid state even
temporarily, and we don't want to leave an invalid state if other
callers of "git reflog expire|delete --updateref" don't think to do
the extra cleanup step.
So, if "git reflog expire|delete" leaves no more entries in the
reflog, just leave the reference unchanged.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Reviewed-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-03 12:43:17 +01:00
|
|
|
!(type & REF_ISSYMREF) &&
|
|
|
|
!is_null_sha1(cb.last_kept_sha1);
|
2015-03-03 12:43:16 +01:00
|
|
|
|
2014-12-12 09:56:59 +01:00
|
|
|
if (close_lock_file(&reflog_lock)) {
|
|
|
|
status |= error("couldn't write %s: %s", log_file,
|
|
|
|
strerror(errno));
|
2015-03-03 12:43:16 +01:00
|
|
|
} else if (update &&
|
2015-04-17 01:17:37 +02:00
|
|
|
(write_in_full(lock->lk->fd,
|
2014-12-12 09:56:59 +01:00
|
|
|
sha1_to_hex(cb.last_kept_sha1), 40) != 40 ||
|
2015-04-17 01:17:37 +02:00
|
|
|
write_str_in_full(lock->lk->fd, "\n") != 1 ||
|
2014-12-12 09:56:59 +01:00
|
|
|
close_ref(lock) < 0)) {
|
|
|
|
status |= error("couldn't write %s",
|
|
|
|
lock->lk->filename.buf);
|
|
|
|
rollback_lock_file(&reflog_lock);
|
|
|
|
} else if (commit_lock_file(&reflog_lock)) {
|
|
|
|
status |= error("unable to commit reflog '%s' (%s)",
|
|
|
|
log_file, strerror(errno));
|
2015-03-03 12:43:16 +01:00
|
|
|
} else if (update && commit_ref(lock)) {
|
2014-12-12 09:56:59 +01:00
|
|
|
status |= error("couldn't set %s", lock->ref_name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(log_file);
|
|
|
|
unlock_ref(lock);
|
|
|
|
return status;
|
|
|
|
|
|
|
|
failure:
|
|
|
|
rollback_lock_file(&reflog_lock);
|
|
|
|
free(log_file);
|
|
|
|
unlock_ref(lock);
|
|
|
|
return -1;
|
|
|
|
}
|