2012-10-26 17:53:55 +02:00
|
|
|
#include "cache.h"
|
2018-03-23 18:45:21 +01:00
|
|
|
#include "repository.h"
|
2017-06-14 20:07:36 +02:00
|
|
|
#include "config.h"
|
2014-10-01 12:28:42 +02:00
|
|
|
#include "lockfile.h"
|
2012-10-26 17:53:55 +02:00
|
|
|
#include "refs.h"
|
|
|
|
#include "pkt-line.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "tag.h"
|
2018-04-10 23:26:18 +02:00
|
|
|
#include "exec-cmd.h"
|
2012-10-26 17:53:55 +02:00
|
|
|
#include "pack.h"
|
|
|
|
#include "sideband.h"
|
|
|
|
#include "fetch-pack.h"
|
|
|
|
#include "remote.h"
|
|
|
|
#include "run-command.h"
|
2013-07-08 22:56:53 +02:00
|
|
|
#include "connect.h"
|
2012-10-26 17:53:55 +02:00
|
|
|
#include "transport.h"
|
|
|
|
#include "version.h"
|
2013-12-05 14:02:39 +01:00
|
|
|
#include "sha1-array.h"
|
2017-05-15 19:32:20 +02:00
|
|
|
#include "oidset.h"
|
2017-08-19 00:20:26 +02:00
|
|
|
#include "packfile.h"
|
2018-05-16 01:42:15 +02:00
|
|
|
#include "object-store.h"
|
fetch-pack: write shallow, then check connectivity
When fetching, connectivity is checked after the shallow file is
updated. There are 2 issues with this: (1) the connectivity check is
only performed up to ancestors of existing refs (which is not thorough
enough if we were deepening an existing ref in the first place), and (2)
there is no rollback of the shallow file if the connectivity check
fails.
To solve (1), update the connectivity check to check the ancestry chain
completely in the case of a deepening fetch by refraining from passing
"--not --all" when invoking rev-list in connected.c.
To solve (2), have fetch_pack() perform its own connectivity check
before updating the shallow file. To support existing use cases in which
"git fetch-pack" is used to download objects without much regard as to
the connectivity of the resulting objects with respect to the existing
repository, the connectivity check is only done if necessary (that is,
the fetch is not a clone, and the fetch involves shallow/deepen
functionality). "git fetch" still performs its own connectivity check,
preserving correctness but sometimes performing redundant work. This
redundancy is mitigated by the fact that fetch_pack() reports if it has
performed a connectivity check itself, and if the transport supports
connect or stateless-connect, it will bubble up that report so that "git
fetch" knows not to perform the connectivity check in such a case.
This was noticed when a user tried to deepen an existing repository by
fetching with --no-shallow from a server that did not send all necessary
objects - the connectivity check as run by "git fetch" succeeded, but a
subsequent "git fsck" failed.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-03 00:08:43 +02:00
|
|
|
#include "connected.h"
|
2018-06-15 00:54:28 +02:00
|
|
|
#include "fetch-negotiator.h"
|
2018-07-27 16:37:17 +02:00
|
|
|
#include "fsck.h"
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
/*
 * File-scope integer settings and capability flags.
 * NOTE(review): the code that assigns these lies outside this excerpt;
 * the grouping below is inferred from the names only -- confirm.
 */

/* Unpack limits; -1 looks like a "not set" sentinel (confirm). */
static int transfer_unpack_limit = -1;
static int fetch_unpack_limit = -1;
static int unpack_limit = 100;

/* fsck toggles; -1 looks like a "not set" sentinel (confirm). */
static int fetch_fsck_objects = -1;
static int transfer_fsck_objects = -1;

static int prefer_ofs_delta = 1;

/* When non-zero, find_common() adds " no-done" to its capability string. */
static int no_done;

/* Presumably record what the server side supports -- confirm. */
static int deepen_since_ok;
static int deepen_not_ok;
static int agent_supported;
static int server_supports_filtering;
|
2013-05-26 03:16:15 +02:00
|
|
|
static struct lock_file shallow_lock;
|
|
|
|
static const char *alternate_shallow_file;
|
2018-07-27 16:37:17 +02:00
|
|
|
static struct strbuf fsck_msg_types = STRBUF_INIT;
|
2012-10-26 17:53:55 +02:00
|
|
|
|
2014-03-25 14:23:26 +01:00
|
|
|
/*
 * Bits stored in "struct object"->flags by this file.
 * Remember to update object flag allocation in object.h
 *
 * COMPLETE  - tested in find_common() to skip wanted refs we already have.
 * ALTERNATE - set by cache_one_alternate() so that each alternate object
 *             is recorded only once.
 */
#define COMPLETE (1U << 0)
#define ALTERNATE (1U << 1)
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
/*
 * Once this many "have" lines have been sent without receiving any new
 * ACK, we stop walking our own history.
 */
#define MAX_IN_VAIN 256
|
|
|
|
|
2018-06-15 00:54:26 +02:00
|
|
|
/*
 * Acknowledgement mode: find_common() requests " multi_ack" when this
 * is 1 and " multi_ack_detailed" when it is 2.
 */
static int multi_ack;

/*
 * Sideband mode: find_common() requests " side-band" when this is 1 and
 * " side-band-64k" when it is 2.
 */
static int use_sideband;

/* Allow specifying sha1 if it is a ref tip. */
#define ALLOW_TIP_SHA1	01
/* Allow request of a sha1 if it is reachable from a ref (possibly hidden ref). */
#define ALLOW_REACHABLE_SHA1	02

/* Presumably a bitmask built from the ALLOW_* values above -- confirm. */
static unsigned int allow_unadvertised_object_request;
|
2012-10-26 17:53:55 +02:00
|
|
|
|
2016-06-12 12:53:54 +02:00
|
|
|
__attribute__((format (printf, 2, 3)))
|
|
|
|
static inline void print_verbose(const struct fetch_pack_args *args,
|
|
|
|
const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list params;
|
|
|
|
|
|
|
|
if (!args->verbose)
|
|
|
|
return;
|
|
|
|
|
|
|
|
va_start(params, fmt);
|
|
|
|
vfprintf(stderr, fmt, params);
|
|
|
|
va_end(params);
|
|
|
|
fputc('\n', stderr);
|
|
|
|
}
|
|
|
|
|
fetch-pack: cache results of for_each_alternate_ref
We may run for_each_alternate_ref() twice, once in
find_common() and once in everything_local(). This operation
can be expensive, because it involves running a sub-process
which must freshly load all of the alternate's refs from
disk.
Let's cache and reuse the results between the two calls. We
can make some optimizations based on the particular use
pattern in fetch-pack to keep our memory usage down.
The first is that we only care about the sha1s, not the refs
themselves. So it's OK to store only the sha1s, and to
suppress duplicates. The natural fit would therefore be a
sha1_array.
However, sha1_array's de-duplication happens only after it
has read and sorted all entries. It still stores each
duplicate. For an alternate with a large number of refs
pointing to the same commits, this is a needless expense.
Instead, we'd prefer to eliminate duplicates before putting
them in the cache, which implies using a hash. We can
further note that fetch-pack will call parse_object() on
each alternate sha1. We can therefore keep our cache as a
set of pointers to "struct object". That gives us a place to
put our "already seen" bit with an optimized hash lookup.
And as a bonus, the object stores the sha1 for us, so
pointer-to-object is all we need.
There are two extra optimizations I didn't do here:
- we actually store an array of pointer-to-object.
Technically we could just walk the obj_hash table
looking for entries with the ALTERNATE flag set (because
our use case doesn't care about the order here).
But that hash table may be mostly composed of
non-ALTERNATE entries, so we'd waste time walking over
them. So it would be a slight win in memory use, but a
loss in CPU.
- the items we pull out of the cache are actual "struct
object"s, but then we feed "obj->sha1" to our
sub-functions, which promptly call parse_object().
This second parse is cheap, because it starts with
lookup_object() and will bail immediately when it sees
we've already parsed the object. We could save the extra
hash lookup, but it would involve refactoring the
functions we call. It may or may not be worth the
trouble.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-02-08 21:53:03 +01:00
|
|
|
/*
 * Growable list of object pointers, filled by cache_one_alternate() and
 * walked by for_each_cached_alternate().
 */
struct alternate_object_cache {
	struct object **items;	/* the stored pointers */
	size_t nr;		/* number of entries in use */
	size_t alloc;		/* allocation bookkeeping handed to ALLOC_GROW() */
};
|
|
|
|
|
2018-10-08 20:09:23 +02:00
|
|
|
static void cache_one_alternate(const struct object_id *oid,
|
fetch-pack: cache results of for_each_alternate_ref
We may run for_each_alternate_ref() twice, once in
find_common() and once in everything_local(). This operation
can be expensive, because it involves running a sub-process
which must freshly load all of the alternate's refs from
disk.
Let's cache and reuse the results between the two calls. We
can make some optimizations based on the particular use
pattern in fetch-pack to keep our memory usage down.
The first is that we only care about the sha1s, not the refs
themselves. So it's OK to store only the sha1s, and to
suppress duplicates. The natural fit would therefore be a
sha1_array.
However, sha1_array's de-duplication happens only after it
has read and sorted all entries. It still stores each
duplicate. For an alternate with a large number of refs
pointing to the same commits, this is a needless expense.
Instead, we'd prefer to eliminate duplicates before putting
them in the cache, which implies using a hash. We can
further note that fetch-pack will call parse_object() on
each alternate sha1. We can therefore keep our cache as a
set of pointers to "struct object". That gives us a place to
put our "already seen" bit with an optimized hash lookup.
And as a bonus, the object stores the sha1 for us, so
pointer-to-object is all we need.
There are two extra optimizations I didn't do here:
- we actually store an array of pointer-to-object.
Technically we could just walk the obj_hash table
looking for entries with the ALTERNATE flag set (because
our use case doesn't care about the order here).
But that hash table may be mostly composed of
non-ALTERNATE entries, so we'd waste time walking over
them. So it would be a slight win in memory use, but a
loss in CPU.
- the items we pull out of the cache are actual "struct
object"s, but then we feed "obj->sha1" to our
sub-functions, which promptly call parse_object().
This second parse is cheap, because it starts with
lookup_object() and will bail immediately when it sees
we've already parsed the object. We could save the extra
hash lookup, but it would involve refactoring the
functions we call. It may or may not be worth the
trouble.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-02-08 21:53:03 +01:00
|
|
|
void *vcache)
|
|
|
|
{
|
|
|
|
struct alternate_object_cache *cache = vcache;
|
2018-06-29 03:21:51 +02:00
|
|
|
struct object *obj = parse_object(the_repository, oid);
|
fetch-pack: cache results of for_each_alternate_ref
We may run for_each_alternate_ref() twice, once in
find_common() and once in everything_local(). This operation
can be expensive, because it involves running a sub-process
which must freshly load all of the alternate's refs from
disk.
Let's cache and reuse the results between the two calls. We
can make some optimizations based on the particular use
pattern in fetch-pack to keep our memory usage down.
The first is that we only care about the sha1s, not the refs
themselves. So it's OK to store only the sha1s, and to
suppress duplicates. The natural fit would therefore be a
sha1_array.
However, sha1_array's de-duplication happens only after it
has read and sorted all entries. It still stores each
duplicate. For an alternate with a large number of refs
pointing to the same commits, this is a needless expense.
Instead, we'd prefer to eliminate duplicates before putting
them in the cache, which implies using a hash. We can
further note that fetch-pack will call parse_object() on
each alternate sha1. We can therefore keep our cache as a
set of pointers to "struct object". That gives us a place to
put our "already seen" bit with an optimized hash lookup.
And as a bonus, the object stores the sha1 for us, so
pointer-to-object is all we need.
There are two extra optimizations I didn't do here:
- we actually store an array of pointer-to-object.
Technically we could just walk the obj_hash table
looking for entries with the ALTERNATE flag set (because
our use case doesn't care about the order here).
But that hash table may be mostly composed of
non-ALTERNATE entries, so we'd waste time walking over
them. So it would be a slight win in memory use, but a
loss in CPU.
- the items we pull out of the cache are actual "struct
object"s, but then we feed "obj->sha1" to our
sub-functions, which promptly call parse_object().
This second parse is cheap, because it starts with
lookup_object() and will bail immediately when it sees
we've already parsed the object. We could save the extra
hash lookup, but it would involve refactoring the
functions we call. It may or may not be worth the
trouble.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-02-08 21:53:03 +01:00
|
|
|
|
|
|
|
if (!obj || (obj->flags & ALTERNATE))
|
|
|
|
return;
|
|
|
|
|
|
|
|
obj->flags |= ALTERNATE;
|
|
|
|
ALLOC_GROW(cache->items, cache->nr + 1, cache->alloc);
|
|
|
|
cache->items[cache->nr++] = obj;
|
|
|
|
}
|
|
|
|
|
2018-06-15 00:54:28 +02:00
|
|
|
static void for_each_cached_alternate(struct fetch_negotiator *negotiator,
|
|
|
|
void (*cb)(struct fetch_negotiator *,
|
2018-06-15 00:54:26 +02:00
|
|
|
struct object *))
|
fetch-pack: cache results of for_each_alternate_ref
We may run for_each_alternate_ref() twice, once in
find_common() and once in everything_local(). This operation
can be expensive, because it involves running a sub-process
which must freshly load all of the alternate's refs from
disk.
Let's cache and reuse the results between the two calls. We
can make some optimizations based on the particular use
pattern in fetch-pack to keep our memory usage down.
The first is that we only care about the sha1s, not the refs
themselves. So it's OK to store only the sha1s, and to
suppress duplicates. The natural fit would therefore be a
sha1_array.
However, sha1_array's de-duplication happens only after it
has read and sorted all entries. It still stores each
duplicate. For an alternate with a large number of refs
pointing to the same commits, this is a needless expense.
Instead, we'd prefer to eliminate duplicates before putting
them in the cache, which implies using a hash. We can
further note that fetch-pack will call parse_object() on
each alternate sha1. We can therefore keep our cache as a
set of pointers to "struct object". That gives us a place to
put our "already seen" bit with an optimized hash lookup.
And as a bonus, the object stores the sha1 for us, so
pointer-to-object is all we need.
There are two extra optimizations I didn't do here:
- we actually store an array of pointer-to-object.
Technically we could just walk the obj_hash table
looking for entries with the ALTERNATE flag set (because
our use case doesn't care about the order here).
But that hash table may be mostly composed of
non-ALTERNATE entries, so we'd waste time walking over
them. So it would be a slight win in memory use, but a
loss in CPU.
- the items we pull out of the cache are actual "struct
object"s, but then we feed "obj->sha1" to our
sub-functions, which promptly call parse_object().
This second parse is cheap, because it starts with
lookup_object() and will bail immediately when it sees
we've already parsed the object. We could save the extra
hash lookup, but it would involve refactoring the
functions we call. It may or may not be worth the
trouble.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-02-08 21:53:03 +01:00
|
|
|
{
|
|
|
|
static int initialized;
|
|
|
|
static struct alternate_object_cache cache;
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
if (!initialized) {
|
|
|
|
for_each_alternate_ref(cache_one_alternate, &cache);
|
|
|
|
initialized = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < cache.nr; i++)
|
2018-06-15 00:54:28 +02:00
|
|
|
cb(negotiator, cache.items[i]);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
|
2018-06-15 00:54:28 +02:00
|
|
|
static int rev_list_insert_ref(struct fetch_negotiator *negotiator,
|
2018-06-15 00:54:26 +02:00
|
|
|
const char *refname,
|
|
|
|
const struct object_id *oid)
|
2012-10-26 17:53:55 +02:00
|
|
|
{
|
2018-06-29 03:22:05 +02:00
|
|
|
struct object *o = deref_tag(the_repository,
|
|
|
|
parse_object(the_repository, oid),
|
2018-06-29 03:21:51 +02:00
|
|
|
refname, 0);
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
if (o && o->type == OBJ_COMMIT)
|
2018-06-15 00:54:28 +02:00
|
|
|
negotiator->add_tip(negotiator, (struct commit *)o);
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-05-25 20:39:18 +02:00
|
|
|
/*
 * Adapter that lets rev_list_insert_ref() be used as the callback given
 * to for_each_ref() (see mark_tips()). `cb_data` is the negotiator;
 * `flag` is not used.
 */
static int rev_list_insert_ref_oid(const char *refname, const struct object_id *oid,
				   int flag, void *cb_data)
{
	struct fetch_negotiator *negotiator = cb_data;

	return rev_list_insert_ref(negotiator, refname, oid);
}
|
|
|
|
|
|
|
|
/*
 * Classification of one acknowledgement line, as produced by get_ack().
 */
enum ack_type {
	NAK = 0,		/* the line was exactly "NAK" */
	ACK = 1,		/* "ACK <oid>" with no recognised keyword after it */
	ACK_continue = 2,	/* text after the oid contains "continue" */
	ACK_common = 3,		/* text after the oid contains "common" */
	ACK_ready = 4		/* text after the oid contains "ready" */
};
|
|
|
|
|
2018-12-29 22:19:14 +01:00
|
|
|
static void consume_shallow_list(struct fetch_pack_args *args,
|
|
|
|
struct packet_reader *reader)
|
2012-10-26 17:53:55 +02:00
|
|
|
{
|
2016-06-12 12:53:56 +02:00
|
|
|
if (args->stateless_rpc && args->deepen) {
|
2012-10-26 17:53:55 +02:00
|
|
|
/* If we sent a depth we will get back "duplicate"
|
|
|
|
* shallow and unshallow commands every time there
|
|
|
|
* is a block of have lines exchanged.
|
|
|
|
*/
|
2018-12-29 22:19:14 +01:00
|
|
|
while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
|
|
|
|
if (starts_with(reader->line, "shallow "))
|
2012-10-26 17:53:55 +02:00
|
|
|
continue;
|
2018-12-29 22:19:14 +01:00
|
|
|
if (starts_with(reader->line, "unshallow "))
|
2012-10-26 17:53:55 +02:00
|
|
|
continue;
|
2016-06-12 12:53:55 +02:00
|
|
|
die(_("git fetch-pack: expected shallow list"));
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
2018-12-29 22:19:14 +01:00
|
|
|
if (reader->status != PACKET_READ_FLUSH)
|
|
|
|
die(_("git fetch-pack: expected a flush packet after shallow list"));
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-12-29 22:19:14 +01:00
|
|
|
static enum ack_type get_ack(struct packet_reader *reader,
|
|
|
|
struct object_id *result_oid)
|
2012-10-26 17:53:55 +02:00
|
|
|
{
|
pkt-line: provide a LARGE_PACKET_MAX static buffer
Most of the callers of packet_read_line just read into a
static 1000-byte buffer (callers which handle arbitrary
binary data already use LARGE_PACKET_MAX). This works fine
in practice, because:
1. The only variable-sized data in these lines is a ref
name, and refs tend to be a lot shorter than 1000
characters.
2. When sending ref lines, git-core always limits itself
to 1000 byte packets.
However, the only limit given in the protocol specification
in Documentation/technical/protocol-common.txt is
LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in
pack-protocol.txt, and then only describing what we write,
not as a specific limit for readers.
This patch lets us bump the 1000-byte limit to
LARGE_PACKET_MAX. Even though git-core will never write a
packet where this makes a difference, there are two good
reasons to do this:
1. Other git implementations may have followed
protocol-common.txt and used a larger maximum size. We
don't bump into it in practice because it would involve
very long ref names.
2. We may want to increase the 1000-byte limit one day.
Since packets are transferred before any capabilities,
it's difficult to do this in a backwards-compatible
way. But if we bump the size of buffer the readers can
handle, eventually older versions of git will be
obsolete enough that we can justify bumping the
writers, as well. We don't have plans to do this
anytime soon, but there is no reason not to start the
clock ticking now.
Just bumping all of the reading bufs to LARGE_PACKET_MAX
would waste memory. Instead, since most readers just read
into a temporary buffer anyway, let's provide a single
static buffer that all callers can use. We can further wrap
this detail away by having the packet_read_line wrapper just
use the buffer transparently and return a pointer to the
static storage. That covers most of the cases, and the
remaining ones already read into their own LARGE_PACKET_MAX
buffers.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-20 21:02:57 +01:00
|
|
|
int len;
|
2014-06-18 21:56:03 +02:00
|
|
|
const char *arg;
|
2012-10-26 17:53:55 +02:00
|
|
|
|
2018-12-29 22:19:14 +01:00
|
|
|
if (packet_reader_read(reader) != PACKET_READ_NORMAL)
|
2018-02-08 19:47:49 +01:00
|
|
|
die(_("git fetch-pack: expected ACK/NAK, got a flush packet"));
|
2018-12-29 22:19:14 +01:00
|
|
|
len = reader->pktlen;
|
|
|
|
|
|
|
|
if (!strcmp(reader->line, "NAK"))
|
2012-10-26 17:53:55 +02:00
|
|
|
return NAK;
|
2018-12-29 22:19:14 +01:00
|
|
|
if (skip_prefix(reader->line, "ACK ", &arg)) {
|
2019-08-18 22:04:04 +02:00
|
|
|
const char *p;
|
|
|
|
if (!parse_oid_hex(arg, result_oid, &p)) {
|
|
|
|
len -= p - reader->line;
|
2014-06-18 21:56:03 +02:00
|
|
|
if (len < 1)
|
fetch-pack: fix out-of-bounds buffer offset in get_ack
When we read acks from the remote, we expect either:
ACK <sha1>
or
ACK <sha1> <multi-ack-flag>
We parse the "ACK <sha1>" bit from the line, and then start
looking for the flag strings at "line+45"; if we don't have
them, we assume it's of the first type. But if we do have
the first type, then line+45 is not necessarily inside our
string at all!
It turns out that this works most of the time due to the way
we parse the packets. They should come in with a newline,
and packet_read puts an extra NUL into the buffer, so we end
up with:
ACK <sha1>\n\0
with the newline at offset 44 and the NUL at offset 45. We
then strip the newline, putting a NUL at offset 44. So
when we look at "line+45", we are looking past the end of
our string; but it's OK, because we hit the terminator from
the original string.
This breaks down, however, if the other side does not
terminate their packets with a newline. In that case, our
packet is one character shorter, and we start looking
through uninitialized memory for the flag. No known
implementation sends such a packet, so it has never come up
in practice.
This patch tightens the check by looking for a short,
flagless ACK before trying to parse the flag.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-20 21:00:28 +01:00
|
|
|
return ACK;
|
2019-08-18 22:04:04 +02:00
|
|
|
if (strstr(p, "continue"))
|
2012-10-26 17:53:55 +02:00
|
|
|
return ACK_continue;
|
2019-08-18 22:04:04 +02:00
|
|
|
if (strstr(p, "common"))
|
2012-10-26 17:53:55 +02:00
|
|
|
return ACK_common;
|
2019-08-18 22:04:04 +02:00
|
|
|
if (strstr(p, "ready"))
|
2012-10-26 17:53:55 +02:00
|
|
|
return ACK_ready;
|
|
|
|
return ACK;
|
|
|
|
}
|
|
|
|
}
|
2018-12-29 22:19:14 +01:00
|
|
|
die(_("git fetch-pack: expected ACK/NAK, got '%s'"), reader->line);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void send_request(struct fetch_pack_args *args,
|
|
|
|
int fd, struct strbuf *buf)
|
|
|
|
{
|
|
|
|
if (args->stateless_rpc) {
|
|
|
|
send_sideband(fd, -1, buf->buf, buf->len, LARGE_PACKET_MAX);
|
|
|
|
packet_flush(fd);
|
2019-03-05 05:11:39 +01:00
|
|
|
} else {
|
|
|
|
if (write_in_full(fd, buf->buf, buf->len) < 0)
|
|
|
|
die_errno(_("unable to write to remote"));
|
|
|
|
}
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
|
2018-06-15 00:54:28 +02:00
|
|
|
static void insert_one_alternate_object(struct fetch_negotiator *negotiator,
|
2018-06-15 00:54:26 +02:00
|
|
|
struct object *obj)
|
2012-10-26 17:53:55 +02:00
|
|
|
{
|
2018-06-15 00:54:28 +02:00
|
|
|
rev_list_insert_ref(negotiator, NULL, &obj->oid);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Flush thresholds. find_common() starts its flush_at counter at
 * INITIAL_FLUSH; the other two bound the doubling phase in next_flush().
 */
#define INITIAL_FLUSH 16
#define PIPESAFE_FLUSH 32
#define LARGE_FLUSH 16384

/*
 * Compute the next flush threshold from the current one.
 *
 * The threshold doubles while it is below a mode-specific limit
 * (LARGE_FLUSH with stateless_rpc, PIPESAFE_FLUSH without). Beyond that
 * it grows by 10% per step in stateless-RPC mode, or by a fixed
 * PIPESAFE_FLUSH otherwise.
 */
static int next_flush(int stateless_rpc, int count)
{
	const int doubling_limit = stateless_rpc ? LARGE_FLUSH : PIPESAFE_FLUSH;

	if (count < doubling_limit)
		return count << 1;
	if (stateless_rpc)
		return count * 11 / 10;
	return count + PIPESAFE_FLUSH;
}
|
|
|
|
|
2018-07-03 00:39:44 +02:00
|
|
|
static void mark_tips(struct fetch_negotiator *negotiator,
|
|
|
|
const struct oid_array *negotiation_tips)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!negotiation_tips) {
|
|
|
|
for_each_ref(rev_list_insert_ref_oid, negotiator);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < negotiation_tips->nr; i++)
|
|
|
|
rev_list_insert_ref(negotiator, NULL,
|
|
|
|
&negotiation_tips->oid[i]);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-06-15 00:54:28 +02:00
|
|
|
static int find_common(struct fetch_negotiator *negotiator,
|
2018-06-15 00:54:26 +02:00
|
|
|
struct fetch_pack_args *args,
|
2017-05-01 04:28:54 +02:00
|
|
|
int fd[2], struct object_id *result_oid,
|
2012-10-26 17:53:55 +02:00
|
|
|
struct ref *refs)
|
|
|
|
{
|
|
|
|
int fetching;
|
|
|
|
int count = 0, flushes = 0, flush_at = INITIAL_FLUSH, retval;
|
2017-05-01 04:28:54 +02:00
|
|
|
const struct object_id *oid;
|
2012-10-26 17:53:55 +02:00
|
|
|
unsigned in_vain = 0;
|
|
|
|
int got_continue = 0;
|
|
|
|
int got_ready = 0;
|
|
|
|
struct strbuf req_buf = STRBUF_INIT;
|
|
|
|
size_t state_len = 0;
|
2018-12-29 22:19:14 +01:00
|
|
|
struct packet_reader reader;
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
if (args->stateless_rpc && multi_ack == 1)
|
2016-06-12 12:53:55 +02:00
|
|
|
die(_("--stateless-rpc requires multi_ack_detailed"));
|
2012-10-26 17:53:55 +02:00
|
|
|
|
2018-12-29 22:19:14 +01:00
|
|
|
packet_reader_init(&reader, fd[0], NULL, 0,
|
pack-protocol.txt: accept error packets in any context
In the Git pack protocol definition, an error packet may appear only in
a certain context. However, servers can face a runtime error (e.g. I/O
error) at an arbitrary timing. This patch changes the protocol to allow
an error packet to be sent instead of any packet.
Without this protocol spec change, when a server cannot process a
request, there's no way to tell that to a client. Since the server
cannot produce a valid response, it would be forced to cut a connection
without telling why. With this protocol spec change, the server can be
more gentle in this situation. An old client may see these error packets
as an unexpected packet, but this is not worse than having an unexpected
EOF.
Following this protocol spec change, the error packet handling code is
moved to pkt-line.c. Implementation wise, this implementation uses
pkt-line to communicate with a subprocess. Since this is not a part of
Git protocol, it's possible that a packet that is not supposed to be an
error packet is mistakenly parsed as an error packet. This error packet
handling is enabled only for the Git pack protocol parsing code
considering this.
Signed-off-by: Masaya Suzuki <masayasuzuki@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-12-29 22:19:15 +01:00
|
|
|
PACKET_READ_CHOMP_NEWLINE |
|
|
|
|
PACKET_READ_DIE_ON_ERR_PACKET);
|
2018-12-29 22:19:14 +01:00
|
|
|
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
if (!args->no_dependents) {
|
|
|
|
mark_tips(negotiator, args->negotiation_tips);
|
|
|
|
for_each_cached_alternate(negotiator, insert_one_alternate_object);
|
|
|
|
}
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
fetching = 0;
|
|
|
|
for ( ; refs ; refs = refs->next) {
|
2017-05-01 04:28:54 +02:00
|
|
|
struct object_id *remote = &refs->old_oid;
|
2012-10-26 17:53:55 +02:00
|
|
|
const char *remote_hex;
|
|
|
|
struct object *o;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If that object is complete (i.e. it is an ancestor of a
|
|
|
|
* local ref), we tell them we have it but do not have to
|
|
|
|
* tell them about its ancestors, which they already know
|
|
|
|
* about.
|
|
|
|
*
|
|
|
|
* We use lookup_object here because we are only
|
|
|
|
* interested in the case we *know* the object is
|
|
|
|
* reachable and we have already scanned it.
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
*
|
|
|
|
* Do this only if args->no_dependents is false (if it is true,
|
|
|
|
* we cannot trust the object flags).
|
2012-10-26 17:53:55 +02:00
|
|
|
*/
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
if (!args->no_dependents &&
|
2019-06-20 09:41:14 +02:00
|
|
|
((o = lookup_object(the_repository, remote)) != NULL) &&
|
2012-10-26 17:53:55 +02:00
|
|
|
(o->flags & COMPLETE)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2017-05-01 04:28:54 +02:00
|
|
|
remote_hex = oid_to_hex(remote);
|
2012-10-26 17:53:55 +02:00
|
|
|
if (!fetching) {
|
|
|
|
struct strbuf c = STRBUF_INIT;
|
|
|
|
if (multi_ack == 2) strbuf_addstr(&c, " multi_ack_detailed");
|
|
|
|
if (multi_ack == 1) strbuf_addstr(&c, " multi_ack");
|
|
|
|
if (no_done) strbuf_addstr(&c, " no-done");
|
|
|
|
if (use_sideband == 2) strbuf_addstr(&c, " side-band-64k");
|
|
|
|
if (use_sideband == 1) strbuf_addstr(&c, " side-band");
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, the --depth argument is always relative to the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deepen-relative to the
server.
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 12:54:09 +02:00
|
|
|
if (args->deepen_relative) strbuf_addstr(&c, " deepen-relative");
|
2012-10-26 17:53:55 +02:00
|
|
|
if (args->use_thin_pack) strbuf_addstr(&c, " thin-pack");
|
|
|
|
if (args->no_progress) strbuf_addstr(&c, " no-progress");
|
|
|
|
if (args->include_tag) strbuf_addstr(&c, " include-tag");
|
|
|
|
if (prefer_ofs_delta) strbuf_addstr(&c, " ofs-delta");
|
2016-06-12 12:53:59 +02:00
|
|
|
if (deepen_since_ok) strbuf_addstr(&c, " deepen-since");
|
2016-06-12 12:54:04 +02:00
|
|
|
if (deepen_not_ok) strbuf_addstr(&c, " deepen-not");
|
2012-10-26 17:53:55 +02:00
|
|
|
if (agent_supported) strbuf_addf(&c, " agent=%s",
|
|
|
|
git_user_agent_sanitized());
|
2017-12-08 16:58:40 +01:00
|
|
|
if (args->filter_options.choice)
|
|
|
|
strbuf_addstr(&c, " filter");
|
2012-10-26 17:53:55 +02:00
|
|
|
packet_buf_write(&req_buf, "want %s%s\n", remote_hex, c.buf);
|
|
|
|
strbuf_release(&c);
|
|
|
|
} else
|
|
|
|
packet_buf_write(&req_buf, "want %s\n", remote_hex);
|
|
|
|
fetching++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!fetching) {
|
|
|
|
strbuf_release(&req_buf);
|
|
|
|
packet_flush(fd[1]);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2018-05-18 00:51:46 +02:00
|
|
|
if (is_repository_shallow(the_repository))
|
2013-12-05 14:02:34 +01:00
|
|
|
write_shallow_commits(&req_buf, 1, NULL);
|
2012-10-26 17:53:55 +02:00
|
|
|
if (args->depth > 0)
|
|
|
|
packet_buf_write(&req_buf, "deepen %d", args->depth);
|
2016-06-12 12:53:59 +02:00
|
|
|
if (args->deepen_since) {
|
2017-04-26 21:29:31 +02:00
|
|
|
timestamp_t max_age = approxidate(args->deepen_since);
|
2017-04-21 12:45:48 +02:00
|
|
|
packet_buf_write(&req_buf, "deepen-since %"PRItime, max_age);
|
2016-06-12 12:53:59 +02:00
|
|
|
}
|
2016-06-12 12:54:04 +02:00
|
|
|
if (args->deepen_not) {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < args->deepen_not->nr; i++) {
|
|
|
|
struct string_list_item *s = args->deepen_not->items + i;
|
|
|
|
packet_buf_write(&req_buf, "deepen-not %s", s->string);
|
|
|
|
}
|
|
|
|
}
|
2019-01-08 01:17:09 +01:00
|
|
|
if (server_supports_filtering && args->filter_options.choice) {
|
2019-06-28 00:54:10 +02:00
|
|
|
const char *spec =
|
|
|
|
expand_list_objects_filter_spec(&args->filter_options);
|
|
|
|
packet_buf_write(&req_buf, "filter %s", spec);
|
2019-01-08 01:17:09 +01:00
|
|
|
}
|
2012-10-26 17:53:55 +02:00
|
|
|
packet_buf_flush(&req_buf);
|
|
|
|
state_len = req_buf.len;
|
|
|
|
|
2016-06-12 12:53:56 +02:00
|
|
|
if (args->deepen) {
|
use skip_prefix to avoid magic numbers
It's a common idiom to match a prefix and then skip past it
with a magic number, like:
if (starts_with(foo, "bar"))
foo += 3;
This is easy to get wrong, since you have to count the
prefix string yourself, and there's no compiler check if the
string changes. We can use skip_prefix to avoid the magic
numbers here.
Note that some of these conversions could be much shorter.
For example:
if (starts_with(arg, "--foo=")) {
bar = arg + 6;
continue;
}
could become:
if (skip_prefix(arg, "--foo=", &bar))
continue;
However, I have left it as:
if (skip_prefix(arg, "--foo=", &v)) {
bar = v;
continue;
}
to visually match nearby cases which need to actually
process the string. Like:
if (skip_prefix(arg, "--foo=", &v)) {
bar = atoi(v);
continue;
}
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-06-18 21:47:50 +02:00
|
|
|
const char *arg;
|
2017-05-01 04:28:54 +02:00
|
|
|
struct object_id oid;
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
send_request(args, fd[1], &req_buf);
|
2018-12-29 22:19:14 +01:00
|
|
|
while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
|
|
|
|
if (skip_prefix(reader.line, "shallow ", &arg)) {
|
2017-05-01 04:28:54 +02:00
|
|
|
if (get_oid_hex(arg, &oid))
|
2018-12-29 22:19:14 +01:00
|
|
|
die(_("invalid shallow line: %s"), reader.line);
|
2018-05-18 00:51:44 +02:00
|
|
|
register_shallow(the_repository, &oid);
|
2012-10-26 17:53:55 +02:00
|
|
|
continue;
|
|
|
|
}
|
2018-12-29 22:19:14 +01:00
|
|
|
if (skip_prefix(reader.line, "unshallow ", &arg)) {
|
2017-05-01 04:28:54 +02:00
|
|
|
if (get_oid_hex(arg, &oid))
|
2018-12-29 22:19:14 +01:00
|
|
|
die(_("invalid unshallow line: %s"), reader.line);
|
2019-06-20 09:41:14 +02:00
|
|
|
if (!lookup_object(the_repository, &oid))
|
2018-12-29 22:19:14 +01:00
|
|
|
die(_("object not found: %s"), reader.line);
|
2012-10-26 17:53:55 +02:00
|
|
|
/* make sure that it is parsed as shallow */
|
2018-06-29 03:21:51 +02:00
|
|
|
if (!parse_object(the_repository, &oid))
|
2018-12-29 22:19:14 +01:00
|
|
|
die(_("error in object: %s"), reader.line);
|
2017-05-07 00:10:06 +02:00
|
|
|
if (unregister_shallow(&oid))
|
2018-12-29 22:19:14 +01:00
|
|
|
die(_("no shallow found: %s"), reader.line);
|
2012-10-26 17:53:55 +02:00
|
|
|
continue;
|
|
|
|
}
|
2018-12-29 22:19:14 +01:00
|
|
|
die(_("expected shallow/unshallow, got %s"), reader.line);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
} else if (!args->stateless_rpc)
|
|
|
|
send_request(args, fd[1], &req_buf);
|
|
|
|
|
|
|
|
if (!args->stateless_rpc) {
|
|
|
|
/* If we aren't using the stateless-rpc interface
|
|
|
|
* we don't need to retain the headers.
|
|
|
|
*/
|
|
|
|
strbuf_setlen(&req_buf, 0);
|
|
|
|
state_len = 0;
|
|
|
|
}
|
|
|
|
|
2019-10-03 01:49:28 +02:00
|
|
|
trace2_region_enter("fetch-pack", "negotiation_v0_v1", the_repository);
|
2012-10-26 17:53:55 +02:00
|
|
|
flushes = 0;
|
|
|
|
retval = -1;
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-05 17:58:49 +01:00
|
|
|
if (args->no_dependents)
|
|
|
|
goto done;
|
2018-06-15 00:54:28 +02:00
|
|
|
while ((oid = negotiator->next(negotiator))) {
|
2017-05-01 04:28:54 +02:00
|
|
|
packet_buf_write(&req_buf, "have %s\n", oid_to_hex(oid));
|
|
|
|
print_verbose(args, "have %s", oid_to_hex(oid));
|
2012-10-26 17:53:55 +02:00
|
|
|
in_vain++;
|
|
|
|
if (flush_at <= ++count) {
|
|
|
|
int ack;
|
|
|
|
|
|
|
|
packet_buf_flush(&req_buf);
|
|
|
|
send_request(args, fd[1], &req_buf);
|
|
|
|
strbuf_setlen(&req_buf, state_len);
|
|
|
|
flushes++;
|
2018-03-15 18:31:28 +01:00
|
|
|
flush_at = next_flush(args->stateless_rpc, count);
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We keep one window "ahead" of the other side, and
|
|
|
|
* will wait for an ACK only on the next one
|
|
|
|
*/
|
|
|
|
if (!args->stateless_rpc && count == INITIAL_FLUSH)
|
|
|
|
continue;
|
|
|
|
|
2018-12-29 22:19:14 +01:00
|
|
|
consume_shallow_list(args, &reader);
|
2012-10-26 17:53:55 +02:00
|
|
|
do {
|
2018-12-29 22:19:14 +01:00
|
|
|
ack = get_ack(&reader, result_oid);
|
2016-06-12 12:53:54 +02:00
|
|
|
if (ack)
|
2016-06-12 12:53:55 +02:00
|
|
|
print_verbose(args, _("got %s %d %s"), "ack",
|
2017-05-01 04:28:54 +02:00
|
|
|
ack, oid_to_hex(result_oid));
|
2012-10-26 17:53:55 +02:00
|
|
|
switch (ack) {
|
|
|
|
case ACK:
|
|
|
|
flushes = 0;
|
|
|
|
multi_ack = 0;
|
|
|
|
retval = 0;
|
|
|
|
goto done;
|
|
|
|
case ACK_common:
|
|
|
|
case ACK_ready:
|
|
|
|
case ACK_continue: {
|
|
|
|
struct commit *commit =
|
2018-06-29 03:21:59 +02:00
|
|
|
lookup_commit(the_repository,
|
|
|
|
result_oid);
|
2018-06-15 00:54:27 +02:00
|
|
|
int was_common;
|
2018-08-03 00:30:42 +02:00
|
|
|
|
2012-10-26 17:53:55 +02:00
|
|
|
if (!commit)
|
2017-05-01 04:28:54 +02:00
|
|
|
die(_("invalid commit %s"), oid_to_hex(result_oid));
|
2018-06-15 00:54:28 +02:00
|
|
|
was_common = negotiator->ack(negotiator, commit);
|
2012-10-26 17:53:55 +02:00
|
|
|
if (args->stateless_rpc
|
|
|
|
&& ack == ACK_common
|
2018-06-15 00:54:27 +02:00
|
|
|
&& !was_common) {
|
2012-10-26 17:53:55 +02:00
|
|
|
/* We need to replay the have for this object
|
|
|
|
* on the next RPC request so the peer knows
|
|
|
|
* it is in common with us.
|
|
|
|
*/
|
2017-05-01 04:28:54 +02:00
|
|
|
const char *hex = oid_to_hex(result_oid);
|
2012-10-26 17:53:55 +02:00
|
|
|
packet_buf_write(&req_buf, "have %s\n", hex);
|
|
|
|
state_len = req_buf.len;
|
2016-09-23 19:41:35 +02:00
|
|
|
/*
|
|
|
|
* Reset in_vain because an ack
|
|
|
|
* for this commit has not been
|
|
|
|
* seen.
|
|
|
|
*/
|
|
|
|
in_vain = 0;
|
|
|
|
} else if (!args->stateless_rpc
|
|
|
|
|| ack != ACK_common)
|
|
|
|
in_vain = 0;
|
2012-10-26 17:53:55 +02:00
|
|
|
retval = 0;
|
|
|
|
got_continue = 1;
|
2018-06-15 00:54:24 +02:00
|
|
|
if (ack == ACK_ready)
|
2012-10-26 17:53:55 +02:00
|
|
|
got_ready = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} while (ack);
|
|
|
|
flushes--;
|
|
|
|
if (got_continue && MAX_IN_VAIN < in_vain) {
|
2016-06-12 12:53:55 +02:00
|
|
|
print_verbose(args, _("giving up"));
|
2012-10-26 17:53:55 +02:00
|
|
|
break; /* give up */
|
|
|
|
}
|
2018-06-15 00:54:24 +02:00
|
|
|
if (got_ready)
|
|
|
|
break;
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
done:
|
2019-10-03 01:49:28 +02:00
|
|
|
trace2_region_leave("fetch-pack", "negotiation_v0_v1", the_repository);
|
2012-10-26 17:53:55 +02:00
|
|
|
if (!got_ready || !no_done) {
|
|
|
|
packet_buf_write(&req_buf, "done\n");
|
|
|
|
send_request(args, fd[1], &req_buf);
|
|
|
|
}
|
2016-06-12 12:53:55 +02:00
|
|
|
print_verbose(args, _("done"));
|
2012-10-26 17:53:55 +02:00
|
|
|
if (retval != 0) {
|
|
|
|
multi_ack = 0;
|
|
|
|
flushes++;
|
|
|
|
}
|
|
|
|
strbuf_release(&req_buf);
|
|
|
|
|
2014-02-06 16:10:39 +01:00
|
|
|
if (!got_ready || !no_done)
|
2018-12-29 22:19:14 +01:00
|
|
|
consume_shallow_list(args, &reader);
|
2012-10-26 17:53:55 +02:00
|
|
|
while (flushes || multi_ack) {
|
2018-12-29 22:19:14 +01:00
|
|
|
int ack = get_ack(&reader, result_oid);
|
2012-10-26 17:53:55 +02:00
|
|
|
if (ack) {
|
2016-06-12 12:53:55 +02:00
|
|
|
print_verbose(args, _("got %s (%d) %s"), "ack",
|
2017-05-01 04:28:54 +02:00
|
|
|
ack, oid_to_hex(result_oid));
|
2012-10-26 17:53:55 +02:00
|
|
|
if (ack == ACK)
|
|
|
|
return 0;
|
|
|
|
multi_ack = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
flushes--;
|
|
|
|
}
|
|
|
|
/* it is no error to fetch into a completely empty repo */
|
|
|
|
return count ? retval : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Commits that mark_complete() has flagged COMPLETE.  mark_complete()
 * pushes onto this list without ordering it; it is sorted by date before
 * mark_recent_complete_commits() consumes it from the front.
 */
static struct commit_list *complete;
|
|
|
|
|
2017-05-01 04:28:54 +02:00
|
|
|
static int mark_complete(const struct object_id *oid)
|
2012-10-26 17:53:55 +02:00
|
|
|
{
|
2018-06-29 03:21:51 +02:00
|
|
|
struct object *o = parse_object(the_repository, oid);
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
while (o && o->type == OBJ_TAG) {
|
|
|
|
struct tag *t = (struct tag *) o;
|
|
|
|
if (!t->tagged)
|
|
|
|
break; /* broken repository */
|
|
|
|
o->flags |= COMPLETE;
|
2018-06-29 03:21:51 +02:00
|
|
|
o = parse_object(the_repository, &t->tagged->oid);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
if (o && o->type == OBJ_COMMIT) {
|
|
|
|
struct commit *commit = (struct commit *)o;
|
|
|
|
if (!(commit->object.flags & COMPLETE)) {
|
|
|
|
commit->object.flags |= COMPLETE;
|
fetch-pack: avoid quadratic list insertion in mark_complete
We insert the commit pointed to by each ref one-by-one into
the "complete" commit_list using insert_by_date. Because
each insertion is O(n), we end up with O(n^2) behavior.
This typically doesn't matter, because the number of refs is
reasonably small. And even if there are a lot of refs, they
often point to a smaller set of objects (in which case the
optimization in commit ea5f220 keeps our "n" small).
However, in pathological repositories (hundreds of thousands
of refs, each pointing to a unique commit), this quadratic
behavior can make a difference. Since we do not care about
the list order until we have finished building it, we can
simply keep it unsorted during the insertion phase, then
sort it afterwards.
On a repository like the one described above, this dropped
the time to do a no-op fetch from 2.0s to 1.7s. On normal
repositories, it probably does not matter at all, but it
does not hurt to protect ourselves from pathological cases.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-02 08:16:23 +02:00
|
|
|
commit_list_insert(commit, &complete);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-05-25 20:39:16 +02:00
|
|
|
/*
 * Ref-iteration callback form of mark_complete(): only the object id is
 * used; `refname`, `flag` and `cb_data` are ignored.  Returns whatever
 * mark_complete() returns.
 */
static int mark_complete_oid(const char *refname, const struct object_id *oid,
			     int flag, void *cb_data)
{
	int status = mark_complete(oid);

	return status;
}
|
|
|
|
|
2012-10-26 17:53:55 +02:00
|
|
|
static void mark_recent_complete_commits(struct fetch_pack_args *args,
|
2017-04-26 21:29:31 +02:00
|
|
|
timestamp_t cutoff)
|
2012-10-26 17:53:55 +02:00
|
|
|
{
|
|
|
|
while (complete && cutoff <= complete->item->date) {
|
2016-06-12 12:53:55 +02:00
|
|
|
print_verbose(args, _("Marking %s as complete"),
|
2016-06-12 12:53:54 +02:00
|
|
|
oid_to_hex(&complete->item->object.oid));
|
2012-10-26 17:53:55 +02:00
|
|
|
pop_most_recent_commit(&complete, COMPLETE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-15 19:32:20 +02:00
|
|
|
static void add_refs_to_oidset(struct oidset *oids, struct ref *refs)
|
|
|
|
{
|
|
|
|
for (; refs; refs = refs->next)
|
|
|
|
oidset_insert(oids, &refs->old_oid);
|
|
|
|
}
|
|
|
|
|
2018-10-04 17:09:06 +02:00
|
|
|
static int is_unmatched_ref(const struct ref *ref)
|
|
|
|
{
|
|
|
|
struct object_id oid;
|
|
|
|
const char *p;
|
|
|
|
return ref->match_status == REF_NOT_MATCHED &&
|
|
|
|
!parse_oid_hex(ref->name, &oid, &p) &&
|
|
|
|
*p == '\0' &&
|
|
|
|
oideq(&oid, &ref->old_oid);
|
|
|
|
}
|
|
|
|
|
2012-10-26 17:53:55 +02:00
|
|
|
static void filter_refs(struct fetch_pack_args *args,
|
2013-01-29 23:02:15 +01:00
|
|
|
struct ref **refs,
|
|
|
|
struct ref **sought, int nr_sought)
|
2012-10-26 17:53:55 +02:00
|
|
|
{
|
|
|
|
struct ref *newlist = NULL;
|
|
|
|
struct ref **newtail = &newlist;
|
2017-05-15 19:32:20 +02:00
|
|
|
struct ref *unmatched = NULL;
|
2012-10-26 17:53:55 +02:00
|
|
|
struct ref *ref, *next;
|
2017-05-15 19:32:20 +02:00
|
|
|
struct oidset tip_oids = OIDSET_INIT;
|
2013-01-29 23:02:15 +01:00
|
|
|
int i;
|
2018-10-04 17:09:39 +02:00
|
|
|
int strict = !(allow_unadvertised_object_request &
|
|
|
|
(ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1));
|
2012-10-26 17:53:55 +02:00
|
|
|
|
2013-01-29 23:02:15 +01:00
|
|
|
i = 0;
|
2012-10-26 17:53:55 +02:00
|
|
|
for (ref = *refs; ref; ref = next) {
|
|
|
|
int keep = 0;
|
|
|
|
next = ref->next;
|
2013-01-29 23:02:15 +01:00
|
|
|
|
2014-06-06 19:24:48 +02:00
|
|
|
if (starts_with(ref->name, "refs/") &&
|
fetch: do not consider peeled tags as advertised tips
Our filter_refs() function accidentally considers the target of a peeled
tag to be advertised by the server, even though upload-pack on the
server side does not consider it so. This can result in the client
making a bogus fetch to the server, which will end with the server
complaining "not our ref". Whereas the correct behavior is for the
client to notice that the server will not allow the request and error
out immediately.
So as bugs go, this is not very serious (the outcome is the same either
way -- the fetch fails). But it's worth making the logic here correct
and consistent with other related cases (e.g., fetching an oid that the
server did not mention at all).
The crux of the issue comes from fdb69d33c4 (fetch-pack: always allow
fetching of literal SHA1s, 2017-05-15). After that, the strategy of
filter_refs() is basically:
- for each advertised ref, try to match it with a "sought" ref
provided by the user. Skip any malformed refs (which includes
peeled values like "refs/tags/foo^{}"), and place any unmatched
items onto the unmatched list.
- if there are unmatched sought refs, then put all of the advertised
tips into an oidset, including the unmatched ones.
- for each sought ref, see if it's in the oidset, in which case it's
legal for us to ask the server for it
The problem is in the second step. Our list of unmatched refs includes
the peeled refs, even though upload-pack does not allow them to be
directly fetched. So the simplest fix would be to exclude them during
that step.
However, we can observe that the unmatched list isn't used for anything
else, and is freed at the end. We can just free those malformed refs
immediately. That saves us having to check each ref a second time to see
if it's malformed.
Note that this code only kicks in when "strict" is in effect. I.e., if
we are using the v0 protocol and uploadpack.allowReachableSHA1InWant is
not in effect. With v2, all oids are allowed, and we do not bother
creating or consulting the oidset at all. To future-proof our test
against the upcoming GIT_TEST_PROTOCOL_VERSION flag, we'll manually mark
it as a v0-only test.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-04-13 07:57:37 +02:00
|
|
|
check_refname_format(ref->name, 0)) {
|
|
|
|
/*
|
|
|
|
* trash or a peeled value; do not even add it to
|
|
|
|
* unmatched list
|
|
|
|
*/
|
|
|
|
free_one_ref(ref);
|
|
|
|
continue;
|
|
|
|
} else {
|
2013-01-29 23:02:15 +01:00
|
|
|
while (i < nr_sought) {
|
|
|
|
int cmp = strcmp(ref->name, sought[i]->name);
|
2012-10-26 17:53:55 +02:00
|
|
|
if (cmp < 0)
|
|
|
|
break; /* definitely do not have it */
|
|
|
|
else if (cmp == 0) {
|
|
|
|
keep = 1; /* definitely have it */
|
2017-02-22 17:05:57 +01:00
|
|
|
sought[i]->match_status = REF_MATCHED;
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
2013-01-29 23:02:15 +01:00
|
|
|
i++;
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
|
2018-06-11 07:53:57 +02:00
|
|
|
if (!keep && args->fetch_all &&
|
|
|
|
(!args->deepen || !starts_with(ref->name, "refs/tags/")))
|
|
|
|
keep = 1;
|
|
|
|
}
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
if (keep) {
|
|
|
|
*newtail = ref;
|
|
|
|
ref->next = NULL;
|
|
|
|
newtail = &ref->next;
|
|
|
|
} else {
|
2017-05-15 19:32:20 +02:00
|
|
|
ref->next = unmatched;
|
|
|
|
unmatched = ref;
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-04 17:09:39 +02:00
|
|
|
if (strict) {
|
|
|
|
for (i = 0; i < nr_sought; i++) {
|
|
|
|
ref = sought[i];
|
|
|
|
if (!is_unmatched_ref(ref))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
add_refs_to_oidset(&tip_oids, unmatched);
|
|
|
|
add_refs_to_oidset(&tip_oids, newlist);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-01-29 23:02:15 +01:00
|
|
|
/* Append unmatched requests to the list */
|
2017-02-22 17:05:57 +01:00
|
|
|
for (i = 0; i < nr_sought; i++) {
|
|
|
|
ref = sought[i];
|
2018-10-04 17:09:06 +02:00
|
|
|
if (!is_unmatched_ref(ref))
|
2017-02-22 17:05:57 +01:00
|
|
|
continue;
|
2013-01-29 23:02:15 +01:00
|
|
|
|
2018-10-04 17:09:39 +02:00
|
|
|
if (!strict || oidset_contains(&tip_oids, &ref->old_oid)) {
|
2017-02-22 17:05:57 +01:00
|
|
|
ref->match_status = REF_MATCHED;
|
filter_ref: make a copy of extra "sought" entries
If the server supports allow_tip_sha1_in_want, we add any
unmatched raw-sha1 entries in our "sought" list of refs to
the list of refs we will ask the other side for. We do so by
inserting the original "struct ref" directly into our list,
rather than making a copy. This has several problems.
The most minor problem is that one cannot ever free the
resulting list; it contains structs that are copies of the
remote refs (made earlier by fetch_pack) along with sought
refs that are referenced elsewhere.
But more importantly that we set the ref->next pointer to
NULL, chopping off the remainder of any existing list that
the ref was a part of. We get the set of "sought" refs in
an array rather than a linked list, but that array is often
in turn generated from a list. The test modification in
t5516 demonstrates this. Rather than fetching just an exact
sha1, we fetch that sha1 plus another ref:
- we build a linked list of refs to fetch when do_fetch
calls get_ref_map; the exact sha1 is first, followed by
the named ref ("refs/heads/extra" in this case).
- we pass that linked list to transport_fetch_ref, which
squashes it into an array of pointers
- that array goes to fetch_pack, which calls filter_ref.
There we generate the want list from a mix of what the
remote side has advertised, and the "sought" entry for
the exact sha1. We set the sought entry's "next" pointer
to NULL.
- after we return from transport_fetch_refs, we then try
to update the refs by following the linked list. But our
list is now truncated, and we do not update
refs/heads/extra at all.
We can fix this by making a copy of the ref. There's nothing
that fetch_pack does to it that must be reflected in the
original "sought" list (and indeed, if that were the case we
would have a serious bug, because it is only exact-sha1
entries which are treated this way).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-19 21:37:09 +01:00
|
|
|
*newtail = copy_ref(ref);
|
|
|
|
newtail = &(*newtail)->next;
|
2017-02-22 17:05:57 +01:00
|
|
|
} else {
|
|
|
|
ref->match_status = REF_UNADVERTISED_NOT_ALLOWED;
|
2013-01-29 23:02:15 +01:00
|
|
|
}
|
|
|
|
}
|
2017-05-15 19:32:20 +02:00
|
|
|
|
|
|
|
oidset_clear(&tip_oids);
|
2019-04-13 07:54:09 +02:00
|
|
|
free_refs(unmatched);
|
2017-05-15 19:32:20 +02:00
|
|
|
|
2012-10-26 17:53:55 +02:00
|
|
|
*refs = newlist;
|
|
|
|
}
|
|
|
|
|
2018-06-15 00:54:28 +02:00
|
|
|
static void mark_alternate_complete(struct fetch_negotiator *unused,
|
2018-06-15 00:54:26 +02:00
|
|
|
struct object *obj)
|
2012-10-26 17:53:55 +02:00
|
|
|
{
|
2017-05-01 04:28:54 +02:00
|
|
|
mark_complete(&obj->oid);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
|
2018-03-14 07:32:42 +01:00
|
|
|
/*
 * Pairs an oidset with a list of refs.
 *
 * NOTE(review): no user of this struct is visible in this part of the
 * file; judging by the names it is iteration state for a walk over loose
 * objects -- confirm, and consider removing it if it is unused.
 */
struct loose_object_iter {
	struct oidset *loose_object_set; /* presumably ids of loose objects -- verify */
	struct ref *refs;                /* head of a linked list of refs */
};
|
|
|
|
|
2018-06-06 22:47:07 +02:00
|
|
|
/*
|
|
|
|
* Mark recent commits available locally and reachable from a local ref as
|
|
|
|
* COMPLETE. If args->no_dependents is false, also mark COMPLETE remote refs as
|
|
|
|
* COMMON_REF (otherwise, we are not planning to participate in negotiation, and
|
|
|
|
* thus do not need COMMON_REF marks).
|
|
|
|
*
|
|
|
|
* The cutoff time for recency is determined by this heuristic: it is the
|
|
|
|
* earliest commit time of the objects in refs that are commits and that we know
|
|
|
|
* the commit time of.
|
|
|
|
*/
|
2018-06-15 00:54:28 +02:00
|
|
|
static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
|
2018-06-15 00:54:26 +02:00
|
|
|
struct fetch_pack_args *args,
|
2018-06-06 22:47:07 +02:00
|
|
|
struct ref **refs)
|
2012-10-26 17:53:55 +02:00
|
|
|
{
|
|
|
|
struct ref *ref;
|
fetch-pack: restore save_commit_buffer after use
In fetch-pack, the global variable save_commit_buffer is set to 0, but
not restored to its original value after use.
In particular, if show_log() (in log-tree.c) is invoked after
fetch_pack() in the same process, show_log() will return before printing
out the commit message (because the invocation to
get_cached_commit_buffer() returns NULL, because the commit buffer was
not saved). I discovered this when attempting to run "git log -S" in a
partial clone, triggering the case where revision walking lazily loads
missing objects.
Therefore, restore save_commit_buffer to its original value after use.
An alternative to solve the problem I had is to replace
get_cached_commit_buffer() with get_commit_buffer(). That invocation was
introduced in commit a97934d ("use get_cached_commit_buffer where
appropriate", 2014-06-13) to replace "commit->buffer" introduced in
commit 3131b71 ("Add "--show-all" revision walker flag for debugging",
2008-02-13). In the latter commit, the commit author seems to be
deciding between not showing an unparsed commit at all and showing an
unparsed commit without the message (which is what the commit does), and
did not mention parsing the unparsed commit, so I prefer to preserve the
existing behavior.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-08 16:58:48 +01:00
|
|
|
int old_save_commit_buffer = save_commit_buffer;
|
2017-04-26 21:29:31 +02:00
|
|
|
timestamp_t cutoff = 0;
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
save_commit_buffer = 0;
|
|
|
|
|
|
|
|
for (ref = *refs; ref; ref = ref->next) {
|
|
|
|
struct object *o;
|
|
|
|
|
2018-11-12 15:55:58 +01:00
|
|
|
if (!has_object_file_with_flags(&ref->old_oid,
|
|
|
|
OBJECT_INFO_QUICK))
|
2018-03-14 07:32:42 +01:00
|
|
|
continue;
|
2018-06-29 03:21:51 +02:00
|
|
|
o = parse_object(the_repository, &ref->old_oid);
|
2012-10-26 17:53:55 +02:00
|
|
|
if (!o)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* We already have it -- which may mean that we were
|
|
|
|
* in sync with the other side at some time after
|
|
|
|
* that (it is OK if we guess wrong here).
|
|
|
|
*/
|
|
|
|
if (o->type == OBJ_COMMIT) {
|
|
|
|
struct commit *commit = (struct commit *)o;
|
|
|
|
if (!cutoff || cutoff < commit->date)
|
|
|
|
cutoff = commit->date;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
if (!args->deepen) {
|
|
|
|
for_each_ref(mark_complete_oid, NULL);
|
|
|
|
for_each_cached_alternate(NULL, mark_alternate_complete);
|
|
|
|
commit_list_sort_by_date(&complete);
|
|
|
|
if (cutoff)
|
|
|
|
mark_recent_complete_commits(args, cutoff);
|
|
|
|
}
|
2012-10-26 17:53:55 +02:00
|
|
|
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
/*
|
|
|
|
* Mark all complete remote refs as common refs.
|
|
|
|
* Don't mark them common yet; the server has to be told so first.
|
|
|
|
*/
|
|
|
|
for (ref = *refs; ref; ref = ref->next) {
|
|
|
|
struct object *o = deref_tag(the_repository,
|
|
|
|
lookup_object(the_repository,
|
2019-06-20 09:41:14 +02:00
|
|
|
&ref->old_oid),
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
NULL, 0);
|
2012-10-26 17:53:55 +02:00
|
|
|
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE))
|
|
|
|
continue;
|
2012-10-26 17:53:55 +02:00
|
|
|
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
negotiator->known_common(negotiator,
|
|
|
|
(struct commit *)o);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
|
2018-06-06 22:47:07 +02:00
|
|
|
save_commit_buffer = old_save_commit_buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns 1 if every object pointed to by the given remote refs is available
|
|
|
|
* locally and reachable from a local ref, and 0 otherwise.
|
|
|
|
*/
|
|
|
|
static int everything_local(struct fetch_pack_args *args,
|
|
|
|
struct ref **refs)
|
|
|
|
{
|
|
|
|
struct ref *ref;
|
|
|
|
int retval;
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
for (retval = 1, ref = *refs; ref ; ref = ref->next) {
|
2017-05-01 04:28:54 +02:00
|
|
|
const struct object_id *remote = &ref->old_oid;
|
2012-10-26 17:53:55 +02:00
|
|
|
struct object *o;
|
|
|
|
|
2019-06-20 09:41:14 +02:00
|
|
|
o = lookup_object(the_repository, remote);
|
2012-10-26 17:53:55 +02:00
|
|
|
if (!o || !(o->flags & COMPLETE)) {
|
|
|
|
retval = 0;
|
2017-05-01 04:28:54 +02:00
|
|
|
print_verbose(args, "want %s (%s)", oid_to_hex(remote),
|
2016-06-12 12:53:54 +02:00
|
|
|
ref->name);
|
2012-10-26 17:53:55 +02:00
|
|
|
continue;
|
|
|
|
}
|
2017-05-01 04:28:54 +02:00
|
|
|
print_verbose(args, _("already have %s (%s)"), oid_to_hex(remote),
|
2016-06-12 12:53:54 +02:00
|
|
|
ref->name);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
fetch-pack: restore save_commit_buffer after use
In fetch-pack, the global variable save_commit_buffer is set to 0, but
not restored to its original value after use.
In particular, if show_log() (in log-tree.c) is invoked after
fetch_pack() in the same process, show_log() will return before printing
out the commit message (because the invocation to
get_cached_commit_buffer() returns NULL, because the commit buffer was
not saved). I discovered this when attempting to run "git log -S" in a
partial clone, triggering the case where revision walking lazily loads
missing objects.
Therefore, restore save_commit_buffer to its original value after use.
An alternative to solve the problem I had is to replace
get_cached_commit_buffer() with get_commit_buffer(). That invocation was
introduced in commit a97934d ("use get_cached_commit_buffer where
appropriate", 2014-06-13) to replace "commit->buffer" introduced in
commit 3131b71 ("Add "--show-all" revision walker flag for debugging",
2008-02-13). In the latter commit, the commit author seems to be
deciding between not showing an unparsed commit at all and showing an
unparsed commit without the message (which is what the commit does), and
did not mention parsing the unparsed commit, so I prefer to preserve the
existing behavior.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-08 16:58:48 +01:00
|
|
|
|
2012-10-26 17:53:55 +02:00
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Async callback: run recv_sideband() on the first descriptor of the
 * int array passed in `data`, writing to `out`.
 *
 * `out` is closed before returning.  `in` is unused.  The return value is
 * whatever recv_sideband() returned.
 */
static int sideband_demux(int in, int out, void *data)
{
	const int *fds = data;
	const int status = recv_sideband("fetch-pack", fds[0], out);

	close(out);
	return status;
}
|
|
|
|
|
|
|
|
static int get_pack(struct fetch_pack_args *args,
|
|
|
|
int xd[2], char **pack_lockfile)
|
|
|
|
{
|
|
|
|
struct async demux;
|
|
|
|
int do_keep = args->keep_pack;
|
2015-09-24 23:07:54 +02:00
|
|
|
const char *cmd_name;
|
|
|
|
struct pack_header header;
|
|
|
|
int pass_header = 0;
|
2014-08-19 21:09:35 +02:00
|
|
|
struct child_process cmd = CHILD_PROCESS_INIT;
|
2013-05-26 03:16:17 +02:00
|
|
|
int ret;
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
memset(&demux, 0, sizeof(demux));
|
|
|
|
if (use_sideband) {
|
|
|
|
/* xd[] is talking with upload-pack; subprocess reads from
|
|
|
|
* xd[0], spits out band#2 to stderr, and feeds us band#1
|
|
|
|
* through demux->out.
|
|
|
|
*/
|
|
|
|
demux.proc = sideband_demux;
|
|
|
|
demux.data = xd;
|
|
|
|
demux.out = -1;
|
2016-04-20 00:50:29 +02:00
|
|
|
demux.isolate_sigpipe = 1;
|
2012-10-26 17:53:55 +02:00
|
|
|
if (start_async(&demux))
|
2016-06-12 12:53:55 +02:00
|
|
|
die(_("fetch-pack: unable to fork off sideband demultiplexer"));
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
demux.out = xd[0];
|
|
|
|
|
|
|
|
if (!args->keep_pack && unpack_limit) {
|
|
|
|
|
|
|
|
if (read_pack_header(demux.out, &header))
|
2016-06-12 12:53:55 +02:00
|
|
|
die(_("protocol error: bad pack header"));
|
2015-09-24 23:07:54 +02:00
|
|
|
pass_header = 1;
|
2012-10-26 17:53:55 +02:00
|
|
|
if (ntohl(header.hdr_entries) < unpack_limit)
|
|
|
|
do_keep = 0;
|
|
|
|
else
|
|
|
|
do_keep = 1;
|
|
|
|
}
|
|
|
|
|
2013-05-26 03:16:15 +02:00
|
|
|
if (alternate_shallow_file) {
|
2015-09-24 23:07:54 +02:00
|
|
|
argv_array_push(&cmd.args, "--shallow-file");
|
|
|
|
argv_array_push(&cmd.args, alternate_shallow_file);
|
2013-05-26 03:16:15 +02:00
|
|
|
}
|
|
|
|
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-05 17:58:49 +01:00
|
|
|
if (do_keep || args->from_promisor) {
|
2012-10-26 17:53:55 +02:00
|
|
|
if (pack_lockfile)
|
|
|
|
cmd.out = -1;
|
2015-09-24 23:07:54 +02:00
|
|
|
cmd_name = "index-pack";
|
|
|
|
argv_array_push(&cmd.args, cmd_name);
|
|
|
|
argv_array_push(&cmd.args, "--stdin");
|
2012-10-26 17:53:55 +02:00
|
|
|
if (!args->quiet && !args->no_progress)
|
2015-09-24 23:07:54 +02:00
|
|
|
argv_array_push(&cmd.args, "-v");
|
2012-10-26 17:53:55 +02:00
|
|
|
if (args->use_thin_pack)
|
2015-09-24 23:07:54 +02:00
|
|
|
argv_array_push(&cmd.args, "--fix-thin");
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-05 17:58:49 +01:00
|
|
|
if (do_keep && (args->lock_pack || unpack_limit)) {
|
2017-04-18 23:57:42 +02:00
|
|
|
char hostname[HOST_NAME_MAX + 1];
|
2017-04-18 23:57:43 +02:00
|
|
|
if (xgethostname(hostname, sizeof(hostname)))
|
2015-09-24 23:07:54 +02:00
|
|
|
xsnprintf(hostname, sizeof(hostname), "localhost");
|
|
|
|
argv_array_pushf(&cmd.args,
|
|
|
|
"--keep=fetch-pack %"PRIuMAX " on %s",
|
|
|
|
(uintmax_t)getpid(), hostname);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
2013-05-26 03:16:17 +02:00
|
|
|
if (args->check_self_contained_and_connected)
|
2015-09-24 23:07:54 +02:00
|
|
|
argv_array_push(&cmd.args, "--check-self-contained-and-connected");
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-05 17:58:49 +01:00
|
|
|
if (args->from_promisor)
|
|
|
|
argv_array_push(&cmd.args, "--promisor");
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
else {
|
2015-09-24 23:07:54 +02:00
|
|
|
cmd_name = "unpack-objects";
|
|
|
|
argv_array_push(&cmd.args, cmd_name);
|
2012-10-26 17:53:55 +02:00
|
|
|
if (args->quiet || args->no_progress)
|
2015-09-24 23:07:54 +02:00
|
|
|
argv_array_push(&cmd.args, "-q");
|
2013-05-26 03:16:17 +02:00
|
|
|
args->check_self_contained_and_connected = 0;
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
2015-09-24 23:07:54 +02:00
|
|
|
|
|
|
|
if (pass_header)
|
|
|
|
argv_array_pushf(&cmd.args, "--pack_header=%"PRIu32",%"PRIu32,
|
|
|
|
ntohl(header.hdr_version),
|
|
|
|
ntohl(header.hdr_entries));
|
2012-10-26 17:53:55 +02:00
|
|
|
if (fetch_fsck_objects >= 0
|
|
|
|
? fetch_fsck_objects
|
|
|
|
: transfer_fsck_objects >= 0
|
|
|
|
? transfer_fsck_objects
|
2018-03-14 19:42:41 +01:00
|
|
|
: 0) {
|
|
|
|
if (args->from_promisor)
|
|
|
|
/*
|
|
|
|
* We cannot use --strict in index-pack because it
|
|
|
|
* checks both broken objects and links, but we only
|
|
|
|
* want to check for broken objects.
|
|
|
|
*/
|
|
|
|
argv_array_push(&cmd.args, "--fsck-objects");
|
|
|
|
else
|
2018-07-27 16:37:17 +02:00
|
|
|
argv_array_pushf(&cmd.args, "--strict%s",
|
|
|
|
fsck_msg_types.buf);
|
2018-03-14 19:42:41 +01:00
|
|
|
}
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
cmd.in = demux.out;
|
|
|
|
cmd.git_cmd = 1;
|
|
|
|
if (start_command(&cmd))
|
2016-06-12 12:53:55 +02:00
|
|
|
die(_("fetch-pack: unable to fork off %s"), cmd_name);
|
2012-10-26 17:53:55 +02:00
|
|
|
if (do_keep && pack_lockfile) {
|
|
|
|
*pack_lockfile = index_pack_lockfile(cmd.out);
|
|
|
|
close(cmd.out);
|
|
|
|
}
|
|
|
|
|
2013-10-22 15:36:02 +02:00
|
|
|
if (!use_sideband)
|
|
|
|
/* Closed by start_command() */
|
|
|
|
xd[0] = -1;
|
|
|
|
|
2013-05-26 03:16:17 +02:00
|
|
|
ret = finish_command(&cmd);
|
|
|
|
if (!ret || (args->check_self_contained_and_connected && ret == 1))
|
|
|
|
args->self_contained_and_connected =
|
|
|
|
args->check_self_contained_and_connected &&
|
|
|
|
ret == 0;
|
|
|
|
else
|
2016-06-12 12:53:55 +02:00
|
|
|
die(_("%s failed"), cmd_name);
|
2012-10-26 17:53:55 +02:00
|
|
|
if (use_sideband && finish_async(&demux))
|
2016-06-12 12:53:55 +02:00
|
|
|
die(_("error in sideband demultiplexer"));
|
2012-10-26 17:53:55 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-01-29 23:02:15 +01:00
|
|
|
static int cmp_ref_by_name(const void *a_, const void *b_)
|
|
|
|
{
|
|
|
|
const struct ref *a = *((const struct ref **)a_);
|
|
|
|
const struct ref *b = *((const struct ref **)b_);
|
|
|
|
return strcmp(a->name, b->name);
|
|
|
|
}
|
|
|
|
|
2012-10-26 17:53:55 +02:00
|
|
|
static struct ref *do_fetch_pack(struct fetch_pack_args *args,
|
|
|
|
int fd[2],
|
|
|
|
const struct ref *orig_ref,
|
2013-01-29 23:02:15 +01:00
|
|
|
struct ref **sought, int nr_sought,
|
2013-12-05 14:02:39 +01:00
|
|
|
struct shallow_info *si,
|
2012-10-26 17:53:55 +02:00
|
|
|
char **pack_lockfile)
|
|
|
|
{
|
2019-08-13 20:37:48 +02:00
|
|
|
struct repository *r = the_repository;
|
2012-10-26 17:53:55 +02:00
|
|
|
struct ref *ref = copy_ref_list(orig_ref);
|
2017-05-01 04:28:54 +02:00
|
|
|
struct object_id oid;
|
2012-10-26 17:53:55 +02:00
|
|
|
const char *agent_feature;
|
|
|
|
int agent_len;
|
2018-06-15 00:54:28 +02:00
|
|
|
struct fetch_negotiator negotiator;
|
2019-08-13 20:37:48 +02:00
|
|
|
fetch_negotiator_init(r, &negotiator);
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
sort_ref_list(&ref, ref_compare_name);
|
2016-09-29 17:27:31 +02:00
|
|
|
QSORT(sought, nr_sought, cmp_ref_by_name);
|
2012-10-26 17:53:55 +02:00
|
|
|
|
2019-06-20 13:59:51 +02:00
|
|
|
if ((agent_feature = server_feature_value("agent", &agent_len))) {
|
|
|
|
agent_supported = 1;
|
|
|
|
if (agent_len)
|
|
|
|
print_verbose(args, _("Server version is %.*s"),
|
|
|
|
agent_len, agent_feature);
|
|
|
|
}
|
|
|
|
|
2019-06-20 13:59:50 +02:00
|
|
|
if (server_supports("shallow"))
|
|
|
|
print_verbose(args, _("Server supports %s"), "shallow");
|
2019-08-13 20:37:48 +02:00
|
|
|
else if (args->depth > 0 || is_repository_shallow(r))
|
2016-06-12 12:53:55 +02:00
|
|
|
die(_("Server does not support shallow clients"));
|
2016-06-12 12:54:04 +02:00
|
|
|
if (args->depth > 0 || args->deepen_since || args->deepen_not)
|
2016-06-12 12:53:56 +02:00
|
|
|
args->deepen = 1;
|
2012-10-26 17:53:55 +02:00
|
|
|
if (server_supports("multi_ack_detailed")) {
|
2019-06-20 13:59:49 +02:00
|
|
|
print_verbose(args, _("Server supports %s"), "multi_ack_detailed");
|
2012-10-26 17:53:55 +02:00
|
|
|
multi_ack = 2;
|
|
|
|
if (server_supports("no-done")) {
|
2019-06-20 13:59:49 +02:00
|
|
|
print_verbose(args, _("Server supports %s"), "no-done");
|
2012-10-26 17:53:55 +02:00
|
|
|
if (args->stateless_rpc)
|
|
|
|
no_done = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (server_supports("multi_ack")) {
|
2019-06-20 13:59:49 +02:00
|
|
|
print_verbose(args, _("Server supports %s"), "multi_ack");
|
2012-10-26 17:53:55 +02:00
|
|
|
multi_ack = 1;
|
|
|
|
}
|
|
|
|
if (server_supports("side-band-64k")) {
|
2019-06-20 13:59:49 +02:00
|
|
|
print_verbose(args, _("Server supports %s"), "side-band-64k");
|
2012-10-26 17:53:55 +02:00
|
|
|
use_sideband = 2;
|
|
|
|
}
|
|
|
|
else if (server_supports("side-band")) {
|
2019-06-20 13:59:49 +02:00
|
|
|
print_verbose(args, _("Server supports %s"), "side-band");
|
2012-10-26 17:53:55 +02:00
|
|
|
use_sideband = 1;
|
|
|
|
}
|
2013-01-29 23:02:15 +01:00
|
|
|
if (server_supports("allow-tip-sha1-in-want")) {
|
2019-06-20 13:59:49 +02:00
|
|
|
print_verbose(args, _("Server supports %s"), "allow-tip-sha1-in-want");
|
2015-05-21 22:23:38 +02:00
|
|
|
allow_unadvertised_object_request |= ALLOW_TIP_SHA1;
|
2013-01-29 23:02:15 +01:00
|
|
|
}
|
2015-05-21 22:23:39 +02:00
|
|
|
if (server_supports("allow-reachable-sha1-in-want")) {
|
2019-06-20 13:59:49 +02:00
|
|
|
print_verbose(args, _("Server supports %s"), "allow-reachable-sha1-in-want");
|
2015-05-21 22:23:39 +02:00
|
|
|
allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
|
|
|
|
}
|
2019-06-20 13:59:50 +02:00
|
|
|
if (server_supports("thin-pack"))
|
|
|
|
print_verbose(args, _("Server supports %s"), "thin-pack");
|
|
|
|
else
|
2012-10-26 17:53:55 +02:00
|
|
|
args->use_thin_pack = 0;
|
2019-06-20 13:59:50 +02:00
|
|
|
if (server_supports("no-progress"))
|
|
|
|
print_verbose(args, _("Server supports %s"), "no-progress");
|
|
|
|
else
|
2012-10-26 17:53:55 +02:00
|
|
|
args->no_progress = 0;
|
2019-06-20 13:59:50 +02:00
|
|
|
if (server_supports("include-tag"))
|
|
|
|
print_verbose(args, _("Server supports %s"), "include-tag");
|
|
|
|
else
|
2012-10-26 17:53:55 +02:00
|
|
|
args->include_tag = 0;
|
2016-06-12 12:53:54 +02:00
|
|
|
if (server_supports("ofs-delta"))
|
2019-06-20 13:59:49 +02:00
|
|
|
print_verbose(args, _("Server supports %s"), "ofs-delta");
|
2016-06-12 12:53:54 +02:00
|
|
|
else
|
2012-10-26 17:53:55 +02:00
|
|
|
prefer_ofs_delta = 0;
|
|
|
|
|
2017-12-08 16:58:40 +01:00
|
|
|
if (server_supports("filter")) {
|
|
|
|
server_supports_filtering = 1;
|
2019-06-20 13:59:49 +02:00
|
|
|
print_verbose(args, _("Server supports %s"), "filter");
|
2017-12-08 16:58:40 +01:00
|
|
|
} else if (args->filter_options.choice) {
|
|
|
|
warning("filtering not recognized by server, ignoring");
|
|
|
|
}
|
|
|
|
|
2019-06-20 13:59:50 +02:00
|
|
|
if (server_supports("deepen-since")) {
|
|
|
|
print_verbose(args, _("Server supports %s"), "deepen-since");
|
2016-06-12 12:53:59 +02:00
|
|
|
deepen_since_ok = 1;
|
2019-06-20 13:59:50 +02:00
|
|
|
} else if (args->deepen_since)
|
2016-06-12 12:53:59 +02:00
|
|
|
die(_("Server does not support --shallow-since"));
|
2019-06-20 13:59:50 +02:00
|
|
|
if (server_supports("deepen-not")) {
|
|
|
|
print_verbose(args, _("Server supports %s"), "deepen-not");
|
2016-06-12 12:54:04 +02:00
|
|
|
deepen_not_ok = 1;
|
2019-06-20 13:59:50 +02:00
|
|
|
} else if (args->deepen_not)
|
2016-06-12 12:54:04 +02:00
|
|
|
die(_("Server does not support --shallow-exclude"));
|
2019-06-20 13:59:50 +02:00
|
|
|
if (server_supports("deepen-relative"))
|
|
|
|
print_verbose(args, _("Server supports %s"), "deepen-relative");
|
|
|
|
else if (args->deepen_relative)
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 12:54:09 +02:00
|
|
|
die(_("Server does not support --deepen"));
|
2012-10-26 17:53:55 +02:00
|
|
|
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
if (!args->no_dependents) {
|
|
|
|
mark_complete_and_common_ref(&negotiator, args, &ref);
|
|
|
|
filter_refs(args, &ref, sought, nr_sought);
|
|
|
|
if (everything_local(args, &ref)) {
|
|
|
|
packet_flush(fd[1]);
|
|
|
|
goto all_done;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
filter_refs(args, &ref, sought, nr_sought);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
2018-06-15 00:54:28 +02:00
|
|
|
if (find_common(&negotiator, args, fd, &oid, ref) < 0)
|
2012-10-26 17:53:55 +02:00
|
|
|
if (!args->keep_pack)
|
|
|
|
/* When cloning, it is not unusual to have
|
|
|
|
* no common commit.
|
|
|
|
*/
|
2016-06-12 12:53:55 +02:00
|
|
|
warning(_("no common commits"));
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
if (args->stateless_rpc)
|
|
|
|
packet_flush(fd[1]);
|
2016-06-12 12:53:56 +02:00
|
|
|
if (args->deepen)
|
2013-12-05 14:02:34 +01:00
|
|
|
setup_alternate_shallow(&shallow_lock, &alternate_shallow_file,
|
|
|
|
NULL);
|
2013-12-05 14:02:40 +01:00
|
|
|
else if (si->nr_ours || si->nr_theirs)
|
2013-12-05 14:02:39 +01:00
|
|
|
alternate_shallow_file = setup_temporary_shallow(si->shallow);
|
2013-08-26 04:17:26 +02:00
|
|
|
else
|
|
|
|
alternate_shallow_file = NULL;
|
2012-10-26 17:53:55 +02:00
|
|
|
if (get_pack(args, fd, pack_lockfile))
|
2016-06-12 12:53:55 +02:00
|
|
|
die(_("git fetch-pack: fetch failed."));
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
all_done:
|
2018-06-15 00:54:28 +02:00
|
|
|
negotiator.release(&negotiator);
|
2012-10-26 17:53:55 +02:00
|
|
|
return ref;
|
|
|
|
}
|
|
|
|
|
2018-03-15 18:31:29 +01:00
|
|
|
static void add_shallow_requests(struct strbuf *req_buf,
|
|
|
|
const struct fetch_pack_args *args)
|
|
|
|
{
|
2018-07-18 21:20:27 +02:00
|
|
|
if (is_repository_shallow(the_repository))
|
2018-03-15 18:31:29 +01:00
|
|
|
write_shallow_commits(req_buf, 1, NULL);
|
|
|
|
if (args->depth > 0)
|
|
|
|
packet_buf_write(req_buf, "deepen %d", args->depth);
|
|
|
|
if (args->deepen_since) {
|
|
|
|
timestamp_t max_age = approxidate(args->deepen_since);
|
|
|
|
packet_buf_write(req_buf, "deepen-since %"PRItime, max_age);
|
|
|
|
}
|
|
|
|
if (args->deepen_not) {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < args->deepen_not->nr; i++) {
|
|
|
|
struct string_list_item *s = args->deepen_not->items + i;
|
|
|
|
packet_buf_write(req_buf, "deepen-not %s", s->string);
|
|
|
|
}
|
|
|
|
}
|
2018-12-18 22:24:35 +01:00
|
|
|
if (args->deepen_relative)
|
|
|
|
packet_buf_write(req_buf, "deepen-relative\n");
|
2018-03-15 18:31:29 +01:00
|
|
|
}
|
|
|
|
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
static void add_wants(int no_dependents, const struct ref *wants, struct strbuf *req_buf)
|
2018-03-15 18:31:28 +01:00
|
|
|
{
|
2018-06-28 00:30:23 +02:00
|
|
|
int use_ref_in_want = server_supports_feature("fetch", "ref-in-want", 0);
|
|
|
|
|
2018-03-15 18:31:28 +01:00
|
|
|
for ( ; wants ; wants = wants->next) {
|
|
|
|
const struct object_id *remote = &wants->old_oid;
|
|
|
|
struct object *o;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If that object is complete (i.e. it is an ancestor of a
|
|
|
|
* local ref), we tell them we have it but do not have to
|
|
|
|
* tell them about its ancestors, which they already know
|
|
|
|
* about.
|
|
|
|
*
|
|
|
|
* We use lookup_object here because we are only
|
|
|
|
* interested in the case we *know* the object is
|
|
|
|
* reachable and we have already scanned it.
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
*
|
|
|
|
* Do this only if args->no_dependents is false (if it is true,
|
|
|
|
* we cannot trust the object flags).
|
2018-03-15 18:31:28 +01:00
|
|
|
*/
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
if (!no_dependents &&
|
2019-06-20 09:41:14 +02:00
|
|
|
((o = lookup_object(the_repository, remote)) != NULL) &&
|
2018-03-15 18:31:28 +01:00
|
|
|
(o->flags & COMPLETE)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2018-06-28 00:30:23 +02:00
|
|
|
if (!use_ref_in_want || wants->exact_oid)
|
|
|
|
packet_buf_write(req_buf, "want %s\n", oid_to_hex(remote));
|
|
|
|
else
|
|
|
|
packet_buf_write(req_buf, "want-ref %s\n", wants->name);
|
2018-03-15 18:31:28 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void add_common(struct strbuf *req_buf, struct oidset *common)
|
|
|
|
{
|
|
|
|
struct oidset_iter iter;
|
|
|
|
const struct object_id *oid;
|
|
|
|
oidset_iter_init(common, &iter);
|
|
|
|
|
|
|
|
while ((oid = oidset_iter_next(&iter))) {
|
|
|
|
packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-15 00:54:28 +02:00
|
|
|
static int add_haves(struct fetch_negotiator *negotiator,
|
|
|
|
struct strbuf *req_buf,
|
2018-06-15 00:54:26 +02:00
|
|
|
int *haves_to_send, int *in_vain)
|
2018-03-15 18:31:28 +01:00
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
int haves_added = 0;
|
|
|
|
const struct object_id *oid;
|
|
|
|
|
2018-06-15 00:54:28 +02:00
|
|
|
while ((oid = negotiator->next(negotiator))) {
|
2018-03-15 18:31:28 +01:00
|
|
|
packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid));
|
|
|
|
if (++haves_added >= *haves_to_send)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
*in_vain += haves_added;
|
|
|
|
if (!haves_added || *in_vain >= MAX_IN_VAIN) {
|
|
|
|
/* Send Done */
|
|
|
|
packet_buf_write(req_buf, "done\n");
|
|
|
|
ret = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Increase haves to send on next round */
|
|
|
|
*haves_to_send = next_flush(1, *haves_to_send);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-06-15 00:54:28 +02:00
|
|
|
static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
|
2019-06-28 00:54:10 +02:00
|
|
|
struct fetch_pack_args *args,
|
2018-03-15 18:31:28 +01:00
|
|
|
const struct ref *wants, struct oidset *common,
|
2019-01-16 20:28:14 +01:00
|
|
|
int *haves_to_send, int *in_vain,
|
|
|
|
int sideband_all)
|
2018-03-15 18:31:28 +01:00
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
struct strbuf req_buf = STRBUF_INIT;
|
|
|
|
|
|
|
|
if (server_supports_v2("fetch", 1))
|
|
|
|
packet_buf_write(&req_buf, "command=fetch");
|
|
|
|
if (server_supports_v2("agent", 0))
|
|
|
|
packet_buf_write(&req_buf, "agent=%s", git_user_agent_sanitized());
|
2018-04-24 00:46:24 +02:00
|
|
|
if (args->server_options && args->server_options->nr &&
|
|
|
|
server_supports_v2("server-option", 1)) {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < args->server_options->nr; i++)
|
2019-05-22 22:08:22 +02:00
|
|
|
packet_buf_write(&req_buf, "server-option=%s",
|
2018-04-24 00:46:24 +02:00
|
|
|
args->server_options->items[i].string);
|
|
|
|
}
|
2018-03-15 18:31:28 +01:00
|
|
|
|
|
|
|
packet_buf_delim(&req_buf);
|
|
|
|
if (args->use_thin_pack)
|
|
|
|
packet_buf_write(&req_buf, "thin-pack");
|
|
|
|
if (args->no_progress)
|
|
|
|
packet_buf_write(&req_buf, "no-progress");
|
|
|
|
if (args->include_tag)
|
|
|
|
packet_buf_write(&req_buf, "include-tag");
|
|
|
|
if (prefer_ofs_delta)
|
|
|
|
packet_buf_write(&req_buf, "ofs-delta");
|
2019-01-16 20:28:14 +01:00
|
|
|
if (sideband_all)
|
|
|
|
packet_buf_write(&req_buf, "sideband-all");
|
2018-03-15 18:31:28 +01:00
|
|
|
|
2018-03-15 18:31:29 +01:00
|
|
|
/* Add shallow-info and deepen request */
|
|
|
|
if (server_supports_feature("fetch", "shallow", 0))
|
|
|
|
add_shallow_requests(&req_buf, args);
|
2018-07-18 21:20:27 +02:00
|
|
|
else if (is_repository_shallow(the_repository) || args->deepen)
|
2018-03-15 18:31:29 +01:00
|
|
|
die(_("Server does not support shallow requests"));
|
|
|
|
|
2018-05-04 01:46:56 +02:00
|
|
|
/* Add filter */
|
|
|
|
if (server_supports_feature("fetch", "filter", 0) &&
|
|
|
|
args->filter_options.choice) {
|
2019-06-28 00:54:10 +02:00
|
|
|
const char *spec =
|
|
|
|
expand_list_objects_filter_spec(&args->filter_options);
|
2018-05-04 01:46:56 +02:00
|
|
|
print_verbose(args, _("Server supports filter"));
|
2019-06-28 00:54:10 +02:00
|
|
|
packet_buf_write(&req_buf, "filter %s", spec);
|
2018-05-04 01:46:56 +02:00
|
|
|
} else if (args->filter_options.choice) {
|
|
|
|
warning("filtering not recognized by server, ignoring");
|
|
|
|
}
|
|
|
|
|
2018-03-15 18:31:28 +01:00
|
|
|
/* add wants */
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
add_wants(args->no_dependents, wants, &req_buf);
|
2018-03-15 18:31:28 +01:00
|
|
|
|
2018-05-04 01:46:56 +02:00
|
|
|
if (args->no_dependents) {
|
|
|
|
packet_buf_write(&req_buf, "done");
|
|
|
|
ret = 1;
|
|
|
|
} else {
|
|
|
|
/* Add all of the common commits we've found in previous rounds */
|
|
|
|
add_common(&req_buf, common);
|
2018-03-15 18:31:28 +01:00
|
|
|
|
2018-05-04 01:46:56 +02:00
|
|
|
/* Add initial haves */
|
2018-06-15 00:54:28 +02:00
|
|
|
ret = add_haves(negotiator, &req_buf, haves_to_send, in_vain);
|
2018-05-04 01:46:56 +02:00
|
|
|
}
|
2018-03-15 18:31:28 +01:00
|
|
|
|
|
|
|
/* Send request */
|
|
|
|
packet_buf_flush(&req_buf);
|
2019-03-05 05:11:39 +01:00
|
|
|
if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
|
|
|
|
die_errno(_("unable to write request to remote"));
|
2018-03-15 18:31:28 +01:00
|
|
|
|
|
|
|
strbuf_release(&req_buf);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Processes a section header in a server's response and checks if it matches
|
|
|
|
* `section`. If the value of `peek` is 1, the header line will be peeked (and
|
|
|
|
* not consumed); if 0, the line will be consumed and the function will die if
|
|
|
|
* the section header doesn't match what was expected.
|
|
|
|
*/
|
|
|
|
static int process_section_header(struct packet_reader *reader,
|
|
|
|
const char *section, int peek)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (packet_reader_peek(reader) != PACKET_READ_NORMAL)
|
2018-07-23 19:56:35 +02:00
|
|
|
die(_("error reading section header '%s'"), section);
|
2018-03-15 18:31:28 +01:00
|
|
|
|
|
|
|
ret = !strcmp(reader->line, section);
|
|
|
|
|
|
|
|
if (!peek) {
|
|
|
|
if (!ret)
|
2018-07-23 19:56:35 +02:00
|
|
|
die(_("expected '%s', received '%s'"),
|
2018-03-15 18:31:28 +01:00
|
|
|
section, reader->line);
|
|
|
|
packet_reader_read(reader);
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-06-15 00:54:28 +02:00
|
|
|
static int process_acks(struct fetch_negotiator *negotiator,
|
2018-06-15 00:54:26 +02:00
|
|
|
struct packet_reader *reader,
|
|
|
|
struct oidset *common)
|
2018-03-15 18:31:28 +01:00
|
|
|
{
|
|
|
|
/* received */
|
|
|
|
int received_ready = 0;
|
|
|
|
int received_ack = 0;
|
|
|
|
|
|
|
|
process_section_header(reader, "acknowledgments", 0);
|
|
|
|
while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
|
|
|
|
const char *arg;
|
|
|
|
|
|
|
|
if (!strcmp(reader->line, "NAK"))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (skip_prefix(reader->line, "ACK ", &arg)) {
|
|
|
|
struct object_id oid;
|
|
|
|
if (!get_oid_hex(arg, &oid)) {
|
|
|
|
struct commit *commit;
|
|
|
|
oidset_insert(common, &oid);
|
2018-06-29 03:21:59 +02:00
|
|
|
commit = lookup_commit(the_repository, &oid);
|
2018-06-15 00:54:28 +02:00
|
|
|
negotiator->ack(negotiator, commit);
|
2018-03-15 18:31:28 +01:00
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!strcmp(reader->line, "ready")) {
|
|
|
|
received_ready = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2018-07-23 19:56:35 +02:00
|
|
|
die(_("unexpected acknowledgment line: '%s'"), reader->line);
|
2018-03-15 18:31:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (reader->status != PACKET_READ_FLUSH &&
|
|
|
|
reader->status != PACKET_READ_DELIM)
|
2018-07-23 19:56:35 +02:00
|
|
|
die(_("error processing acks: %d"), reader->status);
|
2018-03-15 18:31:28 +01:00
|
|
|
|
fetch-pack: be more precise in parsing v2 response
Each section in a protocol v2 response is followed by either a DELIM
packet (indicating more sections to follow) or a FLUSH packet
(indicating none to follow). But when parsing the "acknowledgments"
section, do_fetch_pack_v2() is liberal in accepting both, but determines
whether to continue reading or not based solely on the contents of the
"acknowledgments" section, not on whether DELIM or FLUSH was read.
There is no issue with a protocol-compliant server, but can result in
confusing error messages when communicating with a server that
serves unexpected additional sections. Consider a server that sends
"new-section" after "acknowledgments":
- client writes request
- client reads the "acknowledgments" section which contains no "ready",
then DELIM
- since there was no "ready", client needs to continue negotiation, and
writes request
- client reads "new-section", and reports to the end user "expected
'acknowledgments', received 'new-section'"
For the person debugging the involved Git implementation(s), the error
message is confusing in that "new-section" was not received in response
to the latest request, but to the first one.
One solution is to always continue reading after DELIM, but in this
case, we can do better. We know from the protocol that "ready" means at
least the packfile section is coming (hence, DELIM) and that no "ready"
means that no sections are to follow (hence, FLUSH). So teach
process_acks() to enforce this.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-20 00:54:04 +02:00
|
|
|
/*
|
|
|
|
* If an "acknowledgments" section is sent, a packfile is sent if and
|
|
|
|
* only if "ready" was sent in this section. The other sections
|
|
|
|
* ("shallow-info" and "wanted-refs") are sent only if a packfile is
|
|
|
|
* sent. Therefore, a DELIM is expected if "ready" is sent, and a FLUSH
|
|
|
|
* otherwise.
|
|
|
|
*/
|
|
|
|
if (received_ready && reader->status != PACKET_READ_DELIM)
|
|
|
|
die(_("expected packfile to be sent after 'ready'"));
|
|
|
|
if (!received_ready && reader->status != PACKET_READ_FLUSH)
|
|
|
|
die(_("expected no other sections to be sent after no 'ready'"));
|
|
|
|
|
2018-03-15 18:31:28 +01:00
|
|
|
/* return 0 if no common, 1 if there are common, or 2 if ready */
|
|
|
|
return received_ready ? 2 : (received_ack ? 1 : 0);
|
|
|
|
}
|
|
|
|
|
2018-03-15 18:31:29 +01:00
|
|
|
static void receive_shallow_info(struct fetch_pack_args *args,
|
fetch-pack: respect --no-update-shallow in v2
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.
Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.
This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-26 20:31:21 +01:00
|
|
|
struct packet_reader *reader,
|
|
|
|
struct oid_array *shallows,
|
|
|
|
struct shallow_info *si)
|
2018-03-15 18:31:29 +01:00
|
|
|
{
|
fetch-pack: respect --no-update-shallow in v2
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.
Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.
This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-26 20:31:21 +01:00
|
|
|
int unshallow_received = 0;
|
fetch-pack: do not take shallow lock unnecessarily
When fetching using protocol v2, the remote may send a "shallow-info"
section if the client is shallow. If so, Git as the client currently
takes the shallow file lock, even if the "shallow-info" section is
empty.
This is not a problem except that Git does not support taking the
shallow file lock after modifying the shallow file, because
is_repository_shallow() stores information that is never cleared. And
this take-after-modify occurs when Git does a tag-following fetch from a
shallow repository on a transport that does not support tag following
(since in this case, 2 fetches are performed).
To solve this issue, take the shallow file lock (and perform all other
shallow processing) only if the "shallow-info" section is non-empty;
otherwise, behave as if it were empty.
A full solution (probably, ensuring that any action of committing
shallow file locks also includes clearing the information stored by
is_repository_shallow()) would solve the issue without need for this
patch, but this patch is independently useful (as an optimization to
prevent writing a file in an unnecessary case), hence why I wrote it. I
have included a NEEDSWORK outlining the full solution.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-01-10 20:36:45 +01:00
|
|
|
|
2018-03-15 18:31:29 +01:00
|
|
|
process_section_header(reader, "shallow-info", 0);
|
|
|
|
while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
|
|
|
|
const char *arg;
|
|
|
|
struct object_id oid;
|
|
|
|
|
|
|
|
if (skip_prefix(reader->line, "shallow ", &arg)) {
|
|
|
|
if (get_oid_hex(arg, &oid))
|
|
|
|
die(_("invalid shallow line: %s"), reader->line);
|
fetch-pack: respect --no-update-shallow in v2
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.
Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.
This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-26 20:31:21 +01:00
|
|
|
oid_array_append(shallows, &oid);
|
2018-03-15 18:31:29 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (skip_prefix(reader->line, "unshallow ", &arg)) {
|
|
|
|
if (get_oid_hex(arg, &oid))
|
|
|
|
die(_("invalid unshallow line: %s"), reader->line);
|
2019-06-20 09:41:14 +02:00
|
|
|
if (!lookup_object(the_repository, &oid))
|
2018-03-15 18:31:29 +01:00
|
|
|
die(_("object not found: %s"), reader->line);
|
|
|
|
/* make sure that it is parsed as shallow */
|
2018-06-29 03:21:51 +02:00
|
|
|
if (!parse_object(the_repository, &oid))
|
2018-03-15 18:31:29 +01:00
|
|
|
die(_("error in object: %s"), reader->line);
|
|
|
|
if (unregister_shallow(&oid))
|
|
|
|
die(_("no shallow found: %s"), reader->line);
|
fetch-pack: respect --no-update-shallow in v2
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.
Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.
This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-26 20:31:21 +01:00
|
|
|
unshallow_received = 1;
|
2018-03-15 18:31:29 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
die(_("expected shallow/unshallow, got %s"), reader->line);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (reader->status != PACKET_READ_FLUSH &&
|
|
|
|
reader->status != PACKET_READ_DELIM)
|
2018-07-23 19:56:35 +02:00
|
|
|
die(_("error processing shallow info: %d"), reader->status);
|
2018-03-15 18:31:29 +01:00
|
|
|
|
fetch-pack: respect --no-update-shallow in v2
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.
Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.
This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-26 20:31:21 +01:00
|
|
|
if (args->deepen || unshallow_received) {
|
|
|
|
/*
|
|
|
|
* Treat these as shallow lines caused by our depth settings.
|
|
|
|
* In v0, these lines cannot cause refs to be rejected; do the
|
|
|
|
* same.
|
|
|
|
*/
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < shallows->nr; i++)
|
|
|
|
register_shallow(the_repository, &shallows->oid[i]);
|
fetch-pack: do not take shallow lock unnecessarily
When fetching using protocol v2, the remote may send a "shallow-info"
section if the client is shallow. If so, Git as the client currently
takes the shallow file lock, even if the "shallow-info" section is
empty.
This is not a problem except that Git does not support taking the
shallow file lock after modifying the shallow file, because
is_repository_shallow() stores information that is never cleared. And
this take-after-modify occurs when Git does a tag-following fetch from a
shallow repository on a transport that does not support tag following
(since in this case, 2 fetches are performed).
To solve this issue, take the shallow file lock (and perform all other
shallow processing) only if the "shallow-info" section is non-empty;
otherwise, behave as if it were empty.
A full solution (probably, ensuring that any action of committing
shallow file locks also includes clearing the information stored by
is_repository_shallow()) would solve the issue without need for this
patch, but this patch is independently useful (as an optimization to
prevent writing a file in an unnecessary case), hence why I wrote it. I
have included a NEEDSWORK outlining the full solution.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-01-10 20:36:45 +01:00
|
|
|
setup_alternate_shallow(&shallow_lock, &alternate_shallow_file,
|
|
|
|
NULL);
|
|
|
|
args->deepen = 1;
|
fetch-pack: respect --no-update-shallow in v2
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.
Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.
This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-26 20:31:21 +01:00
|
|
|
} else if (shallows->nr) {
|
|
|
|
/*
|
|
|
|
* Treat these as shallow lines caused by the remote being
|
|
|
|
* shallow. In v0, remote refs that reach these objects are
|
|
|
|
* rejected (unless --update-shallow is set); do the same.
|
|
|
|
*/
|
|
|
|
prepare_shallow_info(si, shallows);
|
|
|
|
if (si->nr_ours || si->nr_theirs)
|
|
|
|
alternate_shallow_file =
|
|
|
|
setup_temporary_shallow(si->shallow);
|
|
|
|
else
|
|
|
|
alternate_shallow_file = NULL;
|
2019-02-07 00:59:37 +01:00
|
|
|
} else {
|
|
|
|
alternate_shallow_file = NULL;
|
fetch-pack: do not take shallow lock unnecessarily
When fetching using protocol v2, the remote may send a "shallow-info"
section if the client is shallow. If so, Git as the client currently
takes the shallow file lock, even if the "shallow-info" section is
empty.
This is not a problem except that Git does not support taking the
shallow file lock after modifying the shallow file, because
is_repository_shallow() stores information that is never cleared. And
this take-after-modify occurs when Git does a tag-following fetch from a
shallow repository on a transport that does not support tag following
(since in this case, 2 fetches are performed).
To solve this issue, take the shallow file lock (and perform all other
shallow processing) only if the "shallow-info" section is non-empty;
otherwise, behave as if it were empty.
A full solution (probably, ensuring that any action of committing
shallow file locks also includes clearing the information stored by
is_repository_shallow()) would solve the issue without need for this
patch, but this patch is independently useful (as an optimization to
prevent writing a file in an unnecessary case), hence why I wrote it. I
have included a NEEDSWORK outlining the full solution.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-01-10 20:36:45 +01:00
|
|
|
}
|
2018-03-15 18:31:29 +01:00
|
|
|
}
|
|
|
|
|
2019-03-27 22:11:10 +01:00
|
|
|
static int cmp_name_ref(const void *name, const void *ref)
|
|
|
|
{
|
|
|
|
return strcmp(name, (*(struct ref **)ref)->name);
|
|
|
|
}
|
|
|
|
|
fetch-pack: unify ref in and out param
When a user fetches:
- at least one up-to-date ref and at least one non-up-to-date ref,
- using HTTP with protocol v0 (or something else that uses the fetch
command of a remote helper)
some refs might not be updated after the fetch.
This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow
info in output parameter", 2018-06-28) which allowed transports to
report the refs that they have fetched in a new out-parameter
"fetched_refs". If they do so, transport_fetch_refs() makes this
information available to its caller.
Users of "fetched_refs" rely on the following 3 properties:
(1) it is the complete list of refs that was passed to
transport_fetch_refs(),
(2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if
relevant), and
(3) it has updated OIDs if ref-in-want was used (introduced after
989b8c4452).
In an effort to satisfy (1), whenever transport_fetch_refs()
filters the refs sent to the transport, it re-adds the filtered refs to
whatever the transport supplies before returning it to the user.
However, the implementation in 989b8c4452 unconditionally re-adds the
filtered refs without checking if the transport refrained from reporting
anything in "fetched_refs" (which it is allowed to do), resulting in an
incomplete list, no longer satisfying (1).
An earlier effort to resolve this [1] solved the issue by re-adding the
filtered refs only if the transport did not refrain from reporting in
"fetched_refs", but after further discussion, it seems that the better
solution is to revert the API change that introduced "fetched_refs".
This API change was first suggested as part of a ref-in-want
implementation that allowed for ref patterns and, thus, there could be
drastic differences between the input refs and the refs actually fetched
[2]; we eventually decided to only allow exact ref names, but this API
change remained even though its necessity was decreased.
Therefore, revert this API change by reverting commit 989b8c4452, and
make receive_wanted_refs() update the OIDs in the sought array (like how
update_shallow() updates shallow information in the sought array)
instead. A test is also included to show that the user-visible bug
discussed at the beginning of this commit message no longer exists.
[1] https://public-inbox.org/git/20180801171806.GA122458@google.com/
[2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-01 22:13:20 +02:00
|
|
|
static void receive_wanted_refs(struct packet_reader *reader,
|
|
|
|
struct ref **sought, int nr_sought)
|
2018-06-28 00:30:23 +02:00
|
|
|
{
|
|
|
|
process_section_header(reader, "wanted-refs", 0);
|
|
|
|
while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
|
|
|
|
struct object_id oid;
|
|
|
|
const char *end;
|
2019-03-27 22:11:10 +01:00
|
|
|
struct ref **found;
|
2018-06-28 00:30:23 +02:00
|
|
|
|
|
|
|
if (parse_oid_hex(reader->line, &oid, &end) || *end++ != ' ')
|
2018-07-23 19:56:35 +02:00
|
|
|
die(_("expected wanted-ref, got '%s'"), reader->line);
|
2018-06-28 00:30:23 +02:00
|
|
|
|
2019-03-27 22:11:10 +01:00
|
|
|
found = bsearch(end, sought, nr_sought, sizeof(*sought),
|
|
|
|
cmp_name_ref);
|
|
|
|
if (!found)
|
2018-07-23 19:56:35 +02:00
|
|
|
die(_("unexpected wanted-ref: '%s'"), reader->line);
|
2019-03-27 22:11:10 +01:00
|
|
|
oidcpy(&(*found)->old_oid, &oid);
|
2018-06-28 00:30:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (reader->status != PACKET_READ_DELIM)
|
2018-07-23 19:56:35 +02:00
|
|
|
die(_("error processing wanted refs: %d"), reader->status);
|
2018-06-28 00:30:23 +02:00
|
|
|
}
|
|
|
|
|
2018-03-15 18:31:28 +01:00
|
|
|
/* States of the protocol v2 fetch loop in do_fetch_pack_v2(). */
enum fetch_state {
	/* Initial state: sort/filter refs and decide whether to fetch. */
	FETCH_CHECK_LOCAL = 0,
	/* Send a fetch request to the remote. */
	FETCH_SEND_REQUEST = 1,
	/* Process the ACKs/NAKs sent in reply to a request. */
	FETCH_PROCESS_ACKS = 2,
	/* Read the optional sections and then the packfile. */
	FETCH_GET_PACK = 3,
	/* Terminal state: the loop exits. */
	FETCH_DONE = 4,
};
|
|
|
|
|
|
|
|
static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
|
|
|
|
int fd[2],
|
|
|
|
const struct ref *orig_ref,
|
|
|
|
struct ref **sought, int nr_sought,
|
fetch-pack: respect --no-update-shallow in v2
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.
Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.
This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-26 20:31:21 +01:00
|
|
|
struct oid_array *shallows,
|
|
|
|
struct shallow_info *si,
|
2018-03-15 18:31:28 +01:00
|
|
|
char **pack_lockfile)
|
|
|
|
{
|
2019-08-13 20:37:48 +02:00
|
|
|
struct repository *r = the_repository;
|
2018-03-15 18:31:28 +01:00
|
|
|
struct ref *ref = copy_ref_list(orig_ref);
|
|
|
|
enum fetch_state state = FETCH_CHECK_LOCAL;
|
|
|
|
struct oidset common = OIDSET_INIT;
|
|
|
|
struct packet_reader reader;
|
2019-10-03 01:49:28 +02:00
|
|
|
int in_vain = 0, negotiation_started = 0;
|
2018-03-15 18:31:28 +01:00
|
|
|
int haves_to_send = INITIAL_FLUSH;
|
2018-06-15 00:54:28 +02:00
|
|
|
struct fetch_negotiator negotiator;
|
2019-08-13 20:37:48 +02:00
|
|
|
fetch_negotiator_init(r, &negotiator);
|
2018-03-15 18:31:28 +01:00
|
|
|
packet_reader_init(&reader, fd[0], NULL, 0,
|
pack-protocol.txt: accept error packets in any context
In the Git pack protocol definition, an error packet may appear only in
a certain context. However, servers can face a runtime error (e.g. I/O
error) at an arbitrary timing. This patch changes the protocol to allow
an error packet to be sent instead of any packet.
Without this protocol spec change, when a server cannot process a
request, there's no way to tell that to a client. Since the server
cannot produce a valid response, it would be forced to cut a connection
without telling why. With this protocol spec change, the server can be
more gentle in this situation. An old client may see these error packets
as an unexpected packet, but this is not worse than having an unexpected
EOF.
Following this protocol spec change, the error packet handling code is
moved to pkt-line.c. Implementation wise, this implementation uses
pkt-line to communicate with a subprocess. Since this is not a part of
Git protocol, it's possible that a packet that is not supposed to be an
error packet is mistakenly parsed as an error packet. This error packet
handling is enabled only for the Git pack protocol parsing code
considering this.
Signed-off-by: Masaya Suzuki <masayasuzuki@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-12-29 22:19:15 +01:00
|
|
|
PACKET_READ_CHOMP_NEWLINE |
|
|
|
|
PACKET_READ_DIE_ON_ERR_PACKET);
|
2019-01-16 20:28:15 +01:00
|
|
|
if (git_env_bool("GIT_TEST_SIDEBAND_ALL", 1) &&
|
|
|
|
server_supports_feature("fetch", "sideband-all", 0)) {
|
2019-01-16 20:28:14 +01:00
|
|
|
reader.use_sideband = 1;
|
|
|
|
reader.me = "fetch-pack";
|
|
|
|
}
|
2018-03-15 18:31:28 +01:00
|
|
|
|
|
|
|
while (state != FETCH_DONE) {
|
|
|
|
switch (state) {
|
|
|
|
case FETCH_CHECK_LOCAL:
|
|
|
|
sort_ref_list(&ref, ref_compare_name);
|
|
|
|
QSORT(sought, nr_sought, cmp_ref_by_name);
|
|
|
|
|
|
|
|
/* v2 supports these by default */
|
|
|
|
allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
|
|
|
|
use_sideband = 2;
|
2018-03-15 18:31:29 +01:00
|
|
|
if (args->depth > 0 || args->deepen_since || args->deepen_not)
|
|
|
|
args->deepen = 1;
|
2018-03-15 18:31:28 +01:00
|
|
|
|
|
|
|
/* Filter 'ref' by 'sought' and those that aren't local */
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
if (!args->no_dependents) {
|
|
|
|
mark_complete_and_common_ref(&negotiator, args, &ref);
|
|
|
|
filter_refs(args, &ref, sought, nr_sought);
|
|
|
|
if (everything_local(args, &ref))
|
|
|
|
state = FETCH_DONE;
|
|
|
|
else
|
|
|
|
state = FETCH_SEND_REQUEST;
|
|
|
|
|
|
|
|
mark_tips(&negotiator, args->negotiation_tips);
|
|
|
|
for_each_cached_alternate(&negotiator,
|
|
|
|
insert_one_alternate_object);
|
|
|
|
} else {
|
|
|
|
filter_refs(args, &ref, sought, nr_sought);
|
2018-03-15 18:31:28 +01:00
|
|
|
state = FETCH_SEND_REQUEST;
|
fetch-pack: avoid object flags if no_dependents
When fetch_pack() is invoked as part of another Git command (due to a
lazy fetch from a partial clone, for example), it uses object flags that
may already be used by the outer Git command.
The commit that introduced the lazy fetch feature (88e2f9ed8e
("introduce fetch-object: fetch one promisor object", 2017-12-05)) tried
to avoid this overlap, but it did not avoid it totally. It was
successful in avoiding writing COMPLETE, but did not avoid reading
COMPLETE, and did not avoid writing and reading ALTERNATE.
Ensure that no flags are written or read by fetch_pack() in the case
where it is used to perform a lazy fetch. To do this, it is sufficient
to avoid checking completeness of wanted refs (unnecessary in the case
of lazy fetches), and to avoid negotiation-related work (in the current
implementation, already, no negotiation is performed). After that was
done, the lack of overlap was verified by checking all direct and
indirect usages of COMPLETE and ALTERNATE - that they are read or
written only if no_dependents is false.
There are other possible solutions to this issue:
(1) Split fetch-pack.{c,h} into a flag-using part and a non-flag-using
part, and whenever no_dependents is set, only use the
non-flag-using part.
(2) Make fetch_pack() be able to be used with arbitrary repository
objects. fetch_pack() should then create its own repository object
based on the given repository object, with its own object
hashtable, so that the flags do not conflict.
(1) is possible but invasive - some functions would need to be split;
and such invasiveness would potentially be unnecessary if we ever were
to need (2) anyway. (2) would be useful if we were to support, say,
submodules that were partial clones themselves, but I don't know when or
if the Git project plans to support those.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:52 +02:00
|
|
|
}
|
2018-03-15 18:31:28 +01:00
|
|
|
break;
|
|
|
|
case FETCH_SEND_REQUEST:
|
2019-10-03 01:49:28 +02:00
|
|
|
if (!negotiation_started) {
|
|
|
|
negotiation_started = 1;
|
|
|
|
trace2_region_enter("fetch-pack",
|
|
|
|
"negotiation_v2",
|
|
|
|
the_repository);
|
|
|
|
}
|
2018-06-15 00:54:28 +02:00
|
|
|
if (send_fetch_request(&negotiator, fd[1], args, ref,
|
|
|
|
&common,
|
2019-01-16 20:28:14 +01:00
|
|
|
&haves_to_send, &in_vain,
|
|
|
|
reader.use_sideband))
|
2018-03-15 18:31:28 +01:00
|
|
|
state = FETCH_GET_PACK;
|
|
|
|
else
|
|
|
|
state = FETCH_PROCESS_ACKS;
|
|
|
|
break;
|
|
|
|
case FETCH_PROCESS_ACKS:
|
|
|
|
/* Process ACKs/NAKs */
|
2018-06-15 00:54:28 +02:00
|
|
|
switch (process_acks(&negotiator, &reader, &common)) {
|
2018-03-15 18:31:28 +01:00
|
|
|
case 2:
|
|
|
|
state = FETCH_GET_PACK;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
in_vain = 0;
|
|
|
|
/* fallthrough */
|
|
|
|
default:
|
|
|
|
state = FETCH_SEND_REQUEST;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case FETCH_GET_PACK:
|
2019-10-03 01:49:28 +02:00
|
|
|
trace2_region_leave("fetch-pack",
|
|
|
|
"negotiation_v2",
|
|
|
|
the_repository);
|
2018-03-15 18:31:29 +01:00
|
|
|
/* Check for shallow-info section */
|
|
|
|
if (process_section_header(&reader, "shallow-info", 1))
|
fetch-pack: respect --no-update-shallow in v2
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.
Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.
This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-26 20:31:21 +01:00
|
|
|
receive_shallow_info(args, &reader, shallows, si);
|
2018-03-15 18:31:29 +01:00
|
|
|
|
2018-06-28 00:30:23 +02:00
|
|
|
if (process_section_header(&reader, "wanted-refs", 1))
|
fetch-pack: unify ref in and out param
When a user fetches:
- at least one up-to-date ref and at least one non-up-to-date ref,
- using HTTP with protocol v0 (or something else that uses the fetch
command of a remote helper)
some refs might not be updated after the fetch.
This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow
info in output parameter", 2018-06-28) which allowed transports to
report the refs that they have fetched in a new out-parameter
"fetched_refs". If they do so, transport_fetch_refs() makes this
information available to its caller.
Users of "fetched_refs" rely on the following 3 properties:
(1) it is the complete list of refs that was passed to
transport_fetch_refs(),
(2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if
relevant), and
(3) it has updated OIDs if ref-in-want was used (introduced after
989b8c4452).
In an effort to satisfy (1), whenever transport_fetch_refs()
filters the refs sent to the transport, it re-adds the filtered refs to
whatever the transport supplies before returning it to the user.
However, the implementation in 989b8c4452 unconditionally re-adds the
filtered refs without checking if the transport refrained from reporting
anything in "fetched_refs" (which it is allowed to do), resulting in an
incomplete list, no longer satisfying (1).
An earlier effort to resolve this [1] solved the issue by readding the
filtered refs only if the transport did not refrain from reporting in
"fetched_refs", but after further discussion, it seems that the better
solution is to revert the API change that introduced "fetched_refs".
This API change was first suggested as part of a ref-in-want
implementation that allowed for ref patterns and, thus, there could be
drastic differences between the input refs and the refs actually fetched
[2]; we eventually decided to only allow exact ref names, but this API
change remained even though its necessity was decreased.
Therefore, revert this API change by reverting commit 989b8c4452, and
make receive_wanted_refs() update the OIDs in the sought array (like how
update_shallow() updates shallow information in the sought array)
instead. A test is also included to show that the user-visible bug
discussed at the beginning of this commit message no longer exists.
[1] https://public-inbox.org/git/20180801171806.GA122458@google.com/
[2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-01 22:13:20 +02:00
|
|
|
receive_wanted_refs(&reader, sought, nr_sought);
|
2018-06-28 00:30:23 +02:00
|
|
|
|
2018-03-15 18:31:28 +01:00
|
|
|
/* get the pack */
|
|
|
|
process_section_header(&reader, "packfile", 0);
|
|
|
|
if (get_pack(args, fd, pack_lockfile))
|
|
|
|
die(_("git fetch-pack: fetch failed."));
|
|
|
|
|
|
|
|
state = FETCH_DONE;
|
|
|
|
break;
|
|
|
|
case FETCH_DONE:
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-15 00:54:28 +02:00
|
|
|
negotiator.release(&negotiator);
|
2018-03-15 18:31:28 +01:00
|
|
|
oidset_clear(&common);
|
|
|
|
return ref;
|
|
|
|
}
|
|
|
|
|
2018-07-27 16:37:17 +02:00
|
|
|
static int fetch_pack_config_cb(const char *var, const char *value, void *cb)
|
|
|
|
{
|
|
|
|
if (strcmp(var, "fetch.fsck.skiplist") == 0) {
|
|
|
|
const char *path;
|
|
|
|
|
|
|
|
if (git_config_pathname(&path, var, value))
|
|
|
|
return 1;
|
|
|
|
strbuf_addf(&fsck_msg_types, "%cskiplist=%s",
|
|
|
|
fsck_msg_types.len ? ',' : '=', path);
|
|
|
|
free((char *)path);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (skip_prefix(var, "fetch.fsck.", &var)) {
|
|
|
|
if (is_valid_msg_type(var, value))
|
|
|
|
strbuf_addf(&fsck_msg_types, "%c%s=%s",
|
|
|
|
fsck_msg_types.len ? ',' : '=', var, value);
|
|
|
|
else
|
|
|
|
warning("Skipping unknown msg id '%s'", var);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return git_default_config(var, value, cb);
|
|
|
|
}
|
|
|
|
|
2014-08-07 18:21:20 +02:00
|
|
|
static void fetch_pack_config(void)
|
2012-10-26 17:53:55 +02:00
|
|
|
{
|
2014-08-07 18:21:20 +02:00
|
|
|
git_config_get_int("fetch.unpacklimit", &fetch_unpack_limit);
|
|
|
|
git_config_get_int("transfer.unpacklimit", &transfer_unpack_limit);
|
|
|
|
git_config_get_bool("repack.usedeltabaseoffset", &prefer_ofs_delta);
|
|
|
|
git_config_get_bool("fetch.fsckobjects", &fetch_fsck_objects);
|
|
|
|
git_config_get_bool("transfer.fsckobjects", &transfer_fsck_objects);
|
2012-10-26 17:53:55 +02:00
|
|
|
|
2018-07-27 16:37:17 +02:00
|
|
|
git_config(fetch_pack_config_cb, NULL);
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void fetch_pack_setup(void)
|
|
|
|
{
|
|
|
|
static int did_setup;
|
|
|
|
if (did_setup)
|
|
|
|
return;
|
2014-08-07 18:21:20 +02:00
|
|
|
fetch_pack_config();
|
2012-10-26 17:53:55 +02:00
|
|
|
if (0 <= transfer_unpack_limit)
|
|
|
|
unpack_limit = transfer_unpack_limit;
|
|
|
|
else if (0 <= fetch_unpack_limit)
|
|
|
|
unpack_limit = fetch_unpack_limit;
|
|
|
|
did_setup = 1;
|
|
|
|
}
|
|
|
|
|
2013-01-29 23:02:15 +01:00
|
|
|
static int remove_duplicates_in_refs(struct ref **ref, int nr)
|
|
|
|
{
|
|
|
|
struct string_list names = STRING_LIST_INIT_NODUP;
|
|
|
|
int src, dst;
|
|
|
|
|
|
|
|
for (src = dst = 0; src < nr; src++) {
|
|
|
|
struct string_list_item *item;
|
|
|
|
item = string_list_insert(&names, ref[src]->name);
|
|
|
|
if (item->util)
|
|
|
|
continue; /* already have it */
|
|
|
|
item->util = ref[src];
|
|
|
|
if (src != dst)
|
|
|
|
ref[dst] = ref[src];
|
|
|
|
dst++;
|
|
|
|
}
|
|
|
|
for (src = dst; src < nr; src++)
|
|
|
|
ref[src] = NULL;
|
|
|
|
string_list_clear(&names, 0);
|
|
|
|
return dst;
|
|
|
|
}
|
|
|
|
|
2013-12-05 14:02:39 +01:00
|
|
|
static void update_shallow(struct fetch_pack_args *args,
|
fetch-pack: unify ref in and out param
When a user fetches:
- at least one up-to-date ref and at least one non-up-to-date ref,
- using HTTP with protocol v0 (or something else that uses the fetch
command of a remote helper)
some refs might not be updated after the fetch.
This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow
info in output parameter", 2018-06-28) which allowed transports to
report the refs that they have fetched in a new out-parameter
"fetched_refs". If they do so, transport_fetch_refs() makes this
information available to its caller.
Users of "fetched_refs" rely on the following 3 properties:
(1) it is the complete list of refs that was passed to
transport_fetch_refs(),
(2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if
relevant), and
(3) it has updated OIDs if ref-in-want was used (introduced after
989b8c4452).
In an effort to satisfy (1), whenever transport_fetch_refs()
filters the refs sent to the transport, it re-adds the filtered refs to
whatever the transport supplies before returning it to the user.
However, the implementation in 989b8c4452 unconditionally re-adds the
filtered refs without checking if the transport refrained from reporting
anything in "fetched_refs" (which it is allowed to do), resulting in an
incomplete list, no longer satisfying (1).
An earlier effort to resolve this [1] solved the issue by readding the
filtered refs only if the transport did not refrain from reporting in
"fetched_refs", but after further discussion, it seems that the better
solution is to revert the API change that introduced "fetched_refs".
This API change was first suggested as part of a ref-in-want
implementation that allowed for ref patterns and, thus, there could be
drastic differences between the input refs and the refs actually fetched
[2]; we eventually decided to only allow exact ref names, but this API
change remained even though its necessity was decreased.
Therefore, revert this API change by reverting commit 989b8c4452, and
make receive_wanted_refs() update the OIDs in the sought array (like how
update_shallow() updates shallow information in the sought array)
instead. A test is also included to show that the user-visible bug
discussed at the beginning of this commit message no longer exists.
[1] https://public-inbox.org/git/20180801171806.GA122458@google.com/
[2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-01 22:13:20 +02:00
|
|
|
struct ref **sought, int nr_sought,
|
2013-12-05 14:02:39 +01:00
|
|
|
struct shallow_info *si)
|
2013-12-05 14:02:37 +01:00
|
|
|
{
|
2017-03-31 03:40:00 +02:00
|
|
|
struct oid_array ref = OID_ARRAY_INIT;
|
2013-12-05 14:02:40 +01:00
|
|
|
int *status;
|
2013-12-05 14:02:39 +01:00
|
|
|
int i;
|
|
|
|
|
2016-06-12 12:53:56 +02:00
|
|
|
if (args->deepen && alternate_shallow_file) {
|
2013-12-05 14:02:37 +01:00
|
|
|
if (*alternate_shallow_file == '\0') { /* --unshallow */
|
2018-05-18 00:51:51 +02:00
|
|
|
unlink_or_warn(git_path_shallow(the_repository));
|
2013-12-05 14:02:37 +01:00
|
|
|
rollback_lock_file(&shallow_lock);
|
|
|
|
} else
|
|
|
|
commit_lock_file(&shallow_lock);
|
2019-02-04 01:06:50 +01:00
|
|
|
alternate_shallow_file = NULL;
|
2013-12-05 14:02:37 +01:00
|
|
|
return;
|
|
|
|
}
|
2013-12-05 14:02:39 +01:00
|
|
|
|
|
|
|
if (!si->shallow || !si->shallow->nr)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (args->cloning) {
|
|
|
|
/*
|
|
|
|
* remote is shallow, but this is a clone, there are
|
|
|
|
* no objects in repo to worry about. Accept any
|
|
|
|
* shallow points that exist in the pack (iow in repo
|
|
|
|
* after get_pack() and reprepare_packed_git())
|
|
|
|
*/
|
2017-03-31 03:40:00 +02:00
|
|
|
struct oid_array extra = OID_ARRAY_INIT;
|
2017-03-26 18:01:37 +02:00
|
|
|
struct object_id *oid = si->shallow->oid;
|
2013-12-05 14:02:39 +01:00
|
|
|
for (i = 0; i < si->shallow->nr; i++)
|
2017-03-26 18:01:37 +02:00
|
|
|
if (has_object_file(&oid[i]))
|
2017-03-31 03:40:00 +02:00
|
|
|
oid_array_append(&extra, &oid[i]);
|
2013-12-05 14:02:39 +01:00
|
|
|
if (extra.nr) {
|
|
|
|
setup_alternate_shallow(&shallow_lock,
|
|
|
|
&alternate_shallow_file,
|
|
|
|
&extra);
|
|
|
|
commit_lock_file(&shallow_lock);
|
2019-02-04 01:06:50 +01:00
|
|
|
alternate_shallow_file = NULL;
|
2013-12-05 14:02:39 +01:00
|
|
|
}
|
2017-03-31 03:40:00 +02:00
|
|
|
oid_array_clear(&extra);
|
2013-12-05 14:02:39 +01:00
|
|
|
return;
|
|
|
|
}
|
2013-12-05 14:02:40 +01:00
|
|
|
|
|
|
|
if (!si->nr_ours && !si->nr_theirs)
|
|
|
|
return;
|
|
|
|
|
|
|
|
remove_nonexistent_theirs_shallow(si);
|
|
|
|
if (!si->nr_ours && !si->nr_theirs)
|
|
|
|
return;
|
fetch-pack: unify ref in and out param
When a user fetches:
- at least one up-to-date ref and at least one non-up-to-date ref,
- using HTTP with protocol v0 (or something else that uses the fetch
command of a remote helper)
some refs might not be updated after the fetch.
This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow
info in output parameter", 2018-06-28) which allowed transports to
report the refs that they have fetched in a new out-parameter
"fetched_refs". If they do so, transport_fetch_refs() makes this
information available to its caller.
Users of "fetched_refs" rely on the following 3 properties:
(1) it is the complete list of refs that was passed to
transport_fetch_refs(),
(2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if
relevant), and
(3) it has updated OIDs if ref-in-want was used (introduced after
989b8c4452).
In an effort to satisfy (1), whenever transport_fetch_refs()
filters the refs sent to the transport, it re-adds the filtered refs to
whatever the transport supplies before returning it to the user.
However, the implementation in 989b8c4452 unconditionally re-adds the
filtered refs without checking if the transport refrained from reporting
anything in "fetched_refs" (which it is allowed to do), resulting in an
incomplete list, no longer satisfying (1).
An earlier effort to resolve this [1] solved the issue by readding the
filtered refs only if the transport did not refrain from reporting in
"fetched_refs", but after further discussion, it seems that the better
solution is to revert the API change that introduced "fetched_refs".
This API change was first suggested as part of a ref-in-want
implementation that allowed for ref patterns and, thus, there could be
drastic differences between the input refs and the refs actually fetched
[2]; we eventually decided to only allow exact ref names, but this API
change remained even though its necessity was decreased.
Therefore, revert this API change by reverting commit 989b8c4452, and
make receive_wanted_refs() update the OIDs in the sought array (like how
update_shallow() updates shallow information in the sought array)
instead. A test is also included to show that the user-visible bug
discussed at the beginning of this commit message no longer exists.
[1] https://public-inbox.org/git/20180801171806.GA122458@google.com/
[2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-01 22:13:20 +02:00
|
|
|
for (i = 0; i < nr_sought; i++)
|
|
|
|
oid_array_append(&ref, &sought[i]->old_oid);
|
2013-12-05 14:02:40 +01:00
|
|
|
si->ref = &ref;
|
|
|
|
|
2013-12-05 14:02:42 +01:00
|
|
|
if (args->update_shallow) {
|
|
|
|
/*
|
|
|
|
* remote is also shallow, .git/shallow may be updated
|
|
|
|
* so all refs can be accepted. Make sure we only add
|
|
|
|
* shallow roots that are actually reachable from new
|
|
|
|
* refs.
|
|
|
|
*/
|
2017-03-31 03:40:00 +02:00
|
|
|
struct oid_array extra = OID_ARRAY_INIT;
|
2017-03-26 18:01:37 +02:00
|
|
|
struct object_id *oid = si->shallow->oid;
|
2013-12-05 14:02:42 +01:00
|
|
|
assign_shallow_commits_to_refs(si, NULL, NULL);
|
|
|
|
if (!si->nr_ours && !si->nr_theirs) {
|
2017-03-31 03:40:00 +02:00
|
|
|
oid_array_clear(&ref);
|
2013-12-05 14:02:42 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
for (i = 0; i < si->nr_ours; i++)
|
2017-03-31 03:40:00 +02:00
|
|
|
oid_array_append(&extra, &oid[si->ours[i]]);
|
2013-12-05 14:02:42 +01:00
|
|
|
for (i = 0; i < si->nr_theirs; i++)
|
2017-03-31 03:40:00 +02:00
|
|
|
oid_array_append(&extra, &oid[si->theirs[i]]);
|
2013-12-05 14:02:42 +01:00
|
|
|
setup_alternate_shallow(&shallow_lock,
|
|
|
|
&alternate_shallow_file,
|
|
|
|
&extra);
|
|
|
|
commit_lock_file(&shallow_lock);
|
2017-03-31 03:40:00 +02:00
|
|
|
oid_array_clear(&extra);
|
|
|
|
oid_array_clear(&ref);
|
2019-02-04 01:06:50 +01:00
|
|
|
alternate_shallow_file = NULL;
|
2013-12-05 14:02:42 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2013-12-05 14:02:40 +01:00
|
|
|
/*
|
|
|
|
* remote is also shallow, check what ref is safe to update
|
|
|
|
* without updating .git/shallow
|
|
|
|
*/
|
fetch-pack: unify ref in and out param
When a user fetches:
- at least one up-to-date ref and at least one non-up-to-date ref,
- using HTTP with protocol v0 (or something else that uses the fetch
command of a remote helper)
some refs might not be updated after the fetch.
This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow
info in output parameter", 2018-06-28) which allowed transports to
report the refs that they have fetched in a new out-parameter
"fetched_refs". If they do so, transport_fetch_refs() makes this
information available to its caller.
Users of "fetched_refs" rely on the following 3 properties:
(1) it is the complete list of refs that was passed to
transport_fetch_refs(),
(2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if
relevant), and
(3) it has updated OIDs if ref-in-want was used (introduced after
989b8c4452).
In an effort to satisfy (1), whenever transport_fetch_refs()
filters the refs sent to the transport, it re-adds the filtered refs to
whatever the transport supplies before returning it to the user.
However, the implementation in 989b8c4452 unconditionally re-adds the
filtered refs without checking if the transport refrained from reporting
anything in "fetched_refs" (which it is allowed to do), resulting in an
incomplete list, no longer satisfying (1).
An earlier effort to resolve this [1] solved the issue by readding the
filtered refs only if the transport did not refrain from reporting in
"fetched_refs", but after further discussion, it seems that the better
solution is to revert the API change that introduced "fetched_refs".
This API change was first suggested as part of a ref-in-want
implementation that allowed for ref patterns and, thus, there could be
drastic differences between the input refs and the refs actually fetched
[2]; we eventually decided to only allow exact ref names, but this API
change remained even though its necessity was decreased.
Therefore, revert this API change by reverting commit 989b8c4452, and
make receive_wanted_refs() update the OIDs in the sought array (like how
update_shallow() updates shallow information in the sought array)
instead. A test is also included to show that the user-visible bug
discussed at the beginning of this commit message no longer exists.
[1] https://public-inbox.org/git/20180801171806.GA122458@google.com/
[2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-01 22:13:20 +02:00
|
|
|
status = xcalloc(nr_sought, sizeof(*status));
|
2013-12-05 14:02:40 +01:00
|
|
|
assign_shallow_commits_to_refs(si, NULL, status);
|
|
|
|
if (si->nr_ours || si->nr_theirs) {
|
fetch-pack: unify ref in and out param
When a user fetches:
- at least one up-to-date ref and at least one non-up-to-date ref,
- using HTTP with protocol v0 (or something else that uses the fetch
command of a remote helper)
some refs might not be updated after the fetch.
This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow
info in output parameter", 2018-06-28) which allowed transports to
report the refs that they have fetched in a new out-parameter
"fetched_refs". If they do so, transport_fetch_refs() makes this
information available to its caller.
Users of "fetched_refs" rely on the following 3 properties:
(1) it is the complete list of refs that was passed to
transport_fetch_refs(),
(2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if
relevant), and
(3) it has updated OIDs if ref-in-want was used (introduced after
989b8c4452).
In an effort to satisfy (1), whenever transport_fetch_refs()
filters the refs sent to the transport, it re-adds the filtered refs to
whatever the transport supplies before returning it to the user.
However, the implementation in 989b8c4452 unconditionally re-adds the
filtered refs without checking if the transport refrained from reporting
anything in "fetched_refs" (which it is allowed to do), resulting in an
incomplete list, no longer satisfying (1).
An earlier effort to resolve this [1] solved the issue by readding the
filtered refs only if the transport did not refrain from reporting in
"fetched_refs", but after further discussion, it seems that the better
solution is to revert the API change that introduced "fetched_refs".
This API change was first suggested as part of a ref-in-want
implementation that allowed for ref patterns and, thus, there could be
drastic differences between the input refs and the refs actually fetched
[2]; we eventually decided to only allow exact ref names, but this API
change remained even though its necessity was decreased.
Therefore, revert this API change by reverting commit 989b8c4452, and
make receive_wanted_refs() update the OIDs in the sought array (like how
update_shallow() updates shallow information in the sought array)
instead. A test is also included to show that the user-visible bug
discussed at the beginning of this commit message no longer exists.
[1] https://public-inbox.org/git/20180801171806.GA122458@google.com/
[2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-01 22:13:20 +02:00
|
|
|
for (i = 0; i < nr_sought; i++)
|
2013-12-05 14:02:40 +01:00
|
|
|
if (status[i])
|
fetch-pack: unify ref in and out param
When a user fetches:
- at least one up-to-date ref and at least one non-up-to-date ref,
- using HTTP with protocol v0 (or something else that uses the fetch
command of a remote helper)
some refs might not be updated after the fetch.
This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow
info in output parameter", 2018-06-28) which allowed transports to
report the refs that they have fetched in a new out-parameter
"fetched_refs". If they do so, transport_fetch_refs() makes this
information available to its caller.
Users of "fetched_refs" rely on the following 3 properties:
(1) it is the complete list of refs that was passed to
transport_fetch_refs(),
(2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if
relevant), and
(3) it has updated OIDs if ref-in-want was used (introduced after
989b8c4452).
In an effort to satisfy (1), whenever transport_fetch_refs()
filters the refs sent to the transport, it re-adds the filtered refs to
whatever the transport supplies before returning it to the user.
However, the implementation in 989b8c4452 unconditionally re-adds the
filtered refs without checking if the transport refrained from reporting
anything in "fetched_refs" (which it is allowed to do), resulting in an
incomplete list, no longer satisfying (1).
An earlier effort to resolve this [1] solved the issue by readding the
filtered refs only if the transport did not refrain from reporting in
"fetched_refs", but after further discussion, it seems that the better
solution is to revert the API change that introduced "fetched_refs".
This API change was first suggested as part of a ref-in-want
implementation that allowed for ref patterns and, thus, there could be
drastic differences between the input refs and the refs actually fetched
[2]; we eventually decided to only allow exact ref names, but this API
change remained even though its necessity was decreased.
Therefore, revert this API change by reverting commit 989b8c4452, and
make receive_wanted_refs() update the OIDs in the sought array (like how
update_shallow() updates shallow information in the sought array)
instead. A test is also included to show that the user-visible bug
discussed at the beginning of this commit message no longer exists.
[1] https://public-inbox.org/git/20180801171806.GA122458@google.com/
[2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-01 22:13:20 +02:00
|
|
|
sought[i]->status = REF_STATUS_REJECT_SHALLOW;
|
2013-12-05 14:02:40 +01:00
|
|
|
}
|
|
|
|
free(status);
|
2017-03-31 03:40:00 +02:00
|
|
|
oid_array_clear(&ref);
|
2013-12-05 14:02:37 +01:00
|
|
|
}
|
|
|
|
|
fetch-pack: write shallow, then check connectivity
When fetching, connectivity is checked after the shallow file is
updated. There are 2 issues with this: (1) the connectivity check is
only performed up to ancestors of existing refs (which is not thorough
enough if we were deepening an existing ref in the first place), and (2)
there is no rollback of the shallow file if the connectivity check
fails.
To solve (1), update the connectivity check to check the ancestry chain
completely in the case of a deepening fetch by refraining from passing
"--not --all" when invoking rev-list in connected.c.
To solve (2), have fetch_pack() perform its own connectivity check
before updating the shallow file. To support existing use cases in which
"git fetch-pack" is used to download objects without much regard as to
the connectivity of the resulting objects with respect to the existing
repository, the connectivity check is only done if necessary (that is,
the fetch is not a clone, and the fetch involves shallow/deepen
functionality). "git fetch" still performs its own connectivity check,
preserving correctness but sometimes performing redundant work. This
redundancy is mitigated by the fact that fetch_pack() reports if it has
performed a connectivity check itself, and if the transport supports
connect or stateless-connect, it will bubble up that report so that "git
fetch" knows not to perform the connectivity check in such a case.
This was noticed when a user tried to deepen an existing repository by
fetching with --no-shallow from a server that did not send all necessary
objects - the connectivity check as run by "git fetch" succeeded, but a
subsequent "git fsck" failed.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-03 00:08:43 +02:00
|
|
|
static int iterate_ref_map(void *cb_data, struct object_id *oid)
|
|
|
|
{
|
|
|
|
struct ref **rm = cb_data;
|
|
|
|
struct ref *ref = *rm;
|
|
|
|
|
|
|
|
if (!ref)
|
|
|
|
return -1; /* end of the list */
|
|
|
|
*rm = ref->next;
|
|
|
|
oidcpy(oid, &ref->old_oid);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-10-26 17:53:55 +02:00
|
|
|
struct ref *fetch_pack(struct fetch_pack_args *args,
|
2019-03-20 09:16:14 +01:00
|
|
|
int fd[],
|
2012-10-26 17:53:55 +02:00
|
|
|
const struct ref *ref,
|
2013-01-29 23:02:15 +01:00
|
|
|
struct ref **sought, int nr_sought,
|
2017-03-31 03:40:00 +02:00
|
|
|
struct oid_array *shallow,
|
2018-03-15 18:31:28 +01:00
|
|
|
char **pack_lockfile,
|
|
|
|
enum protocol_version version)
|
2012-10-26 17:53:55 +02:00
|
|
|
{
|
|
|
|
struct ref *ref_cpy;
|
2013-12-05 14:02:39 +01:00
|
|
|
struct shallow_info si;
|
fetch-pack: respect --no-update-shallow in v2
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.
Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.
This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-26 20:31:21 +01:00
|
|
|
struct oid_array shallows_scratch = OID_ARRAY_INIT;
|
2012-10-26 17:53:55 +02:00
|
|
|
|
|
|
|
fetch_pack_setup();
|
2013-01-29 23:02:15 +01:00
|
|
|
if (nr_sought)
|
|
|
|
nr_sought = remove_duplicates_in_refs(sought, nr_sought);
|
2012-10-26 17:53:55 +02:00
|
|
|
|
fetch-pack: exclude blobs when lazy-fetching trees
A partial clone with missing trees can be obtained using "git clone
--filter=tree:none <repo>". In such a repository, when a tree needs to
be lazily fetched, any tree or blob it directly or indirectly references
is fetched as well, regardless of whether the original command required
those objects, or if the local repository already had some of them.
This is because the fetch protocol, which the lazy fetch uses, does not
allow clients to request that only the wanted objects be sent, which
would be the ideal solution. This patch implements a partial solution:
specify the "blob:none" filter, somewhat reducing the fetch payload.
This change has no effect when lazily fetching blobs (due to how filters
work). And if lazily fetching a commit (such repositories are difficult
to construct and is not a use case we support very well, but it is
possible), referenced commits and trees are still fetched - only the
blobs are not fetched.
The necessary code change is done in fetch_pack() instead of somewhere
closer to where the "filter" instruction is written to the wire so that
only one part of the code needs to be changed in order for users of all
protocol versions to benefit from this optimization.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 01:04:53 +02:00
|
|
|
if (args->no_dependents && !args->filter_options.choice) {
|
|
|
|
/*
|
|
|
|
* The protocol does not support requesting that only the
|
|
|
|
* wanted objects be sent, so approximate this by setting a
|
|
|
|
* "blob:none" filter if no filter is already set. This works
|
|
|
|
* for all object types: note that wanted blobs will still be
|
|
|
|
* sent because they are directly specified as a "want".
|
|
|
|
*
|
|
|
|
* NEEDSWORK: Add an option in the protocol to request that
|
|
|
|
* only the wanted objects be sent, and implement it.
|
|
|
|
*/
|
|
|
|
parse_list_objects_filter(&args->filter_options, "blob:none");
|
|
|
|
}
|
|
|
|
|
2018-09-27 21:24:05 +02:00
|
|
|
if (version != protocol_v2 && !ref) {
|
2012-10-26 17:53:55 +02:00
|
|
|
packet_flush(fd[1]);
|
2016-06-12 12:53:55 +02:00
|
|
|
die(_("no matching remote head"));
|
2012-10-26 17:53:55 +02:00
|
|
|
}
|
2019-03-26 20:31:20 +01:00
|
|
|
if (version == protocol_v2) {
|
|
|
|
if (shallow->nr)
|
|
|
|
BUG("Protocol V2 does not provide shallows at this point in the fetch");
|
|
|
|
memset(&si, 0, sizeof(si));
|
2018-03-15 18:31:28 +01:00
|
|
|
ref_cpy = do_fetch_pack_v2(args, fd, ref, sought, nr_sought,
|
fetch-pack: respect --no-update-shallow in v2
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.
Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.
This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-26 20:31:21 +01:00
|
|
|
&shallows_scratch, &si,
|
2018-03-15 18:31:28 +01:00
|
|
|
pack_lockfile);
|
2019-03-26 20:31:20 +01:00
|
|
|
} else {
|
|
|
|
prepare_shallow_info(&si, shallow);
|
2018-03-15 18:31:28 +01:00
|
|
|
ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought,
|
|
|
|
&si, pack_lockfile);
|
2019-03-26 20:31:20 +01:00
|
|
|
}
|
2018-03-23 18:45:21 +01:00
|
|
|
reprepare_packed_git(the_repository);
|
fetch-pack: write shallow, then check connectivity
When fetching, connectivity is checked after the shallow file is
updated. There are 2 issues with this: (1) the connectivity check is
only performed up to ancestors of existing refs (which is not thorough
enough if we were deepening an existing ref in the first place), and (2)
there is no rollback of the shallow file if the connectivity check
fails.
To solve (1), update the connectivity check to check the ancestry chain
completely in the case of a deepening fetch by refraining from passing
"--not --all" when invoking rev-list in connected.c.
To solve (2), have fetch_pack() perform its own connectivity check
before updating the shallow file. To support existing use cases in which
"git fetch-pack" is used to download objects without much regard as to
the connectivity of the resulting objects with respect to the existing
repository, the connectivity check is only done if necessary (that is,
the fetch is not a clone, and the fetch involves shallow/deepen
functionality). "git fetch" still performs its own connectivity check,
preserving correctness but sometimes performing redundant work. This
redundancy is mitigated by the fact that fetch_pack() reports if it has
performed a connectivity check itself, and if the transport supports
connect or stateless-connect, it will bubble up that report so that "git
fetch" knows not to perform the connectivity check in such a case.
This was noticed when a user tried to deepen an existing repository by
fetching with --no-shallow from a server that did not send all necessary
objects - the connectivity check as run by "git fetch" succeeded, but a
subsequent "git fsck" failed.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-03 00:08:43 +02:00
|
|
|
|
|
|
|
if (!args->cloning && args->deepen) {
|
|
|
|
struct check_connected_options opt = CHECK_CONNECTED_INIT;
|
|
|
|
struct ref *iterator = ref_cpy;
|
|
|
|
opt.shallow_file = alternate_shallow_file;
|
|
|
|
if (args->deepen)
|
|
|
|
opt.is_deepening_fetch = 1;
|
|
|
|
if (check_connected(iterate_ref_map, &iterator, &opt)) {
|
|
|
|
error(_("remote did not send all necessary objects"));
|
|
|
|
free_refs(ref_cpy);
|
|
|
|
ref_cpy = NULL;
|
|
|
|
rollback_lock_file(&shallow_lock);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
args->connectivity_checked = 1;
|
|
|
|
}
|
|
|
|
|
fetch-pack: unify ref in and out param
When a user fetches:
- at least one up-to-date ref and at least one non-up-to-date ref,
- using HTTP with protocol v0 (or something else that uses the fetch
command of a remote helper)
some refs might not be updated after the fetch.
This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow
info in output parameter", 2018-06-28) which allowed transports to
report the refs that they have fetched in a new out-parameter
"fetched_refs". If they do so, transport_fetch_refs() makes this
information available to its caller.
Users of "fetched_refs" rely on the following 3 properties:
(1) it is the complete list of refs that was passed to
transport_fetch_refs(),
(2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if
relevant), and
(3) it has updated OIDs if ref-in-want was used (introduced after
989b8c4452).
In an effort to satisfy (1), whenever transport_fetch_refs()
filters the refs sent to the transport, it re-adds the filtered refs to
whatever the transport supplies before returning it to the user.
However, the implementation in 989b8c4452 unconditionally re-adds the
filtered refs without checking if the transport refrained from reporting
anything in "fetched_refs" (which it is allowed to do), resulting in an
incomplete list, no longer satisfying (1).
An earlier effort to resolve this [1] solved the issue by readding the
filtered refs only if the transport did not refrain from reporting in
"fetched_refs", but after further discussion, it seems that the better
solution is to revert the API change that introduced "fetched_refs".
This API change was first suggested as part of a ref-in-want
implementation that allowed for ref patterns and, thus, there could be
drastic differences between the input refs and the refs actually fetched
[2]; we eventually decided to only allow exact ref names, but this API
change remained even though its necessity was decreased.
Therefore, revert this API change by reverting commit 989b8c4452, and
make receive_wanted_refs() update the OIDs in the sought array (like how
update_shallow() updates shallow information in the sought array)
instead. A test is also included to show that the user-visible bug
discussed at the beginning of this commit message no longer exists.
[1] https://public-inbox.org/git/20180801171806.GA122458@google.com/
[2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-01 22:13:20 +02:00
|
|
|
update_shallow(args, sought, nr_sought, &si);
|
fetch-pack: write shallow, then check connectivity
When fetching, connectivity is checked after the shallow file is
updated. There are 2 issues with this: (1) the connectivity check is
only performed up to ancestors of existing refs (which is not thorough
enough if we were deepening an existing ref in the first place), and (2)
there is no rollback of the shallow file if the connectivity check
fails.
To solve (1), update the connectivity check to check the ancestry chain
completely in the case of a deepening fetch by refraining from passing
"--not --all" when invoking rev-list in connected.c.
To solve (2), have fetch_pack() perform its own connectivity check
before updating the shallow file. To support existing use cases in which
"git fetch-pack" is used to download objects without much regard as to
the connectivity of the resulting objects with respect to the existing
repository, the connectivity check is only done if necessary (that is,
the fetch is not a clone, and the fetch involves shallow/deepen
functionality). "git fetch" still performs its own connectivity check,
preserving correctness but sometimes performing redundant work. This
redundancy is mitigated by the fact that fetch_pack() reports if it has
performed a connectivity check itself, and if the transport supports
connect or stateless-connect, it will bubble up that report so that "git
fetch" knows not to perform the connectivity check in such a case.
This was noticed when a user tried to deepen an existing repository by
fetching with --no-shallow from a server that did not send all necessary
objects - the connectivity check as run by "git fetch" succeeded, but a
subsequent "git fsck" failed.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-03 00:08:43 +02:00
|
|
|
cleanup:
|
2013-12-05 14:02:39 +01:00
|
|
|
clear_shallow_info(&si);
|
fetch-pack: respect --no-update-shallow in v2
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.
Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.
This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-26 20:31:21 +01:00
|
|
|
oid_array_clear(&shallows_scratch);
|
2012-10-26 17:53:55 +02:00
|
|
|
return ref_cpy;
|
|
|
|
}
|
2017-02-22 17:01:22 +01:00
|
|
|
|
|
|
|
int report_unmatched_refs(struct ref **sought, int nr_sought)
|
|
|
|
{
|
|
|
|
int i, ret = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < nr_sought; i++) {
|
2017-02-22 17:05:57 +01:00
|
|
|
if (!sought[i])
|
2017-02-22 17:01:22 +01:00
|
|
|
continue;
|
2017-02-22 17:05:57 +01:00
|
|
|
switch (sought[i]->match_status) {
|
|
|
|
case REF_MATCHED:
|
|
|
|
continue;
|
|
|
|
case REF_NOT_MATCHED:
|
|
|
|
error(_("no such remote ref %s"), sought[i]->name);
|
|
|
|
break;
|
|
|
|
case REF_UNADVERTISED_NOT_ALLOWED:
|
|
|
|
error(_("Server does not allow request for unadvertised object %s"),
|
|
|
|
sought[i]->name);
|
|
|
|
break;
|
|
|
|
}
|
2017-02-22 17:01:22 +01:00
|
|
|
ret = 1;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|