Merge branch 'jh/partial-clone'

The machinery to clone & fetch, which in turn involves packing and
unpacking objects, has been taught how to omit certain objects using
the filtering mechanism introduced by the jh/object-filtering
topic, and also to mark the resulting pack as a promisor pack to
tolerate missing objects, taking advantage of the mechanism
introduced by the jh/fsck-promisors topic.
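
A minimal usage sketch of what the combined topic enables (the URL,
remote and path names here are placeholders, not taken from this
series):

    # Clone while omitting all blobs; the server must have
    # uploadpack.allowFilter enabled (see the config change below).
    git clone --filter=blob:none https://example.com/repo.git repo

    # The clone records "origin" as the promisor remote and remembers
    # the filter-spec, so a plain fetch inherits --filter=blob:none.
    git -C repo fetch origin

    # Commands that need a missing blob (checkout, diff, blame, ...)
    # fetch it from the promisor remote on demand.
    git -C repo blame origin/master -- some/file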

* jh/partial-clone:
  t5616: test bulk prefetch after partial fetch
  fetch: inherit filter-spec from partial clone
  t5616: end-to-end tests for partial clone
  fetch-pack: restore save_commit_buffer after use
  unpack-trees: batch fetching of missing blobs
  clone: partial clone
  partial-clone: define partial clone settings in config
  fetch: support filters
  fetch: refactor calculation of remote list
  fetch-pack: test support excluding large blobs
  fetch-pack: add --no-filter
  fetch-pack, index-pack, transport: partial clone
  upload-pack: add object filtering for partial clone
Junio C Hamano 2018-02-13 13:39:04 -08:00
commit 6bed209a20
26 changed files with 657 additions and 30 deletions

View File

@ -3343,6 +3343,10 @@ uploadpack.packObjectsHook::
was run. I.e., `upload-pack` will feed input intended for
`pack-objects` to the hook, and expects a completed packfile on
stdout.
uploadpack.allowFilter::
If this option is set, `upload-pack` will advertise partial
clone and partial fetch object filtering.
+
Note that this configuration variable is ignored if it is seen in the
repository-level config (this is a safety measure against fetching from
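
For reference, a sketch of the server-side setup that the test scripts
added by this topic use; the allowanysha1inwant setting comes from
those tests rather than from this hunk:

    # Advertise the "filter" capability so that clients may request
    # partial clones and partial fetches from this repository.
    git -C srv.bare config --local uploadpack.allowfilter 1

    # Also allow the follow-up fetches of individual missing objects
    # that a partial clone performs on demand.
    git -C srv.bare config --local uploadpack.allowanysha1inwant 1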

View File

@ -241,6 +241,7 @@ out of what the server said it could do with the first 'want' line.
upload-request = want-list
*shallow-line
*1depth-request
[filter-request]
flush-pkt
want-list = first-want
@ -256,6 +257,8 @@ out of what the server said it could do with the first 'want' line.
additional-want = PKT-LINE("want" SP obj-id)
depth = 1*DIGIT
filter-request = PKT-LINE("filter" SP filter-spec)
----
Clients MUST send all the obj-ids it wants from the reference
@ -278,6 +281,11 @@ complete those commits. Commits whose parents are not received as a
result are defined as shallow and marked as such in the server. This
information is sent back to the client in the next step.
The client can optionally request that pack-objects omit various
objects from the packfile using one of several filtering techniques.
These are intended for use with partial clone and partial fetch
operations. See `rev-list` for possible "filter-spec" values.
Once all the 'want's and 'shallow's (and optional 'deepen') are
transferred, clients MUST send a flush-pkt, to tell the server side
that it is done sending the list.
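
As a concrete illustration of the grammar above, the request sent for a
blob-less clone can be inspected with the existing GIT_TRACE_PACKET
tracing; the traced lines shown in the comments are illustrative:

    GIT_TRACE_PACKET="$(pwd)/trace" \
        git clone --filter=blob:none "file://$(pwd)/srv.bare" pc1
    # Among the "git> ..." lines recorded in "trace":
    #   git> want <obj-id> ... filter ...      (capability on first want)
    #   git> filter blob:none                  (the filter-request line)
    #   git> 0000                              (flush-pkt)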

View File

@ -309,3 +309,11 @@ to accept a signed push certificate, and asks the <nonce> to be
included in the push certificate. A send-pack client MUST NOT
send a push-cert packet unless the receive-pack server advertises
this capability.
filter
------
If the upload-pack server advertises the 'filter' capability,
fetch-pack may send "filter" commands to request a partial clone
or partial fetch and request that the server omit various objects
from the packfile.
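
A rough way to check whether a particular server advertises the new
capability is to look for it in the ref advertisement, again using
packet tracing (illustrative, not part of this topic):

    GIT_TRACE_PACKET=1 git ls-remote "file://$(pwd)/srv.bare" 2>&1 |
        grep filter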

View File

@ -26,6 +26,7 @@
#include "run-command.h"
#include "connected.h"
#include "packfile.h"
#include "list-objects-filter-options.h"
/*
* Overall FIXMEs:
@ -60,6 +61,7 @@ static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP;
static int option_dissociate;
static int max_jobs = -1;
static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP;
static struct list_objects_filter_options filter_options;
static int recurse_submodules_cb(const struct option *opt,
const char *arg, int unset)
@ -135,6 +137,7 @@ static struct option builtin_clone_options[] = {
TRANSPORT_FAMILY_IPV4),
OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
TRANSPORT_FAMILY_IPV6),
OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
OPT_END()
};
@ -893,6 +896,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
struct refspec *refspec;
const char *fetch_pattern;
fetch_if_missing = 0;
packet_trace_identity("clone");
argc = parse_options(argc, argv, prefix, builtin_clone_options,
builtin_clone_usage, 0);
@ -1090,6 +1095,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
warning(_("--shallow-since is ignored in local clones; use file:// instead."));
if (option_not.nr)
warning(_("--shallow-exclude is ignored in local clones; use file:// instead."));
if (filter_options.choice)
warning(_("--filter is ignored in local clones; use file:// instead."));
if (!access(mkpath("%s/shallow", path), F_OK)) {
if (option_local > 0)
warning(_("source repository is shallow, ignoring --local"));
@ -1118,7 +1125,13 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
transport_set_option(transport, TRANS_OPT_UPLOADPACK,
option_upload_pack);
if (transport->smart_options && !deepen)
if (filter_options.choice) {
transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
filter_options.filter_spec);
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
}
if (transport->smart_options && !deepen && !filter_options.choice)
transport->smart_options->check_self_contained_and_connected = 1;
refs = transport_get_remote_refs(transport);
@ -1178,13 +1191,17 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
write_refspec_config(src_ref_prefix, our_head_points_at,
remote_head_points_at, &branch_top);
if (filter_options.choice)
partial_clone_register("origin", &filter_options);
if (is_local)
clone_local(path, git_dir);
else if (refs && complete_refs_before_fetch)
transport_fetch_refs(transport, mapped_refs);
update_remote_refs(refs, mapped_refs, remote_head_points_at,
branch_top.buf, reflog_msg.buf, transport, !is_local);
branch_top.buf, reflog_msg.buf, transport,
!is_local && !filter_options.choice);
update_head(our_head_points_at, remote_head, reflog_msg.buf);
@ -1205,6 +1222,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
}
junk_mode = JUNK_LEAVE_REPO;
fetch_if_missing = 1;
err = checkout(submodule_progress);
strbuf_release(&reflog_msg);
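
The net effect on a fresh partial clone is easiest to see in its local
config; partial_clone_register() (defined later in this commit) records
the promisor remote and the default filter-spec, exactly as the t5616
test below verifies:

    git clone --no-checkout --filter=blob:none \
        "file://$(pwd)/srv.bare" pc1
    git -C pc1 config --local core.repositoryformatversion   # 1
    git -C pc1 config --local extensions.partialclone        # origin
    git -C pc1 config --local core.partialclonefilter        # blob:none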

View File

@ -153,6 +153,14 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
args.no_dependents = 1;
continue;
}
if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) {
parse_list_objects_filter(&args.filter_options, arg);
continue;
}
if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
list_objects_filter_set_no_filter(&args.filter_options);
continue;
}
usage(fetch_pack_usage);
}
if (deepen_not.nr)
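
At the plumbing level the new options look like this; the first command
is taken from the "filtering by size" test added below, the --no-filter
form is illustrative:

    # Request a pack that omits every blob larger than 0 bytes, i.e.
    # all blobs (needs uploadpack.allowfilter on the server).
    git -C client fetch-pack --filter=blob:limit=0 ../server HEAD

    # Explicitly disable any filtering for this invocation.
    git -C client fetch-pack --no-filter ../server HEAD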

View File

@ -19,6 +19,7 @@
#include "argv-array.h"
#include "utf8.h"
#include "packfile.h"
#include "list-objects-filter-options.h"
static const char * const builtin_fetch_usage[] = {
N_("git fetch [<options>] [<repository> [<refspec>...]]"),
@ -56,6 +57,7 @@ static int recurse_submodules_default = RECURSE_SUBMODULES_ON_DEMAND;
static int shown_url = 0;
static int refmap_alloc, refmap_nr;
static const char **refmap_array;
static struct list_objects_filter_options filter_options;
static int git_fetch_config(const char *k, const char *v, void *cb)
{
@ -161,6 +163,7 @@ static struct option builtin_fetch_options[] = {
TRANSPORT_FAMILY_IPV4),
OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
TRANSPORT_FAMILY_IPV6),
OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
OPT_END()
};
@ -1045,6 +1048,11 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes");
if (update_shallow)
set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
if (filter_options.choice) {
set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
filter_options.filter_spec);
set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
}
return transport;
}
@ -1265,6 +1273,56 @@ static int fetch_multiple(struct string_list *list)
return result;
}
/*
* Fetching from the promisor remote should use the given filter-spec
* or inherit the default filter-spec from the config.
*/
static inline void fetch_one_setup_partial(struct remote *remote)
{
/*
* Explicit --no-filter argument overrides everything, regardless
* of any prior partial clones and fetches.
*/
if (filter_options.no_filter)
return;
/*
* If no prior partial clone/fetch and the current fetch DID NOT
* request a partial-fetch, do a normal fetch.
*/
if (!repository_format_partial_clone && !filter_options.choice)
return;
/*
* If this is the FIRST partial-fetch request, we enable partial
* on this repo and remember the given filter-spec as the default
* for subsequent fetches to this remote.
*/
if (!repository_format_partial_clone && filter_options.choice) {
partial_clone_register(remote->name, &filter_options);
return;
}
/*
* We are currently limited to only ONE promisor remote and only
* allow partial-fetches from the promisor remote.
*/
if (strcmp(remote->name, repository_format_partial_clone)) {
if (filter_options.choice)
die(_("--filter can only be used with the remote configured in core.partialClone"));
return;
}
/*
* Do a partial-fetch from the promisor remote using either the
* explicitly given filter-spec or inherit the filter-spec from
* the config.
*/
if (!filter_options.choice)
partial_clone_get_default_filter_spec(&filter_options);
return;
}
static int fetch_one(struct remote *remote, int argc, const char **argv)
{
static const char **refs = NULL;
@ -1320,12 +1378,14 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
{
int i;
struct string_list list = STRING_LIST_INIT_DUP;
struct remote *remote;
struct remote *remote = NULL;
int result = 0;
struct argv_array argv_gc_auto = ARGV_ARRAY_INIT;
packet_trace_identity("fetch");
fetch_if_missing = 0;
/* Record the command line for the reflog */
strbuf_addstr(&default_rla, "fetch");
for (i = 1; i < argc; i++)
@ -1359,23 +1419,23 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
if (depth || deepen_since || deepen_not.nr)
deepen = 1;
if (filter_options.choice && !repository_format_partial_clone)
die("--filter can only be used when extensions.partialClone is set");
if (all) {
if (argc == 1)
die(_("fetch --all does not take a repository argument"));
else if (argc > 1)
die(_("fetch --all does not make sense with refspecs"));
(void) for_each_remote(get_one_remote_for_fetch, &list);
result = fetch_multiple(&list);
} else if (argc == 0) {
/* No arguments -- use default remote */
remote = remote_get(NULL);
result = fetch_one(remote, argc, argv);
} else if (multiple) {
/* All arguments are assumed to be remotes or groups */
for (i = 0; i < argc; i++)
if (!add_remote_or_group(argv[i], &list))
die(_("No such remote or remote group: %s"), argv[i]);
result = fetch_multiple(&list);
} else {
/* Single remote or group */
(void) add_remote_or_group(argv[0], &list);
@ -1383,14 +1443,25 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
/* More than one remote */
if (argc > 1)
die(_("Fetching a group and specifying refspecs does not make sense"));
result = fetch_multiple(&list);
} else {
/* Zero or one remotes */
remote = remote_get(argv[0]);
result = fetch_one(remote, argc-1, argv+1);
argc--;
argv++;
}
}
if (remote) {
if (filter_options.choice || repository_format_partial_clone)
fetch_one_setup_partial(remote);
result = fetch_one(remote, argc, argv);
} else {
if (filter_options.choice)
die(_("--filter can only be used with the remote configured in core.partialClone"));
/* TODO should this also die if we have a previous partial-clone? */
result = fetch_multiple(&list);
}
if (!result && (recurse_submodules != RECURSE_SUBMODULES_OFF)) {
struct argv_array options = ARGV_ARRAY_INIT;
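
In a repository that was partially cloned from "origin",
fetch_one_setup_partial() above boils down to the following behaviour;
a sketch, with the filter-spec and the second remote name chosen for
illustration:

    git fetch origin                         # inherits core.partialclonefilter
    git fetch --filter=blob:limit=1m origin  # explicit filter-spec for this fetch
    git fetch --no-filter origin             # full fetch, ignoring the default
    git fetch --filter=blob:none other       # dies: not the configured promisor remote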

View File

@ -460,7 +460,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
continue;
}
if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
list_objects_filter_release(&filter_options);
list_objects_filter_set_no_filter(&filter_options);
continue;
}
if (!strcmp(arg, "--filter-print-omitted")) {

View File

@ -915,6 +915,7 @@ extern int grafts_replace_parents;
#define GIT_REPO_VERSION_READ 1
extern int repository_format_precious_objects;
extern char *repository_format_partial_clone;
extern const char *core_partial_clone_filter_default;
struct repository_format {
int version;

View File

@ -1251,6 +1251,11 @@ static int git_default_core_config(const char *var, const char *value)
return 0;
}
if (!strcmp(var, "core.partialclonefilter")) {
return git_config_string(&core_partial_clone_filter_default,
var, value);
}
/* Add other config variables here and to Documentation/config.txt. */
return 0;
}

View File

@ -56,6 +56,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
argv_array_push(&rev_list.args,"rev-list");
argv_array_push(&rev_list.args, "--objects");
argv_array_push(&rev_list.args, "--stdin");
if (repository_format_partial_clone)
argv_array_push(&rev_list.args, "--exclude-promisor-objects");
argv_array_push(&rev_list.args, "--not");
argv_array_push(&rev_list.args, "--all");
argv_array_push(&rev_list.args, "--quiet");
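
In other words, once a promisor remote is configured the connectivity
check spawned here is roughly equivalent to the command below, where
new-tips stands for the just-fetched object names fed on stdin:

    git rev-list --objects --stdin --exclude-promisor-objects \
        --not --all --quiet <new-tips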

View File

@ -28,6 +28,7 @@ int warn_on_object_refname_ambiguity = 1;
int ref_paranoia = -1;
int repository_format_precious_objects;
char *repository_format_partial_clone;
const char *core_partial_clone_filter_default;
const char *git_commit_encoding;
const char *git_log_output_encoding;
const char *apply_default_whitespace;

View File

@ -5,11 +5,10 @@
#include "transport.h"
#include "fetch-object.h"
void fetch_object(const char *remote_name, const unsigned char *sha1)
static void fetch_refs(const char *remote_name, struct ref *ref)
{
struct remote *remote;
struct transport *transport;
struct ref *ref;
int original_fetch_if_missing = fetch_if_missing;
fetch_if_missing = 0;
@ -18,10 +17,29 @@ void fetch_object(const char *remote_name, const unsigned char *sha1)
die(_("Remote with no URL"));
transport = transport_get(remote, remote->url[0]);
ref = alloc_ref(sha1_to_hex(sha1));
hashcpy(ref->old_oid.hash, sha1);
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1");
transport_fetch_refs(transport, ref);
fetch_if_missing = original_fetch_if_missing;
}
void fetch_object(const char *remote_name, const unsigned char *sha1)
{
struct ref *ref = alloc_ref(sha1_to_hex(sha1));
hashcpy(ref->old_oid.hash, sha1);
fetch_refs(remote_name, ref);
}
void fetch_objects(const char *remote_name, const struct oid_array *to_fetch)
{
struct ref *ref = NULL;
int i;
for (i = 0; i < to_fetch->nr; i++) {
struct ref *new_ref = alloc_ref(oid_to_hex(&to_fetch->oid[i]));
oidcpy(&new_ref->old_oid, &to_fetch->oid[i]);
new_ref->next = ref;
ref = new_ref;
}
fetch_refs(remote_name, ref);
}

View File

@ -1,6 +1,11 @@
#ifndef FETCH_OBJECT_H
#define FETCH_OBJECT_H
#include "sha1-array.h"
extern void fetch_object(const char *remote_name, const unsigned char *sha1);
extern void fetch_objects(const char *remote_name,
const struct oid_array *to_fetch);
#endif

View File

@ -29,6 +29,7 @@ static int deepen_not_ok;
static int fetch_fsck_objects = -1;
static int transfer_fsck_objects = -1;
static int agent_supported;
static int server_supports_filtering;
static struct lock_file shallow_lock;
static const char *alternate_shallow_file;
@ -379,6 +380,8 @@ static int find_common(struct fetch_pack_args *args,
if (deepen_not_ok) strbuf_addstr(&c, " deepen-not");
if (agent_supported) strbuf_addf(&c, " agent=%s",
git_user_agent_sanitized());
if (args->filter_options.choice)
strbuf_addstr(&c, " filter");
packet_buf_write(&req_buf, "want %s%s\n", remote_hex, c.buf);
strbuf_release(&c);
} else
@ -407,6 +410,9 @@ static int find_common(struct fetch_pack_args *args,
packet_buf_write(&req_buf, "deepen-not %s", s->string);
}
}
if (server_supports_filtering && args->filter_options.choice)
packet_buf_write(&req_buf, "filter %s",
args->filter_options.filter_spec);
packet_buf_flush(&req_buf);
state_len = req_buf.len;
@ -711,6 +717,7 @@ static int everything_local(struct fetch_pack_args *args,
{
struct ref *ref;
int retval;
int old_save_commit_buffer = save_commit_buffer;
timestamp_t cutoff = 0;
save_commit_buffer = 0;
@ -781,6 +788,9 @@ static int everything_local(struct fetch_pack_args *args,
print_verbose(args, _("already have %s (%s)"), oid_to_hex(remote),
ref->name);
}
save_commit_buffer = old_save_commit_buffer;
return retval;
}
@ -970,6 +980,13 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
else
prefer_ofs_delta = 0;
if (server_supports("filter")) {
server_supports_filtering = 1;
print_verbose(args, _("Server supports filter"));
} else if (args->filter_options.choice) {
warning("filtering not recognized by server, ignoring");
}
if ((agent_feature = server_feature_value("agent", &agent_len))) {
agent_supported = 1;
if (agent_len)

View File

@ -3,6 +3,7 @@
#include "string-list.h"
#include "run-command.h"
#include "list-objects-filter-options.h"
struct oid_array;
@ -12,6 +13,7 @@ struct fetch_pack_args {
int depth;
const char *deepen_since;
const struct string_list *deepen_not;
struct list_objects_filter_options filter_options;
unsigned deepen_relative:1;
unsigned quiet:1;
unsigned keep_pack:1;

View File

@ -21,29 +21,36 @@
* subordinate commands when necessary. We also "intern" the arg for
* the convenience of the current command.
*/
int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
const char *arg)
static int gently_parse_list_objects_filter(
struct list_objects_filter_options *filter_options,
const char *arg,
struct strbuf *errbuf)
{
const char *v0;
if (filter_options->choice)
die(_("multiple object filter types cannot be combined"));
if (filter_options->choice) {
if (errbuf) {
strbuf_init(errbuf, 0);
strbuf_addstr(
errbuf,
_("multiple filter-specs cannot be combined"));
}
return 1;
}
filter_options->filter_spec = strdup(arg);
if (!strcmp(arg, "blob:none")) {
filter_options->choice = LOFC_BLOB_NONE;
return 0;
}
if (skip_prefix(arg, "blob:limit=", &v0)) {
if (!git_parse_ulong(v0, &filter_options->blob_limit_value))
die(_("invalid filter-spec expression '%s'"), arg);
filter_options->choice = LOFC_BLOB_LIMIT;
return 0;
}
} else if (skip_prefix(arg, "blob:limit=", &v0)) {
if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
filter_options->choice = LOFC_BLOB_LIMIT;
return 0;
}
if (skip_prefix(arg, "sparse:oid=", &v0)) {
} else if (skip_prefix(arg, "sparse:oid=", &v0)) {
struct object_context oc;
struct object_id sparse_oid;
@ -57,15 +64,27 @@ int parse_list_objects_filter(struct list_objects_filter_options *filter_options
filter_options->sparse_oid_value = oiddup(&sparse_oid);
filter_options->choice = LOFC_SPARSE_OID;
return 0;
}
if (skip_prefix(arg, "sparse:path=", &v0)) {
} else if (skip_prefix(arg, "sparse:path=", &v0)) {
filter_options->choice = LOFC_SPARSE_PATH;
filter_options->sparse_path_value = strdup(v0);
return 0;
}
die(_("invalid filter-spec expression '%s'"), arg);
if (errbuf) {
strbuf_init(errbuf, 0);
strbuf_addf(errbuf, "invalid filter-spec '%s'", arg);
}
memset(filter_options, 0, sizeof(*filter_options));
return 1;
}
int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
const char *arg)
{
struct strbuf buf = STRBUF_INIT;
if (gently_parse_list_objects_filter(filter_options, arg, &buf))
die("%s", buf.buf);
return 0;
}
@ -75,7 +94,7 @@ int opt_parse_list_objects_filter(const struct option *opt,
struct list_objects_filter_options *filter_options = opt->value;
if (unset || !arg) {
list_objects_filter_release(filter_options);
list_objects_filter_set_no_filter(filter_options);
return 0;
}
@ -90,3 +109,44 @@ void list_objects_filter_release(
free(filter_options->sparse_path_value);
memset(filter_options, 0, sizeof(*filter_options));
}
void partial_clone_register(
const char *remote,
const struct list_objects_filter_options *filter_options)
{
/*
* Record the name of the partial clone remote in the
* config and in the global variable -- the latter is
* used throughout to indicate that partial clone is
* enabled and to expect missing objects.
*/
if (repository_format_partial_clone &&
*repository_format_partial_clone &&
strcmp(remote, repository_format_partial_clone))
die(_("cannot change partial clone promisor remote"));
git_config_set("core.repositoryformatversion", "1");
git_config_set("extensions.partialclone", remote);
repository_format_partial_clone = xstrdup(remote);
/*
* Record the initial filter-spec in the config as
* the default for subsequent fetches from this remote.
*/
core_partial_clone_filter_default =
xstrdup(filter_options->filter_spec);
git_config_set("core.partialclonefilter",
core_partial_clone_filter_default);
}
void partial_clone_get_default_filter_spec(
struct list_objects_filter_options *filter_options)
{
/*
* Parse default value, but silently ignore it if it is invalid.
*/
gently_parse_list_objects_filter(filter_options,
core_partial_clone_filter_default,
NULL);
}

View File

@ -30,6 +30,11 @@ struct list_objects_filter_options {
*/
enum list_objects_filter_choice choice;
/*
* Choice is LOFC_DISABLED because "--no-filter" was requested.
*/
unsigned int no_filter : 1;
/*
* Parsed values (fields) from within the filter-spec. These are
* choice-specific; not all values will be defined for any given
@ -58,4 +63,17 @@ int opt_parse_list_objects_filter(const struct option *opt,
void list_objects_filter_release(
struct list_objects_filter_options *filter_options);
static inline void list_objects_filter_set_no_filter(
struct list_objects_filter_options *filter_options)
{
list_objects_filter_release(filter_options);
filter_options->no_filter = 1;
}
void partial_clone_register(
const char *remote,
const struct list_objects_filter_options *filter_options);
void partial_clone_get_default_filter_spec(
struct list_objects_filter_options *filter_options);
#endif /* LIST_OBJECTS_FILTER_OPTIONS_H */

View File

@ -24,6 +24,7 @@ struct options {
char *deepen_since;
struct string_list deepen_not;
struct string_list push_options;
char *filter;
unsigned progress : 1,
check_self_contained_and_connected : 1,
cloning : 1,
@ -165,6 +166,9 @@ static int set_option(const char *name, const char *value)
} else if (!strcmp(name, "no-dependents")) {
options.no_dependents = 1;
return 0;
} else if (!strcmp(name, "filter")) {
options.filter = xstrdup(value);
return 0;
} else {
return 1 /* unsupported */;
}
@ -834,6 +838,8 @@ static int fetch_git(struct discovery *heads,
argv_array_push(&args, "--from-promisor");
if (options.no_dependents)
argv_array_push(&args, "--no-dependents");
if (options.filter)
argv_array_pushf(&args, "--filter=%s", options.filter);
argv_array_push(&args, url.buf);
for (i = 0; i < nr_heads; i++) {

View File

@ -755,4 +755,67 @@ test_expect_success 'fetching deepen' '
)
'
test_expect_success 'filtering by size' '
rm -rf server client &&
test_create_repo server &&
test_commit -C server one &&
test_config -C server uploadpack.allowfilter 1 &&
test_create_repo client &&
git -C client fetch-pack --filter=blob:limit=0 ../server HEAD &&
# Ensure that object is not inadvertently fetched
test_must_fail git -C client cat-file -e $(git hash-object server/one.t)
'
test_expect_success 'filtering by size has no effect if support for it is not advertised' '
rm -rf server client &&
test_create_repo server &&
test_commit -C server one &&
test_create_repo client &&
git -C client fetch-pack --filter=blob:limit=0 ../server HEAD 2> err &&
# Ensure that object is fetched
git -C client cat-file -e $(git hash-object server/one.t) &&
test_i18ngrep "filtering not recognized by server" err
'
fetch_filter_blob_limit_zero () {
SERVER="$1"
URL="$2"
rm -rf "$SERVER" client &&
test_create_repo "$SERVER" &&
test_commit -C "$SERVER" one &&
test_config -C "$SERVER" uploadpack.allowfilter 1 &&
git clone "$URL" client &&
test_config -C client extensions.partialclone origin &&
test_commit -C "$SERVER" two &&
git -C client fetch --filter=blob:limit=0 origin HEAD:somewhere &&
# Ensure that commit is fetched, but blob is not
test_config -C client extensions.partialclone "arbitrary string" &&
git -C client cat-file -e $(git -C "$SERVER" rev-parse two) &&
test_must_fail git -C client cat-file -e $(git hash-object "$SERVER/two.t")
}
test_expect_success 'fetch with --filter=blob:limit=0' '
fetch_filter_blob_limit_zero server server
'
. "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd
test_expect_success 'fetch with --filter=blob:limit=0 and HTTP' '
fetch_filter_blob_limit_zero "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server"
'
stop_httpd
test_done

View File

@ -628,4 +628,105 @@ test_expect_success 'clone on case-insensitive fs' '
)
'
partial_clone () {
SERVER="$1" &&
URL="$2" &&
rm -rf "$SERVER" client &&
test_create_repo "$SERVER" &&
test_commit -C "$SERVER" one &&
HASH1=$(git hash-object "$SERVER/one.t") &&
git -C "$SERVER" revert HEAD &&
test_commit -C "$SERVER" two &&
HASH2=$(git hash-object "$SERVER/two.t") &&
test_config -C "$SERVER" uploadpack.allowfilter 1 &&
test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&
git clone --filter=blob:limit=0 "$URL" client &&
git -C client fsck &&
# Ensure that unneeded blobs are not inadvertently fetched.
test_config -C client extensions.partialclone "not a remote" &&
test_must_fail git -C client cat-file -e "$HASH1" &&
# But this blob was fetched, because clone performs an initial checkout
git -C client cat-file -e "$HASH2"
}
test_expect_success 'partial clone' '
partial_clone server "file://$(pwd)/server"
'
test_expect_success 'partial clone: warn if server does not support object filtering' '
rm -rf server client &&
test_create_repo server &&
test_commit -C server one &&
git clone --filter=blob:limit=0 "file://$(pwd)/server" client 2> err &&
test_i18ngrep "filtering not recognized by server" err
'
test_expect_success 'batch missing blob request during checkout' '
rm -rf server client &&
test_create_repo server &&
echo a >server/a &&
echo b >server/b &&
git -C server add a b &&
git -C server commit -m x &&
echo aa >server/a &&
echo bb >server/b &&
git -C server add a b &&
git -C server commit -m x &&
test_config -C server uploadpack.allowfilter 1 &&
test_config -C server uploadpack.allowanysha1inwant 1 &&
git clone --filter=blob:limit=0 "file://$(pwd)/server" client &&
# Ensure that there is only one negotiation by checking that there is
# only "done" line sent. ("done" marks the end of negotiation.)
GIT_TRACE_PACKET="$(pwd)/trace" git -C client checkout HEAD^ &&
grep "git> done" trace >done_lines &&
test_line_count = 1 done_lines
'
test_expect_success 'batch missing blob request does not inadvertently try to fetch gitlinks' '
rm -rf server client &&
test_create_repo repo_for_submodule &&
test_commit -C repo_for_submodule x &&
test_create_repo server &&
echo a >server/a &&
echo b >server/b &&
git -C server add a b &&
git -C server commit -m x &&
echo aa >server/a &&
echo bb >server/b &&
# Also add a gitlink pointing to an arbitrary repository
git -C server submodule add "$(pwd)/repo_for_submodule" c &&
git -C server add a b c &&
git -C server commit -m x &&
test_config -C server uploadpack.allowfilter 1 &&
test_config -C server uploadpack.allowanysha1inwant 1 &&
# Make sure that it succeeds
git clone --filter=blob:limit=0 "file://$(pwd)/server" client
'
. "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd
test_expect_success 'partial clone using HTTP' '
partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server"
'
stop_httpd
test_done

t/t5616-partial-clone.sh (new executable file, 146 lines)
View File

@ -0,0 +1,146 @@
#!/bin/sh
test_description='git partial clone'
. ./test-lib.sh
# create a normal "src" repo where we can later create new commits.
# expect_1.oids will contain a list of the OIDs of all blobs.
test_expect_success 'setup normal src repo' '
echo "{print \$1}" >print_1.awk &&
echo "{print \$2}" >print_2.awk &&
git init src &&
for n in 1 2 3 4
do
echo "This is file: $n" > src/file.$n.txt
git -C src add file.$n.txt
git -C src commit -m "file $n"
git -C src ls-files -s file.$n.txt >>temp
done &&
awk -f print_2.awk <temp | sort >expect_1.oids &&
test_line_count = 4 expect_1.oids
'
# bare clone "src" giving "srv.bare" for use as our server.
test_expect_success 'setup bare clone for server' '
git clone --bare "file://$(pwd)/src" srv.bare &&
git -C srv.bare config --local uploadpack.allowfilter 1 &&
git -C srv.bare config --local uploadpack.allowanysha1inwant 1
'
# do basic partial clone from "srv.bare"
# confirm we are missing all of the known blobs.
# confirm partial clone was registered in the local config.
test_expect_success 'do partial clone 1' '
git clone --no-checkout --filter=blob:none "file://$(pwd)/srv.bare" pc1 &&
git -C pc1 rev-list HEAD --quiet --objects --missing=print \
| awk -f print_1.awk \
| sed "s/?//" \
| sort >observed.oids &&
test_cmp expect_1.oids observed.oids &&
test "$(git -C pc1 config --local core.repositoryformatversion)" = "1" &&
test "$(git -C pc1 config --local extensions.partialclone)" = "origin" &&
test "$(git -C pc1 config --local core.partialclonefilter)" = "blob:none"
'
# checkout master to force dynamic object fetch of blobs at HEAD.
test_expect_success 'verify checkout with dynamic object fetch' '
git -C pc1 rev-list HEAD --quiet --objects --missing=print >observed &&
test_line_count = 4 observed &&
git -C pc1 checkout master &&
git -C pc1 rev-list HEAD --quiet --objects --missing=print >observed &&
test_line_count = 0 observed
'
# create new commits in "src" repo to establish a blame history on file.1.txt
# and push to "srv.bare".
test_expect_success 'push new commits to server' '
git -C src remote add srv "file://$(pwd)/srv.bare" &&
for x in a b c d e
do
echo "Mod file.1.txt $x" >>src/file.1.txt
git -C src add file.1.txt
git -C src commit -m "mod $x"
done &&
git -C src blame master -- file.1.txt >expect.blame &&
git -C src push -u srv master
'
# (partial) fetch in the partial clone repo from the promisor remote.
# verify that fetch inherited the filter-spec from the config and DOES NOT
# have the new blobs.
test_expect_success 'partial fetch inherits filter settings' '
git -C pc1 fetch origin &&
git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed &&
test_line_count = 5 observed
'
# force dynamic object fetch using diff.
# we should only get 1 new blob (for the file in origin/master).
test_expect_success 'verify diff causes dynamic object fetch' '
git -C pc1 diff master..origin/master -- file.1.txt &&
git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed &&
test_line_count = 4 observed
'
# force full dynamic object fetch of the file's history using blame.
# we should get the intermediate blobs for the file.
test_expect_success 'verify blame causes dynamic object fetch' '
git -C pc1 blame origin/master -- file.1.txt >observed.blame &&
test_cmp expect.blame observed.blame &&
git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed &&
test_line_count = 0 observed
'
# create new commits in "src" repo to establish a history on file.2.txt
# and push to "srv.bare".
test_expect_success 'push new commits to server for file.2.txt' '
for x in a b c d e f
do
echo "Mod file.2.txt $x" >>src/file.2.txt
git -C src add file.2.txt
git -C src commit -m "mod $x"
done &&
git -C src push -u srv master
'
# Do FULL fetch by disabling inherited filter-spec using --no-filter.
# Verify we have all the new blobs.
test_expect_success 'override inherited filter-spec using --no-filter' '
git -C pc1 fetch --no-filter origin &&
git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed &&
test_line_count = 0 observed
'
# create new commits in "src" repo to establish a history on file.3.txt
# and push to "srv.bare".
test_expect_success 'push new commits to server for file.3.txt' '
for x in a b c d e f
do
echo "Mod file.3.txt $x" >>src/file.3.txt
git -C src add file.3.txt
git -C src commit -m "mod $x"
done &&
git -C src push -u srv master
'
# Do a partial fetch and then try to manually fetch the missing objects.
# This can be used as the basis of a pre-command hook to bulk fetch objects
# perhaps combined with a command in dry-run mode.
test_expect_success 'manual prefetch of missing objects' '
git -C pc1 fetch --filter=blob:none origin &&
git -C pc1 rev-list master..origin/master --quiet --objects --missing=print \
| awk -f print_1.awk \
| sed "s/?//" \
| sort >observed.oids &&
test_line_count = 6 observed.oids &&
git -C pc1 fetch-pack --stdin "file://$(pwd)/srv.bare" <observed.oids &&
git -C pc1 rev-list master..origin/master --quiet --objects --missing=print \
| awk -f print_1.awk \
| sed "s/?//" \
| sort >observed.oids &&
test_line_count = 0 observed.oids
'
test_done

View File

@ -672,6 +672,11 @@ static int fetch(struct transport *transport,
if (data->transport_options.update_shallow)
set_helper_option(transport, "update-shallow", "true");
if (data->transport_options.filter_options.choice)
set_helper_option(
transport, "filter",
data->transport_options.filter_options.filter_spec);
if (data->fetch)
return fetch_with_fetch(transport, nr_heads, to_fetch);

View File

@ -167,6 +167,9 @@ static int set_git_option(struct git_transport_options *opts,
} else if (!strcmp(name, TRANS_OPT_NO_DEPENDENTS)) {
opts->no_dependents = !!value;
return 0;
} else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) {
parse_list_objects_filter(&opts->filter_options, value);
return 0;
}
return 1;
}
@ -237,6 +240,7 @@ static int fetch_refs_via_pack(struct transport *transport,
args.update_shallow = data->options.update_shallow;
args.from_promisor = data->options.from_promisor;
args.no_dependents = data->options.no_dependents;
args.filter_options = data->options.filter_options;
if (!data->got_remote_heads) {
connect_setup(transport, 0);

View File

@ -4,6 +4,7 @@
#include "cache.h"
#include "run-command.h"
#include "remote.h"
#include "list-objects-filter-options.h"
struct string_list;
@ -23,6 +24,7 @@ struct git_transport_options {
const char *uploadpack;
const char *receivepack;
struct push_cas_option *cas;
struct list_objects_filter_options filter_options;
};
enum transport_family {
@ -170,6 +172,9 @@ void transport_check_allowed(const char *type);
*/
#define TRANS_OPT_NO_DEPENDENTS "no-dependents"
/* Filter objects for partial clone and fetch */
#define TRANS_OPT_LIST_OBJECTS_FILTER "filter"
/**
* Returns 0 if the option was used, non-zero otherwise. Prints a
* message to stderr if the option is not used.

View File

@ -15,6 +15,7 @@
#include "submodule.h"
#include "submodule-config.h"
#include "fsmonitor.h"
#include "fetch-object.h"
/*
* Error messages expected by scripts out of plumbing commands such as
@ -370,6 +371,27 @@ static int check_updates(struct unpack_trees_options *o)
load_gitmodules_file(index, &state);
enable_delayed_checkout(&state);
if (repository_format_partial_clone && o->update && !o->dry_run) {
/*
* Prefetch the objects that are to be checked out in the loop
* below.
*/
struct oid_array to_fetch = OID_ARRAY_INIT;
int fetch_if_missing_store = fetch_if_missing;
fetch_if_missing = 0;
for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i];
if ((ce->ce_flags & CE_UPDATE) &&
!S_ISGITLINK(ce->ce_mode)) {
if (!has_object_file(&ce->oid))
oid_array_append(&to_fetch, &ce->oid);
}
}
if (to_fetch.nr)
fetch_objects(repository_format_partial_clone,
&to_fetch);
fetch_if_missing = fetch_if_missing_store;
}
for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i];
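
The point of the batching is that a checkout needing many missing blobs
triggers a single negotiation with the promisor remote rather than one
per blob; the "batch missing blob request during checkout" test earlier
in this commit checks exactly that by counting "done" lines in the
packet trace:

    GIT_TRACE_PACKET="$(pwd)/trace" git -C client checkout HEAD^
    grep "git> done" trace >done_lines
    test_line_count = 1 done_lines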

View File

@ -10,6 +10,8 @@
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
#include "list-objects-filter.h"
#include "list-objects-filter-options.h"
#include "run-command.h"
#include "connect.h"
#include "sigchain.h"
@ -19,6 +21,7 @@
#include "argv-array.h"
#include "prio-queue.h"
#include "protocol.h"
#include "quote.h"
static const char * const upload_pack_usage[] = {
N_("git upload-pack [<options>] <dir>"),
@ -65,6 +68,10 @@ static int advertise_refs;
static int stateless_rpc;
static const char *pack_objects_hook;
static int filter_capability_requested;
static int filter_advertise;
static struct list_objects_filter_options filter_options;
static void reset_timeout(void)
{
alarm(timeout);
@ -132,6 +139,17 @@ static void create_pack_file(void)
argv_array_push(&pack_objects.args, "--delta-base-offset");
if (use_include_tag)
argv_array_push(&pack_objects.args, "--include-tag");
if (filter_options.filter_spec) {
if (pack_objects.use_shell) {
struct strbuf buf = STRBUF_INIT;
sq_quote_buf(&buf, filter_options.filter_spec);
argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf);
strbuf_release(&buf);
} else {
argv_array_pushf(&pack_objects.args, "--filter=%s",
filter_options.filter_spec);
}
}
pack_objects.in = -1;
pack_objects.out = -1;
@ -795,6 +813,12 @@ static void receive_needs(void)
deepen_rev_list = 1;
continue;
}
if (skip_prefix(line, "filter ", &arg)) {
if (!filter_capability_requested)
die("git upload-pack: filtering capability not negotiated");
parse_list_objects_filter(&filter_options, arg);
continue;
}
if (!skip_prefix(line, "want ", &arg) ||
get_oid_hex(arg, &oid_buf))
die("git upload-pack: protocol error, "
@ -822,6 +846,8 @@ static void receive_needs(void)
no_progress = 1;
if (parse_feature_request(features, "include-tag"))
use_include_tag = 1;
if (parse_feature_request(features, "filter"))
filter_capability_requested = 1;
o = parse_object(&oid_buf);
if (!o) {
@ -941,7 +967,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
struct strbuf symref_info = STRBUF_INIT;
format_symref_info(&symref_info, cb_data);
packet_write_fmt(1, "%s %s%c%s%s%s%s%s agent=%s\n",
packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s agent=%s\n",
oid_to_hex(oid), refname_nons,
0, capabilities,
(allow_unadvertised_object_request & ALLOW_TIP_SHA1) ?
@ -950,6 +976,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
" allow-reachable-sha1-in-want" : "",
stateless_rpc ? " no-done" : "",
symref_info.buf,
filter_advertise ? " filter" : "",
git_user_agent_sanitized());
strbuf_release(&symref_info);
} else {
@ -1028,6 +1055,8 @@ static int upload_pack_config(const char *var, const char *value, void *unused)
} else if (current_config_scope() != CONFIG_SCOPE_REPO) {
if (!strcmp("uploadpack.packobjectshook", var))
return git_config_string(&pack_objects_hook, var, value);
} else if (!strcmp("uploadpack.allowfilter", var)) {
filter_advertise = git_config_bool(var, value);
}
return parse_hide_refs_config(var, value, "uploadpack");
}