Merge branch 'js/filter-options-should-use-plain-int'

Update the protocol message specification to allow only the limited
use of scaled quantities.  This is to ensure potential compatibility
issues will not get out of hand.

* js/filter-options-should-use-plain-int:
  filter-options: expand scaled numbers
  tree:<depth>: skip some trees even when collecting omits
  list-objects-filter: teach tree:# how to handle >0
This commit is contained in:
Junio C Hamano 2019-02-05 14:26:09 -08:00
commit 073312b4c7
11 changed files with 330 additions and 45 deletions

View File

@ -730,8 +730,13 @@ specification contained in <path>.
+
The form '--filter=tree:<depth>' omits all blobs and trees whose depth
from the root tree is >= <depth> (minimum depth if an object is located
at multiple depths in the commits traversed). Currently, only <depth>=0
is supported, which omits all blobs and trees.
at multiple depths in the commits traversed). <depth>=0 will not include
any trees or blobs unless included explicitly in the command-line (or
standard input when --stdin is used). <depth>=1 will include only the
tree and blobs which are referenced directly by a commit reachable from
<commit> or an explicitly-given object. <depth>=2 is like <depth>=1
while also including trees and blobs one more level removed from an
explicitly-given commit or tree.
--no-filter::
Turn off any previous `--filter=` argument.

View File

@ -296,7 +296,13 @@ included in the client's request:
Request that various objects from the packfile be omitted
using one of several filtering techniques. These are intended
for use with partial clone and partial fetch operations. See
`rev-list` for possible "filter-spec" values.
`rev-list` for possible "filter-spec" values. When communicating
with other processes, senders SHOULD translate scaled integers
(e.g. "1k") into a fully-expanded form (e.g. "1024") to aid
interoperability with older receivers that may not understand
newly-invented scaling suffixes. However, receivers SHOULD
accept the following suffixes: 'k', 'm', and 'g' for 1024,
1048576, and 1073741824, respectively.
If the 'ref-in-want' feature is advertised, the following argument can
be included in the client's request as well as the potential addition of

View File

@ -1136,9 +1136,13 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
option_upload_pack);
if (filter_options.choice) {
struct strbuf expanded_filter_spec = STRBUF_INIT;
expand_list_objects_filter_spec(&filter_options,
&expanded_filter_spec);
transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
filter_options.filter_spec);
expanded_filter_spec.buf);
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
strbuf_release(&expanded_filter_spec);
}
if (transport->smart_options && !deepen && !filter_options.choice)

View File

@ -1165,6 +1165,7 @@ static void add_negotiation_tips(struct git_transport_options *smart_options)
static struct transport *prepare_transport(struct remote *remote, int deepen)
{
struct transport *transport;
transport = transport_get(remote, NULL);
transport_set_verbosity(transport, verbosity, progress);
transport->family = family;
@ -1184,9 +1185,13 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
if (update_shallow)
set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
if (filter_options.choice) {
struct strbuf expanded_filter_spec = STRBUF_INIT;
expand_list_objects_filter_spec(&filter_options,
&expanded_filter_spec);
set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
filter_options.filter_spec);
expanded_filter_spec.buf);
set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
strbuf_release(&expanded_filter_spec);
}
if (negotiation_tip.nr) {
if (transport->smart_options)

View File

@ -329,9 +329,14 @@ static int find_common(struct fetch_negotiator *negotiator,
packet_buf_write(&req_buf, "deepen-not %s", s->string);
}
}
if (server_supports_filtering && args->filter_options.choice)
if (server_supports_filtering && args->filter_options.choice) {
struct strbuf expanded_filter_spec = STRBUF_INIT;
expand_list_objects_filter_spec(&args->filter_options,
&expanded_filter_spec);
packet_buf_write(&req_buf, "filter %s",
args->filter_options.filter_spec);
expanded_filter_spec.buf);
strbuf_release(&expanded_filter_spec);
}
packet_buf_flush(&req_buf);
state_len = req_buf.len;
@ -1122,9 +1127,13 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
/* Add filter */
if (server_supports_feature("fetch", "filter", 0) &&
args->filter_options.choice) {
struct strbuf expanded_filter_spec = STRBUF_INIT;
print_verbose(args, _("Server supports filter"));
expand_list_objects_filter_spec(&args->filter_options,
&expanded_filter_spec);
packet_buf_write(&req_buf, "filter %s",
args->filter_options.filter_spec);
expanded_filter_spec.buf);
strbuf_release(&expanded_filter_spec);
} else if (args->filter_options.choice) {
warning("filtering not recognized by server, ignoring");
}

View File

@ -18,8 +18,9 @@
* See Documentation/rev-list-options.txt for allowed values for <arg>.
*
* Capture the given arg as the "filter_spec". This can be forwarded to
* subordinate commands when necessary. We also "intern" the arg for
* the convenience of the current command.
* subordinate commands when necessary (although it's better to pass it through
* expand_list_objects_filter_spec() first). We also "intern" the arg for the
* convenience of the current command.
*/
static int gently_parse_list_objects_filter(
struct list_objects_filter_options *filter_options,
@ -50,16 +51,15 @@ static int gently_parse_list_objects_filter(
}
} else if (skip_prefix(arg, "tree:", &v0)) {
unsigned long depth;
if (!git_parse_ulong(v0, &depth) || depth != 0) {
if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) {
if (errbuf) {
strbuf_addstr(
errbuf,
_("only 'tree:0' is supported"));
_("expected 'tree:<depth>'"));
}
return 1;
}
filter_options->choice = LOFC_TREE_NONE;
filter_options->choice = LOFC_TREE_DEPTH;
return 0;
} else if (skip_prefix(arg, "sparse:oid=", &v0)) {
@ -112,6 +112,21 @@ int opt_parse_list_objects_filter(const struct option *opt,
return parse_list_objects_filter(filter_options, arg);
}
void expand_list_objects_filter_spec(
const struct list_objects_filter_options *filter,
struct strbuf *expanded_spec)
{
strbuf_init(expanded_spec, strlen(filter->filter_spec));
if (filter->choice == LOFC_BLOB_LIMIT)
strbuf_addf(expanded_spec, "blob:limit=%lu",
filter->blob_limit_value);
else if (filter->choice == LOFC_TREE_DEPTH)
strbuf_addf(expanded_spec, "tree:%lu",
filter->tree_exclude_depth);
else
strbuf_addstr(expanded_spec, filter->filter_spec);
}
void list_objects_filter_release(
struct list_objects_filter_options *filter_options)
{

View File

@ -2,6 +2,7 @@
#define LIST_OBJECTS_FILTER_OPTIONS_H
#include "parse-options.h"
#include "strbuf.h"
/*
* The list of defined filters for list-objects.
@ -10,7 +11,7 @@ enum list_objects_filter_choice {
LOFC_DISABLED = 0,
LOFC_BLOB_NONE,
LOFC_BLOB_LIMIT,
LOFC_TREE_NONE,
LOFC_TREE_DEPTH,
LOFC_SPARSE_OID,
LOFC_SPARSE_PATH,
LOFC__COUNT /* must be last */
@ -20,8 +21,9 @@ struct list_objects_filter_options {
/*
* 'filter_spec' is the raw argument value given on the command line
* or protocol request. (The part after the "--keyword=".) For
* commands that launch filtering sub-processes, this value should be
* passed to them as received by the current process.
* commands that launch filtering sub-processes, or for communication
* over the network, don't use this value; use the result of
* expand_list_objects_filter_spec() instead.
*/
char *filter_spec;
@ -44,6 +46,7 @@ struct list_objects_filter_options {
struct object_id *sparse_oid_value;
char *sparse_path_value;
unsigned long blob_limit_value;
unsigned long tree_exclude_depth;
};
/* Normalized command line arguments */
@ -61,6 +64,17 @@ int opt_parse_list_objects_filter(const struct option *opt,
N_("object filtering"), 0, \
opt_parse_list_objects_filter }
/*
* Translates abbreviated numbers in the filter's filter_spec into their
* fully-expanded forms (e.g., "blob:limit=1k" becomes "blob:limit=1024").
*
* This form should be used instead of the raw filter_spec field when
* communicating with a remote process or subprocess.
*/
void expand_list_objects_filter_spec(
const struct list_objects_filter_options *filter,
struct strbuf *expanded_spec);
void list_objects_filter_release(
struct list_objects_filter_options *filter_options);

View File

@ -10,6 +10,7 @@
#include "list-objects.h"
#include "list-objects-filter.h"
#include "list-objects-filter-options.h"
#include "oidmap.h"
#include "oidset.h"
#include "object-store.h"
@ -84,11 +85,44 @@ static void *filter_blobs_none__init(
* A filter for list-objects to omit ALL trees and blobs from the traversal.
* Can OPTIONALLY collect a list of the omitted OIDs.
*/
struct filter_trees_none_data {
struct filter_trees_depth_data {
struct oidset *omits;
/*
* Maps trees to the minimum depth at which they were seen. It is not
* necessary to re-traverse a tree at deeper or equal depths than it has
* already been traversed.
*
* We can't use LOFR_MARK_SEEN for tree objects since this will prevent
* it from being traversed at shallower depths.
*/
struct oidmap seen_at_depth;
unsigned long exclude_depth;
unsigned long current_depth;
};
static enum list_objects_filter_result filter_trees_none(
struct seen_map_entry {
struct oidmap_entry base;
size_t depth;
};
/* Returns 1 if the oid was in the omits set before it was invoked. */
static int filter_trees_update_omits(
struct object *obj,
struct filter_trees_depth_data *filter_data,
int include_it)
{
if (!filter_data->omits)
return 0;
if (include_it)
return oidset_remove(filter_data->omits, &obj->oid);
else
return oidset_insert(filter_data->omits, &obj->oid);
}
static enum list_objects_filter_result filter_trees_depth(
struct repository *r,
enum list_objects_filter_situation filter_situation,
struct object *obj,
@ -96,43 +130,91 @@ static enum list_objects_filter_result filter_trees_none(
const char *filename,
void *filter_data_)
{
struct filter_trees_none_data *filter_data = filter_data_;
struct filter_trees_depth_data *filter_data = filter_data_;
struct seen_map_entry *seen_info;
int include_it = filter_data->current_depth <
filter_data->exclude_depth;
int filter_res;
int already_seen;
/*
* Note that we do not use _MARK_SEEN in order to allow re-traversal in
* case we encounter a tree or blob again at a shallower depth.
*/
switch (filter_situation) {
default:
BUG("unknown filter_situation: %d", filter_situation);
case LOFS_BEGIN_TREE:
case LOFS_BLOB:
if (filter_data->omits) {
oidset_insert(filter_data->omits, &obj->oid);
/* _MARK_SEEN but not _DO_SHOW (hard omit) */
return LOFR_MARK_SEEN;
} else {
/*
* Not collecting omits so no need to traverse tree.
*/
return LOFR_SKIP_TREE | LOFR_MARK_SEEN;
}
case LOFS_END_TREE:
assert(obj->type == OBJ_TREE);
filter_data->current_depth--;
return LOFR_ZERO;
case LOFS_BLOB:
filter_trees_update_omits(obj, filter_data, include_it);
return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO;
case LOFS_BEGIN_TREE:
seen_info = oidmap_get(
&filter_data->seen_at_depth, &obj->oid);
if (!seen_info) {
seen_info = xcalloc(1, sizeof(*seen_info));
oidcpy(&seen_info->base.oid, &obj->oid);
seen_info->depth = filter_data->current_depth;
oidmap_put(&filter_data->seen_at_depth, seen_info);
already_seen = 0;
} else {
already_seen =
filter_data->current_depth >= seen_info->depth;
}
if (already_seen) {
filter_res = LOFR_SKIP_TREE;
} else {
int been_omitted = filter_trees_update_omits(
obj, filter_data, include_it);
seen_info->depth = filter_data->current_depth;
if (include_it)
filter_res = LOFR_DO_SHOW;
else if (filter_data->omits && !been_omitted)
/*
* Must update omit information of children
* recursively; they have not been omitted yet.
*/
filter_res = LOFR_ZERO;
else
filter_res = LOFR_SKIP_TREE;
}
filter_data->current_depth++;
return filter_res;
}
}
static void* filter_trees_none__init(
static void filter_trees_free(void *filter_data) {
struct filter_trees_depth_data *d = filter_data;
if (!d)
return;
oidmap_free(&d->seen_at_depth, 1);
free(d);
}
static void *filter_trees_depth__init(
struct oidset *omitted,
struct list_objects_filter_options *filter_options,
filter_object_fn *filter_fn,
filter_free_fn *filter_free_fn)
{
struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
d->omits = omitted;
oidmap_init(&d->seen_at_depth, 0);
d->exclude_depth = filter_options->tree_exclude_depth;
d->current_depth = 0;
*filter_fn = filter_trees_none;
*filter_free_fn = free;
*filter_fn = filter_trees_depth;
*filter_free_fn = filter_trees_free;
return d;
}
@ -430,7 +512,7 @@ static filter_init_fn s_filters[] = {
NULL,
filter_blobs_none__init,
filter_blobs_limit__init,
filter_trees_none__init,
filter_trees_depth__init,
filter_sparse_oid__init,
filter_sparse_path__init,
};

View File

@ -283,7 +283,7 @@ test_expect_success 'verify tree:0 includes trees in "filtered" output' '
# Make sure tree:0 does not iterate through any trees.
test_expect_success 'filter a GIANT tree through tree:0' '
test_expect_success 'verify skipping tree iteration when not collecting omits' '
GIT_TRACE=1 git -C r3 rev-list \
--objects --filter=tree:0 HEAD 2>filter_trace &&
grep "Skipping contents of tree [.][.][.]" filter_trace >actual &&
@ -294,6 +294,126 @@ test_expect_success 'filter a GIANT tree through tree:0' '
! grep "Skipping contents of tree [^.]" filter_trace
'
# Test tree:# filters.
expect_has () {
commit=$1 &&
name=$2 &&
hash=$(git -C r3 rev-parse $commit:$name) &&
grep "^$hash $name$" actual
}
test_expect_success 'verify tree:1 includes root trees' '
git -C r3 rev-list --objects --filter=tree:1 HEAD >actual &&
# We should get two root directories and two commits.
expect_has HEAD "" &&
expect_has HEAD~1 "" &&
test_line_count = 4 actual
'
test_expect_success 'verify tree:2 includes root trees and immediate children' '
git -C r3 rev-list --objects --filter=tree:2 HEAD >actual &&
expect_has HEAD "" &&
expect_has HEAD~1 "" &&
expect_has HEAD dir1 &&
expect_has HEAD pattern &&
expect_has HEAD sparse1 &&
expect_has HEAD sparse2 &&
# There are also 2 commit objects
test_line_count = 8 actual
'
test_expect_success 'verify tree:3 includes everything expected' '
git -C r3 rev-list --objects --filter=tree:3 HEAD >actual &&
expect_has HEAD "" &&
expect_has HEAD~1 "" &&
expect_has HEAD dir1 &&
expect_has HEAD dir1/sparse1 &&
expect_has HEAD dir1/sparse2 &&
expect_has HEAD pattern &&
expect_has HEAD sparse1 &&
expect_has HEAD sparse2 &&
# There are also 2 commit objects
test_line_count = 10 actual
'
# Test provisional omit collection logic with a repo that has objects appearing
# at multiple depths - first deeper than the filter's threshold, then shallow.
test_expect_success 'setup r4' '
git init r4 &&
echo foo > r4/foo &&
mkdir r4/subdir &&
echo bar > r4/subdir/bar &&
mkdir r4/filt &&
cp -r r4/foo r4/subdir r4/filt &&
git -C r4 add foo subdir filt &&
git -C r4 commit -m "commit msg"
'
expect_has_with_different_name () {
repo=$1 &&
name=$2 &&
hash=$(git -C $repo rev-parse HEAD:$name) &&
! grep "^$hash $name$" actual &&
grep "^$hash " actual &&
! grep "~$hash" actual
}
test_expect_success 'test tree:# filter provisional omit for blob and tree' '
git -C r4 rev-list --objects --filter-print-omitted --filter=tree:2 \
HEAD >actual &&
expect_has_with_different_name r4 filt/foo &&
expect_has_with_different_name r4 filt/subdir
'
test_expect_success 'verify skipping tree iteration when collecting omits' '
GIT_TRACE=1 git -C r4 rev-list --filter-print-omitted \
--objects --filter=tree:0 HEAD 2>filter_trace &&
grep "^Skipping contents of tree " filter_trace >actual &&
echo "Skipping contents of tree subdir/..." >expect &&
test_cmp expect actual
'
# Test tree:<depth> where a tree is iterated over twice - once where a subentry is
# too deep to be included, and again where the blob inside it is shallow enough
# to be included. This makes sure we don't use LOFR_MARK_SEEN incorrectly (we
# can't use it because a tree can be iterated over again at a lower depth).
test_expect_success 'tree:<depth> where we iterate over tree at two levels' '
git init r5 &&
mkdir -p r5/a/subdir/b &&
echo foo > r5/a/subdir/b/foo &&
mkdir -p r5/subdir/b &&
echo foo > r5/subdir/b/foo &&
git -C r5 add a subdir &&
git -C r5 commit -m "commit msg" &&
git -C r5 rev-list --objects --filter=tree:4 HEAD >actual &&
expect_has_with_different_name r5 a/subdir/b/foo
'
test_expect_success 'tree:<depth> which filters out blob but given as arg' '
blob_hash=$(git -C r4 rev-parse HEAD:subdir/bar) &&
git -C r4 rev-list --objects --filter=tree:1 HEAD $blob_hash >actual &&
grep ^$blob_hash actual
'
# Delete some loose objects and use rev-list, but WITHOUT any filtering.
# This models previously omitted objects that we did not receive.
@ -324,4 +444,21 @@ test_expect_success 'rev-list W/ missing=allow-any' '
git -C r1 rev-list --quiet --missing=allow-any --objects HEAD
'
# Test expansion of filter specs.
test_expect_success 'expand blob limit in protocol' '
git -C r2 config --local uploadpack.allowfilter 1 &&
GIT_TRACE_PACKET="$(pwd)/trace" git -c protocol.version=2 clone \
--filter=blob:limit=1k "file://$(pwd)/r2" limit &&
! grep "blob:limit=1k" trace &&
grep "blob:limit=1024" trace
'
test_expect_success 'expand tree depth limit in protocol' '
GIT_TRACE_PACKET="$(pwd)/tree_trace" git -c protocol.version=2 clone \
--filter=tree:0k "file://$(pwd)/r2" tree &&
! grep "tree:0k" tree_trace &&
grep "tree:0" tree_trace
'
test_done

View File

@ -679,10 +679,15 @@ static int fetch(struct transport *transport,
if (data->transport_options.update_shallow)
set_helper_option(transport, "update-shallow", "true");
if (data->transport_options.filter_options.choice)
set_helper_option(
transport, "filter",
data->transport_options.filter_options.filter_spec);
if (data->transport_options.filter_options.choice) {
struct strbuf expanded_filter_spec = STRBUF_INIT;
expand_list_objects_filter_spec(
&data->transport_options.filter_options,
&expanded_filter_spec);
set_helper_option(transport, "filter",
expanded_filter_spec.buf);
strbuf_release(&expanded_filter_spec);
}
if (data->transport_options.negotiation_tips)
warning("Ignoring --negotiation-tip because the protocol does not support it.");

View File

@ -139,14 +139,17 @@ static void create_pack_file(const struct object_array *have_obj,
if (use_include_tag)
argv_array_push(&pack_objects.args, "--include-tag");
if (filter_options.filter_spec) {
struct strbuf expanded_filter_spec = STRBUF_INIT;
expand_list_objects_filter_spec(&filter_options,
&expanded_filter_spec);
if (pack_objects.use_shell) {
struct strbuf buf = STRBUF_INIT;
sq_quote_buf(&buf, filter_options.filter_spec);
sq_quote_buf(&buf, expanded_filter_spec.buf);
argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf);
strbuf_release(&buf);
} else {
argv_array_pushf(&pack_objects.args, "--filter=%s",
filter_options.filter_spec);
expanded_filter_spec.buf);
}
}