list-objects-filter: teach tree:# how to handle >0
Implement positive values for <depth> in the tree:<depth> filter. The exact semantics are described in Documentation/rev-list-options.txt. The long-term goal at the end of this is to allow a partial clone to eagerly fetch an entire directory of files by fetching a tree and specifying <depth>=1. This, for instance, would make a build operation fast and convenient. It is fast because the partial clone does not need to fetch each file individually, and convenient because the user does not need to supply a sparse-checkout specification. Another way of considering this feature is as a way to reduce round-trips, since the client can get any number of levels of directories in a single request, rather than wait for each level of tree objects to come back, whose entries are used to construct a new request. Signed-off-by: Matthew DeVore <matvore@google.com> Reviewed-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
0aa9d8aa6c
commit
c813a7c35f
@ -734,8 +734,13 @@ specification contained in <path>.
|
|||||||
+
|
+
|
||||||
The form '--filter=tree:<depth>' omits all blobs and trees whose depth
|
The form '--filter=tree:<depth>' omits all blobs and trees whose depth
|
||||||
from the root tree is >= <depth> (minimum depth if an object is located
|
from the root tree is >= <depth> (minimum depth if an object is located
|
||||||
at multiple depths in the commits traversed). Currently, only <depth>=0
|
at multiple depths in the commits traversed). <depth>=0 will not include
|
||||||
is supported, which omits all blobs and trees.
|
any trees or blobs unless included explicitly in the command-line (or
|
||||||
|
standard input when --stdin is used). <depth>=1 will include only the
|
||||||
|
tree and blobs which are referenced directly by a commit reachable from
|
||||||
|
<commit> or an explicitly-given object. <depth>=2 is like <depth>=1
|
||||||
|
while also including trees and blobs one more level removed from an
|
||||||
|
explicitly-given commit or tree.
|
||||||
|
|
||||||
--no-filter::
|
--no-filter::
|
||||||
Turn off any previous `--filter=` argument.
|
Turn off any previous `--filter=` argument.
|
||||||
|
@ -50,16 +50,15 @@ static int gently_parse_list_objects_filter(
|
|||||||
}
|
}
|
||||||
|
|
||||||
} else if (skip_prefix(arg, "tree:", &v0)) {
|
} else if (skip_prefix(arg, "tree:", &v0)) {
|
||||||
unsigned long depth;
|
if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) {
|
||||||
if (!git_parse_ulong(v0, &depth) || depth != 0) {
|
|
||||||
if (errbuf) {
|
if (errbuf) {
|
||||||
strbuf_addstr(
|
strbuf_addstr(
|
||||||
errbuf,
|
errbuf,
|
||||||
_("only 'tree:0' is supported"));
|
_("expected 'tree:<depth>'"));
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
filter_options->choice = LOFC_TREE_NONE;
|
filter_options->choice = LOFC_TREE_DEPTH;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
} else if (skip_prefix(arg, "sparse:oid=", &v0)) {
|
} else if (skip_prefix(arg, "sparse:oid=", &v0)) {
|
||||||
|
@ -10,7 +10,7 @@ enum list_objects_filter_choice {
|
|||||||
LOFC_DISABLED = 0,
|
LOFC_DISABLED = 0,
|
||||||
LOFC_BLOB_NONE,
|
LOFC_BLOB_NONE,
|
||||||
LOFC_BLOB_LIMIT,
|
LOFC_BLOB_LIMIT,
|
||||||
LOFC_TREE_NONE,
|
LOFC_TREE_DEPTH,
|
||||||
LOFC_SPARSE_OID,
|
LOFC_SPARSE_OID,
|
||||||
LOFC_SPARSE_PATH,
|
LOFC_SPARSE_PATH,
|
||||||
LOFC__COUNT /* must be last */
|
LOFC__COUNT /* must be last */
|
||||||
@ -44,6 +44,7 @@ struct list_objects_filter_options {
|
|||||||
struct object_id *sparse_oid_value;
|
struct object_id *sparse_oid_value;
|
||||||
char *sparse_path_value;
|
char *sparse_path_value;
|
||||||
unsigned long blob_limit_value;
|
unsigned long blob_limit_value;
|
||||||
|
unsigned long tree_exclude_depth;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Normalized command line arguments */
|
/* Normalized command line arguments */
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "list-objects.h"
|
#include "list-objects.h"
|
||||||
#include "list-objects-filter.h"
|
#include "list-objects-filter.h"
|
||||||
#include "list-objects-filter-options.h"
|
#include "list-objects-filter-options.h"
|
||||||
|
#include "oidmap.h"
|
||||||
#include "oidset.h"
|
#include "oidset.h"
|
||||||
#include "object-store.h"
|
#include "object-store.h"
|
||||||
|
|
||||||
@ -84,11 +85,43 @@ static void *filter_blobs_none__init(
|
|||||||
* A filter for list-objects to omit ALL trees and blobs from the traversal.
|
* A filter for list-objects to omit ALL trees and blobs from the traversal.
|
||||||
* Can OPTIONALLY collect a list of the omitted OIDs.
|
* Can OPTIONALLY collect a list of the omitted OIDs.
|
||||||
*/
|
*/
|
||||||
struct filter_trees_none_data {
|
struct filter_trees_depth_data {
|
||||||
struct oidset *omits;
|
struct oidset *omits;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Maps trees to the minimum depth at which they were seen. It is not
|
||||||
|
* necessary to re-traverse a tree at deeper or equal depths than it has
|
||||||
|
* already been traversed.
|
||||||
|
*
|
||||||
|
* We can't use LOFR_MARK_SEEN for tree objects since this will prevent
|
||||||
|
* it from being traversed at shallower depths.
|
||||||
|
*/
|
||||||
|
struct oidmap seen_at_depth;
|
||||||
|
|
||||||
|
unsigned long exclude_depth;
|
||||||
|
unsigned long current_depth;
|
||||||
};
|
};
|
||||||
|
|
||||||
static enum list_objects_filter_result filter_trees_none(
|
struct seen_map_entry {
|
||||||
|
struct oidmap_entry base;
|
||||||
|
size_t depth;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void filter_trees_update_omits(
|
||||||
|
struct object *obj,
|
||||||
|
struct filter_trees_depth_data *filter_data,
|
||||||
|
int include_it)
|
||||||
|
{
|
||||||
|
if (!filter_data->omits)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (include_it)
|
||||||
|
oidset_remove(filter_data->omits, &obj->oid);
|
||||||
|
else
|
||||||
|
oidset_insert(filter_data->omits, &obj->oid);
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum list_objects_filter_result filter_trees_depth(
|
||||||
struct repository *r,
|
struct repository *r,
|
||||||
enum list_objects_filter_situation filter_situation,
|
enum list_objects_filter_situation filter_situation,
|
||||||
struct object *obj,
|
struct object *obj,
|
||||||
@ -96,43 +129,86 @@ static enum list_objects_filter_result filter_trees_none(
|
|||||||
const char *filename,
|
const char *filename,
|
||||||
void *filter_data_)
|
void *filter_data_)
|
||||||
{
|
{
|
||||||
struct filter_trees_none_data *filter_data = filter_data_;
|
struct filter_trees_depth_data *filter_data = filter_data_;
|
||||||
|
struct seen_map_entry *seen_info;
|
||||||
|
int include_it = filter_data->current_depth <
|
||||||
|
filter_data->exclude_depth;
|
||||||
|
int filter_res;
|
||||||
|
int already_seen;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that we do not use _MARK_SEEN in order to allow re-traversal in
|
||||||
|
* case we encounter a tree or blob again at a shallower depth.
|
||||||
|
*/
|
||||||
|
|
||||||
switch (filter_situation) {
|
switch (filter_situation) {
|
||||||
default:
|
default:
|
||||||
BUG("unknown filter_situation: %d", filter_situation);
|
BUG("unknown filter_situation: %d", filter_situation);
|
||||||
|
|
||||||
case LOFS_BEGIN_TREE:
|
|
||||||
case LOFS_BLOB:
|
|
||||||
if (filter_data->omits) {
|
|
||||||
oidset_insert(filter_data->omits, &obj->oid);
|
|
||||||
/* _MARK_SEEN but not _DO_SHOW (hard omit) */
|
|
||||||
return LOFR_MARK_SEEN;
|
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* Not collecting omits so no need to to traverse tree.
|
|
||||||
*/
|
|
||||||
return LOFR_SKIP_TREE | LOFR_MARK_SEEN;
|
|
||||||
}
|
|
||||||
|
|
||||||
case LOFS_END_TREE:
|
case LOFS_END_TREE:
|
||||||
assert(obj->type == OBJ_TREE);
|
assert(obj->type == OBJ_TREE);
|
||||||
|
filter_data->current_depth--;
|
||||||
return LOFR_ZERO;
|
return LOFR_ZERO;
|
||||||
|
|
||||||
|
case LOFS_BLOB:
|
||||||
|
filter_trees_update_omits(obj, filter_data, include_it);
|
||||||
|
return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO;
|
||||||
|
|
||||||
|
case LOFS_BEGIN_TREE:
|
||||||
|
seen_info = oidmap_get(
|
||||||
|
&filter_data->seen_at_depth, &obj->oid);
|
||||||
|
if (!seen_info) {
|
||||||
|
seen_info = xcalloc(1, sizeof(*seen_info));
|
||||||
|
oidcpy(&seen_info->base.oid, &obj->oid);
|
||||||
|
seen_info->depth = filter_data->current_depth;
|
||||||
|
oidmap_put(&filter_data->seen_at_depth, seen_info);
|
||||||
|
already_seen = 0;
|
||||||
|
} else {
|
||||||
|
already_seen =
|
||||||
|
filter_data->current_depth >= seen_info->depth;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (already_seen) {
|
||||||
|
filter_res = LOFR_SKIP_TREE;
|
||||||
|
} else {
|
||||||
|
seen_info->depth = filter_data->current_depth;
|
||||||
|
filter_trees_update_omits(obj, filter_data, include_it);
|
||||||
|
|
||||||
|
if (include_it)
|
||||||
|
filter_res = LOFR_DO_SHOW;
|
||||||
|
else if (filter_data->omits)
|
||||||
|
filter_res = LOFR_ZERO;
|
||||||
|
else
|
||||||
|
filter_res = LOFR_SKIP_TREE;
|
||||||
|
}
|
||||||
|
|
||||||
|
filter_data->current_depth++;
|
||||||
|
return filter_res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void* filter_trees_none__init(
|
static void filter_trees_free(void *filter_data) {
|
||||||
|
struct filter_trees_depth_data *d = filter_data;
|
||||||
|
if (!d)
|
||||||
|
return;
|
||||||
|
oidmap_free(&d->seen_at_depth, 1);
|
||||||
|
free(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *filter_trees_depth__init(
|
||||||
struct oidset *omitted,
|
struct oidset *omitted,
|
||||||
struct list_objects_filter_options *filter_options,
|
struct list_objects_filter_options *filter_options,
|
||||||
filter_object_fn *filter_fn,
|
filter_object_fn *filter_fn,
|
||||||
filter_free_fn *filter_free_fn)
|
filter_free_fn *filter_free_fn)
|
||||||
{
|
{
|
||||||
struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
|
struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
|
||||||
d->omits = omitted;
|
d->omits = omitted;
|
||||||
|
oidmap_init(&d->seen_at_depth, 0);
|
||||||
|
d->exclude_depth = filter_options->tree_exclude_depth;
|
||||||
|
d->current_depth = 0;
|
||||||
|
|
||||||
*filter_fn = filter_trees_none;
|
*filter_fn = filter_trees_depth;
|
||||||
*filter_free_fn = free;
|
*filter_free_fn = filter_trees_free;
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -430,7 +506,7 @@ static filter_init_fn s_filters[] = {
|
|||||||
NULL,
|
NULL,
|
||||||
filter_blobs_none__init,
|
filter_blobs_none__init,
|
||||||
filter_blobs_limit__init,
|
filter_blobs_limit__init,
|
||||||
filter_trees_none__init,
|
filter_trees_depth__init,
|
||||||
filter_sparse_oid__init,
|
filter_sparse_oid__init,
|
||||||
filter_sparse_path__init,
|
filter_sparse_path__init,
|
||||||
};
|
};
|
||||||
|
@ -294,6 +294,117 @@ test_expect_success 'filter a GIANT tree through tree:0' '
|
|||||||
! grep "Skipping contents of tree [^.]" filter_trace
|
! grep "Skipping contents of tree [^.]" filter_trace
|
||||||
'
|
'
|
||||||
|
|
||||||
|
# Test tree:# filters.
|
||||||
|
|
||||||
|
expect_has () {
|
||||||
|
commit=$1 &&
|
||||||
|
name=$2 &&
|
||||||
|
|
||||||
|
hash=$(git -C r3 rev-parse $commit:$name) &&
|
||||||
|
grep "^$hash $name$" actual
|
||||||
|
}
|
||||||
|
|
||||||
|
test_expect_success 'verify tree:1 includes root trees' '
|
||||||
|
git -C r3 rev-list --objects --filter=tree:1 HEAD >actual &&
|
||||||
|
|
||||||
|
# We should get two root directories and two commits.
|
||||||
|
expect_has HEAD "" &&
|
||||||
|
expect_has HEAD~1 "" &&
|
||||||
|
test_line_count = 4 actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'verify tree:2 includes root trees and immediate children' '
|
||||||
|
git -C r3 rev-list --objects --filter=tree:2 HEAD >actual &&
|
||||||
|
|
||||||
|
expect_has HEAD "" &&
|
||||||
|
expect_has HEAD~1 "" &&
|
||||||
|
expect_has HEAD dir1 &&
|
||||||
|
expect_has HEAD pattern &&
|
||||||
|
expect_has HEAD sparse1 &&
|
||||||
|
expect_has HEAD sparse2 &&
|
||||||
|
|
||||||
|
# There are also 2 commit objects
|
||||||
|
test_line_count = 8 actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'verify tree:3 includes everything expected' '
|
||||||
|
git -C r3 rev-list --objects --filter=tree:3 HEAD >actual &&
|
||||||
|
|
||||||
|
expect_has HEAD "" &&
|
||||||
|
expect_has HEAD~1 "" &&
|
||||||
|
expect_has HEAD dir1 &&
|
||||||
|
expect_has HEAD dir1/sparse1 &&
|
||||||
|
expect_has HEAD dir1/sparse2 &&
|
||||||
|
expect_has HEAD pattern &&
|
||||||
|
expect_has HEAD sparse1 &&
|
||||||
|
expect_has HEAD sparse2 &&
|
||||||
|
|
||||||
|
# There are also 2 commit objects
|
||||||
|
test_line_count = 10 actual
|
||||||
|
'
|
||||||
|
|
||||||
|
# Test provisional omit collection logic with a repo that has objects appearing
|
||||||
|
# at multiple depths - first deeper than the filter's threshold, then shallow.
|
||||||
|
|
||||||
|
test_expect_success 'setup r4' '
|
||||||
|
git init r4 &&
|
||||||
|
|
||||||
|
echo foo > r4/foo &&
|
||||||
|
mkdir r4/subdir &&
|
||||||
|
echo bar > r4/subdir/bar &&
|
||||||
|
|
||||||
|
mkdir r4/filt &&
|
||||||
|
cp -r r4/foo r4/subdir r4/filt &&
|
||||||
|
|
||||||
|
git -C r4 add foo subdir filt &&
|
||||||
|
git -C r4 commit -m "commit msg"
|
||||||
|
'
|
||||||
|
|
||||||
|
expect_has_with_different_name () {
|
||||||
|
repo=$1 &&
|
||||||
|
name=$2 &&
|
||||||
|
|
||||||
|
hash=$(git -C $repo rev-parse HEAD:$name) &&
|
||||||
|
! grep "^$hash $name$" actual &&
|
||||||
|
grep "^$hash " actual &&
|
||||||
|
! grep "~$hash" actual
|
||||||
|
}
|
||||||
|
|
||||||
|
test_expect_success 'test tree:# filter provisional omit for blob and tree' '
|
||||||
|
git -C r4 rev-list --objects --filter-print-omitted --filter=tree:2 \
|
||||||
|
HEAD >actual &&
|
||||||
|
expect_has_with_different_name r4 filt/foo &&
|
||||||
|
expect_has_with_different_name r4 filt/subdir
|
||||||
|
'
|
||||||
|
|
||||||
|
# Test tree:<depth> where a tree is iterated to twice - once where a subentry is
|
||||||
|
# too deep to be included, and again where the blob inside it is shallow enough
|
||||||
|
# to be included. This makes sure we don't use LOFR_MARK_SEEN incorrectly (we
|
||||||
|
# can't use it because a tree can be iterated over again at a lower depth).
|
||||||
|
|
||||||
|
test_expect_success 'tree:<depth> where we iterate over tree at two levels' '
|
||||||
|
git init r5 &&
|
||||||
|
|
||||||
|
mkdir -p r5/a/subdir/b &&
|
||||||
|
echo foo > r5/a/subdir/b/foo &&
|
||||||
|
|
||||||
|
mkdir -p r5/subdir/b &&
|
||||||
|
echo foo > r5/subdir/b/foo &&
|
||||||
|
|
||||||
|
git -C r5 add a subdir &&
|
||||||
|
git -C r5 commit -m "commit msg" &&
|
||||||
|
|
||||||
|
git -C r5 rev-list --objects --filter=tree:4 HEAD >actual &&
|
||||||
|
expect_has_with_different_name r5 a/subdir/b/foo
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'tree:<depth> which filters out blob but given as arg' '
|
||||||
|
blob_hash=$(git -C r4 rev-parse HEAD:subdir/bar) &&
|
||||||
|
|
||||||
|
git -C r4 rev-list --objects --filter=tree:1 HEAD $blob_hash >actual &&
|
||||||
|
grep ^$blob_hash actual
|
||||||
|
'
|
||||||
|
|
||||||
# Delete some loose objects and use rev-list, but WITHOUT any filtering.
|
# Delete some loose objects and use rev-list, but WITHOUT any filtering.
|
||||||
# This models previously omitted objects that we did not receive.
|
# This models previously omitted objects that we did not receive.
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user