2023-02-24 01:09:24 +01:00
|
|
|
#include "git-compat-util.h"
|
|
|
|
#include "alloc.h"
|
2017-11-21 21:58:50 +01:00
|
|
|
#include "commit.h"
|
|
|
|
#include "config.h"
|
2023-02-24 01:09:24 +01:00
|
|
|
#include "gettext.h"
|
2017-11-21 21:58:50 +01:00
|
|
|
#include "revision.h"
|
2020-07-28 22:23:39 +02:00
|
|
|
#include "strvec.h"
|
2017-11-21 21:58:50 +01:00
|
|
|
#include "list-objects.h"
|
|
|
|
#include "list-objects-filter.h"
|
|
|
|
#include "list-objects-filter-options.h"
|
2019-06-25 15:40:31 +02:00
|
|
|
#include "promisor-remote.h"
|
2019-06-28 00:54:12 +02:00
|
|
|
#include "trace.h"
|
2019-06-28 00:54:08 +02:00
|
|
|
#include "url.h"
|
|
|
|
|
|
|
|
static int parse_combine_filter(
|
|
|
|
struct list_objects_filter_options *filter_options,
|
|
|
|
const char *arg,
|
|
|
|
struct strbuf *errbuf);
|
2017-11-21 21:58:50 +01:00
|
|
|
|
2020-07-31 22:26:26 +02:00
|
|
|
const char *list_object_filter_config_name(enum list_objects_filter_choice c)
|
|
|
|
{
|
|
|
|
switch (c) {
|
|
|
|
case LOFC_DISABLED:
|
|
|
|
/* we have no name for "no filter at all" */
|
|
|
|
break;
|
|
|
|
case LOFC_BLOB_NONE:
|
|
|
|
return "blob:none";
|
|
|
|
case LOFC_BLOB_LIMIT:
|
|
|
|
return "blob:limit";
|
|
|
|
case LOFC_TREE_DEPTH:
|
|
|
|
return "tree";
|
|
|
|
case LOFC_SPARSE_OID:
|
|
|
|
return "sparse:oid";
|
2021-04-19 13:46:53 +02:00
|
|
|
case LOFC_OBJECT_TYPE:
|
|
|
|
return "object:type";
|
2020-07-31 22:26:26 +02:00
|
|
|
case LOFC_COMBINE:
|
|
|
|
return "combine";
|
|
|
|
case LOFC__COUNT:
|
|
|
|
/* not a real filter type; just the count of all filters */
|
|
|
|
break;
|
|
|
|
}
|
2020-11-14 09:43:26 +01:00
|
|
|
BUG("list_object_filter_config_name: invalid argument '%d'", c);
|
2020-07-31 22:26:26 +02:00
|
|
|
}
|
|
|
|
|
2022-03-09 17:01:39 +01:00
|
|
|
int gently_parse_list_objects_filter(
|
2017-12-08 16:58:45 +01:00
|
|
|
struct list_objects_filter_options *filter_options,
|
|
|
|
const char *arg,
|
|
|
|
struct strbuf *errbuf)
|
2017-11-21 21:58:50 +01:00
|
|
|
{
|
|
|
|
const char *v0;
|
|
|
|
|
2019-06-25 15:40:32 +02:00
|
|
|
if (!arg)
|
|
|
|
return 0;
|
|
|
|
|
2019-06-28 00:54:09 +02:00
|
|
|
if (filter_options->choice)
|
|
|
|
BUG("filter_options already populated");
|
2017-11-21 21:58:50 +01:00
|
|
|
|
|
|
|
if (!strcmp(arg, "blob:none")) {
|
|
|
|
filter_options->choice = LOFC_BLOB_NONE;
|
|
|
|
return 0;
|
|
|
|
|
2017-12-08 16:58:45 +01:00
|
|
|
} else if (skip_prefix(arg, "blob:limit=", &v0)) {
|
|
|
|
if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
|
|
|
|
filter_options->choice = LOFC_BLOB_LIMIT;
|
|
|
|
return 0;
|
|
|
|
}
|
2017-11-21 21:58:50 +01:00
|
|
|
|
2018-10-05 23:31:27 +02:00
|
|
|
} else if (skip_prefix(arg, "tree:", &v0)) {
|
2019-01-09 03:59:13 +01:00
|
|
|
if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) {
|
2019-06-28 00:54:07 +02:00
|
|
|
strbuf_addstr(errbuf, _("expected 'tree:<depth>'"));
|
2018-10-05 23:31:27 +02:00
|
|
|
return 1;
|
|
|
|
}
|
2019-01-09 03:59:13 +01:00
|
|
|
filter_options->choice = LOFC_TREE_DEPTH;
|
2018-10-05 23:31:27 +02:00
|
|
|
return 0;
|
|
|
|
|
2017-12-08 16:58:45 +01:00
|
|
|
} else if (skip_prefix(arg, "sparse:oid=", &v0)) {
|
list-objects-filter: delay parsing of sparse oid
The list-objects-filter code has two steps to its initialization:
1. parse_list_objects_filter() makes sure the spec is a filter we know
about and is syntactically correct. This step is done by "rev-list"
or "upload-pack" that is going to apply a filter, but also by "git
clone" or "git fetch" before they send the spec across the wire.
2. list_objects_filter__init() runs the type-specific initialization
(using function pointers established in step 1). This happens at
the start of traverse_commit_list_filtered(), when we're about to
actually use the filter.
It's a good idea to parse as much as we can in step 1, in order to catch
problems early (e.g., a blob size limit that isn't a number). But one
thing we _shouldn't_ do is resolve any oids at that step (e.g., for
sparse-file contents specified by oid). In the case of a fetch, the oid
has to be resolved on the remote side.
The current code does resolve the oid during the parse phase, but
ignores any error (which we must do, because we might just be sending
the spec across the wire). This leads to two bugs:
- if we're not in a repository (e.g., because it's git-clone parsing
the spec), then we trigger a BUG() trying to resolve the name
- if we did hit the error case, we still have to notice that later and
bail. The code path in rev-list handles this, but the one in
upload-pack does not, leading to a segfault.
We can fix both by moving the oid resolution into the sparse-oid init
function. At that point we know we have a repository (because we're
about to traverse), and handling the error there fixes the segfault.
As a bonus, we can drop the NULL sparse_oid_value check in rev-list,
since this is now handled in the sparse-oid-filter init function.
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-15 18:12:44 +02:00
|
|
|
filter_options->sparse_oid_name = xstrdup(v0);
|
2017-11-21 21:58:50 +01:00
|
|
|
filter_options->choice = LOFC_SPARSE_OID;
|
|
|
|
return 0;
|
|
|
|
|
2017-12-08 16:58:45 +01:00
|
|
|
} else if (skip_prefix(arg, "sparse:path=", &v0)) {
|
2019-05-29 14:44:32 +02:00
|
|
|
if (errbuf) {
|
|
|
|
strbuf_addstr(
|
|
|
|
errbuf,
|
|
|
|
_("sparse:path filters support has been dropped"));
|
|
|
|
}
|
|
|
|
return 1;
|
2019-06-28 00:54:08 +02:00
|
|
|
|
2021-04-19 13:46:53 +02:00
|
|
|
} else if (skip_prefix(arg, "object:type=", &v0)) {
|
|
|
|
int type = type_from_string_gently(v0, strlen(v0), 1);
|
|
|
|
if (type < 0) {
|
2021-05-20 09:42:14 +02:00
|
|
|
strbuf_addf(errbuf, _("'%s' for 'object:type=<type>' is "
|
2021-04-19 13:46:53 +02:00
|
|
|
"not a valid object type"), v0);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
filter_options->object_type = type;
|
|
|
|
filter_options->choice = LOFC_OBJECT_TYPE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
2019-06-28 00:54:08 +02:00
|
|
|
} else if (skip_prefix(arg, "combine:", &v0)) {
|
|
|
|
return parse_combine_filter(filter_options, v0, errbuf);
|
|
|
|
|
2017-11-21 21:58:50 +01:00
|
|
|
}
|
2019-02-16 12:24:41 +01:00
|
|
|
/*
|
|
|
|
* Please update _git_fetch() in git-completion.bash when you
|
|
|
|
* add new filters
|
|
|
|
*/
|
2017-11-21 21:58:50 +01:00
|
|
|
|
2019-06-28 00:54:07 +02:00
|
|
|
strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg);
|
2018-10-05 23:31:26 +02:00
|
|
|
|
list-objects-filter: add and use initializers
In 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec
strings, 2022-09-08), we noted that the filter_spec string_list was
inconsistent in how it handled memory ownership of strings stored in the
list. The fix there was a bit of a band-aid to set the "strdup_strings"
variable right before adding anything.
That works OK, and it lets the users of the API continue to
zero-initialize the struct. But it makes the code a bit hard to follow
and accident-prone, as any other spots appending the filter_spec need to
think about whether to set the strdup_strings value, too (there's one
such spot in partial_clone_get_default_filter_spec(), which is probably
a possible memory leak).
So let's do that full cleanup now. We'll introduce a
LIST_OBJECTS_FILTER_INIT macro and matching function, and use them as
appropriate (though it is for the "_options" struct, this matches the
corresponding list_objects_filter_release() function).
This is harder than it seems! Many other structs, like
git_transport_data, embed the filter struct. So they need to initialize
it themselves even if the rest of the enclosing struct is OK with
zero-initialization. I found all of the relevant spots by grepping
manually for declarations of list_objects_filter_options. And then doing
so recursively for structs which embed it, and ones which embed those,
and so on.
I'm pretty sure I got everything, but there's no change that would alert
the compiler if any topics in flight added new declarations. To catch
this case, we now double-check in the parsing function that things were
initialized as expected and BUG() if appropriate.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:07 +02:00
|
|
|
list_objects_filter_init(filter_options);
|
2017-12-08 16:58:45 +01:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2019-06-28 00:54:08 +02:00
|
|
|
static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?";
|
|
|
|
|
|
|
|
static int has_reserved_character(
|
|
|
|
struct strbuf *sub_spec, struct strbuf *errbuf)
|
2017-12-08 16:58:45 +01:00
|
|
|
{
|
2019-06-28 00:54:08 +02:00
|
|
|
const char *c = sub_spec->buf;
|
|
|
|
while (*c) {
|
|
|
|
if (*c <= ' ' || strchr(RESERVED_NON_WS, *c)) {
|
|
|
|
strbuf_addf(
|
|
|
|
errbuf,
|
|
|
|
_("must escape char in sub-filter-spec: '%c'"),
|
|
|
|
*c);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
c++;
|
|
|
|
}
|
|
|
|
|
2017-11-21 21:58:50 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-06-28 00:54:08 +02:00
|
|
|
static int parse_combine_subfilter(
|
|
|
|
struct list_objects_filter_options *filter_options,
|
|
|
|
struct strbuf *subspec,
|
|
|
|
struct strbuf *errbuf)
|
|
|
|
{
|
2019-06-28 00:54:13 +02:00
|
|
|
size_t new_index = filter_options->sub_nr;
|
2019-06-28 00:54:08 +02:00
|
|
|
char *decoded;
|
|
|
|
int result;
|
|
|
|
|
2019-06-28 00:54:13 +02:00
|
|
|
ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
|
|
|
|
filter_options->sub_alloc);
|
list-objects-filter: initialize sub-filter structs
Since commit c54980ab83 (list-objects-filter: convert filter_spec to a
strbuf, 2022-09-11), building with SANITIZE=undefined triggers an error
in t5616.
The problem is that we end up with a strbuf that has been
zero-initialized instead of via STRBUF_INIT. Feeding that strbuf to
strbuf_addbuf() in list_objects_filter_copy() means we will call memcpy
like:
memcpy(some_actual_buffer, NULL, 0);
This works on most systems because we're copying zero bytes, but it is
technically undefined behavior to ever pass NULL to memcpy.
Even though c54980ab83 is where the bug manifests, that is only because
we switched away from a string_list, which is OK with being
zero-initialized (though it may cause other problems by not duplicating
the strings, it happened to be OK in this instance).
The actual bug is caused by the commit before that, 2a01bdedf8
(list-objects-filter: add and use initializers, 2022-09-11). There we
consistently initialize the top-level filter structs, but we forgot the
dynamically allocated ones we stick in filter_options->sub when creating
combined filters.
Note that we need to fix two spots here: where we parse a "combine:"
filter, but also where we transform from a single-filter into a combined
one after seeing multiple "--filter" options. In the second spot, we'll
do some minor refactoring to avoid repeating our very-long array index.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-22 11:35:33 +02:00
|
|
|
list_objects_filter_init(&filter_options->sub[new_index]);
|
2019-06-28 00:54:08 +02:00
|
|
|
|
|
|
|
decoded = url_percent_decode(subspec->buf);
|
|
|
|
|
|
|
|
result = has_reserved_character(subspec, errbuf) ||
|
|
|
|
gently_parse_list_objects_filter(
|
|
|
|
&filter_options->sub[new_index], decoded, errbuf);
|
|
|
|
|
|
|
|
free(decoded);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_combine_filter(
|
|
|
|
struct list_objects_filter_options *filter_options,
|
|
|
|
const char *arg,
|
|
|
|
struct strbuf *errbuf)
|
|
|
|
{
|
|
|
|
struct strbuf **subspecs = strbuf_split_str(arg, '+', 0);
|
|
|
|
size_t sub;
|
|
|
|
int result = 0;
|
|
|
|
|
|
|
|
if (!subspecs[0]) {
|
|
|
|
strbuf_addstr(errbuf, _("expected something after combine:"));
|
|
|
|
result = 1;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (sub = 0; subspecs[sub] && !result; sub++) {
|
|
|
|
if (subspecs[sub + 1]) {
|
|
|
|
/*
|
|
|
|
* This is not the last subspec. Remove trailing "+" so
|
|
|
|
* we can parse it.
|
|
|
|
*/
|
|
|
|
size_t last = subspecs[sub]->len - 1;
|
|
|
|
assert(subspecs[sub]->buf[last] == '+');
|
|
|
|
strbuf_remove(subspecs[sub], last, 1);
|
|
|
|
}
|
|
|
|
result = parse_combine_subfilter(
|
|
|
|
filter_options, subspecs[sub], errbuf);
|
|
|
|
}
|
|
|
|
|
|
|
|
filter_options->choice = LOFC_COMBINE;
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
strbuf_list_free(subspecs);
|
2022-09-11 06:58:09 +02:00
|
|
|
if (result)
|
2019-06-28 00:54:08 +02:00
|
|
|
list_objects_filter_release(filter_options);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2019-06-28 00:54:12 +02:00
|
|
|
static int allow_unencoded(char ch)
|
|
|
|
{
|
|
|
|
if (ch <= ' ' || ch == '%' || ch == '+')
|
|
|
|
return 0;
|
|
|
|
return !strchr(RESERVED_NON_WS, ch);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void filter_spec_append_urlencode(
|
|
|
|
struct list_objects_filter_options *filter, const char *raw)
|
2017-12-08 16:58:45 +01:00
|
|
|
{
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
size_t orig_len = filter->filter_spec.len;
|
|
|
|
strbuf_addstr_urlencode(&filter->filter_spec, raw, allow_unencoded);
|
|
|
|
trace_printf("Add to combine filter-spec: %s\n",
|
|
|
|
filter->filter_spec.buf + orig_len);
|
2019-06-28 00:54:12 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Changes filter_options into an equivalent LOFC_COMBINE filter options
|
|
|
|
* instance. Does not do anything if filter_options is already LOFC_COMBINE.
|
|
|
|
*/
|
|
|
|
static void transform_to_combine_type(
|
|
|
|
struct list_objects_filter_options *filter_options)
|
|
|
|
{
|
|
|
|
assert(filter_options->choice);
|
|
|
|
if (filter_options->choice == LOFC_COMBINE)
|
|
|
|
return;
|
|
|
|
{
|
|
|
|
const int initial_sub_alloc = 2;
|
|
|
|
struct list_objects_filter_options *sub_array =
|
|
|
|
xcalloc(initial_sub_alloc, sizeof(*sub_array));
|
|
|
|
sub_array[0] = *filter_options;
|
list-objects-filter: add and use initializers
In 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec
strings, 2022-09-08), we noted that the filter_spec string_list was
inconsistent in how it handled memory ownership of strings stored in the
list. The fix there was a bit of a band-aid to set the "strdup_strings"
variable right before adding anything.
That works OK, and it lets the users of the API continue to
zero-initialize the struct. But it makes the code a bit hard to follow
and accident-prone, as any other spots appending the filter_spec need to
think about whether to set the strdup_strings value, too (there's one
such spot in partial_clone_get_default_filter_spec(), which is probably
a possible memory leak).
So let's do that full cleanup now. We'll introduce a
LIST_OBJECTS_FILTER_INIT macro and matching function, and use them as
appropriate (though it is for the "_options" struct, this matches the
corresponding list_objects_filter_release() function).
This is harder than it seems! Many other structs, like
git_transport_data, embed the filter struct. So they need to initialize
it themselves even if the rest of the enclosing struct is OK with
zero-initialization. I found all of the relevant spots by grepping
manually for declarations of list_objects_filter_options. And then doing
so recursively for structs which embed it, and ones which embed those,
and so on.
I'm pretty sure I got everything, but there's no change that would alert
the compiler if any topics in flight added new declarations. To catch
this case, we now double-check in the parsing function that things were
initialized as expected and BUG() if appropriate.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:07 +02:00
|
|
|
list_objects_filter_init(filter_options);
|
2019-06-28 00:54:12 +02:00
|
|
|
filter_options->sub = sub_array;
|
|
|
|
filter_options->sub_alloc = initial_sub_alloc;
|
|
|
|
}
|
|
|
|
filter_options->sub_nr = 1;
|
|
|
|
filter_options->choice = LOFC_COMBINE;
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
strbuf_addstr(&filter_options->filter_spec, "combine:");
|
2019-06-28 00:54:12 +02:00
|
|
|
filter_spec_append_urlencode(
|
|
|
|
filter_options,
|
|
|
|
list_objects_filter_spec(&filter_options->sub[0]));
|
|
|
|
/*
|
|
|
|
* We don't need the filter_spec strings for subfilter specs, only the
|
|
|
|
* top level.
|
|
|
|
*/
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
strbuf_release(&filter_options->sub[0].filter_spec);
|
2019-06-28 00:54:12 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void list_objects_filter_die_if_populated(
|
|
|
|
struct list_objects_filter_options *filter_options)
|
|
|
|
{
|
2019-06-28 00:54:09 +02:00
|
|
|
if (filter_options->choice)
|
|
|
|
die(_("multiple filter-specs cannot be combined"));
|
2019-06-28 00:54:12 +02:00
|
|
|
}
|
|
|
|
|
2019-06-28 00:54:14 +02:00
|
|
|
void parse_list_objects_filter(
|
2019-06-28 00:54:12 +02:00
|
|
|
struct list_objects_filter_options *filter_options,
|
|
|
|
const char *arg)
|
|
|
|
{
|
|
|
|
struct strbuf errbuf = STRBUF_INIT;
|
|
|
|
int parse_error;
|
|
|
|
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
if (!filter_options->filter_spec.buf)
|
list-objects-filter: add and use initializers
In 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec
strings, 2022-09-08), we noted that the filter_spec string_list was
inconsistent in how it handled memory ownership of strings stored in the
list. The fix there was a bit of a band-aid to set the "strdup_strings"
variable right before adding anything.
That works OK, and it lets the users of the API continue to
zero-initialize the struct. But it makes the code a bit hard to follow
and accident-prone, as any other spots appending the filter_spec need to
think about whether to set the strdup_strings value, too (there's one
such spot in partial_clone_get_default_filter_spec(), which is probably
a possible memory leak).
So let's do that full cleanup now. We'll introduce a
LIST_OBJECTS_FILTER_INIT macro and matching function, and use them as
appropriate (though it is for the "_options" struct, this matches the
corresponding list_objects_filter_release() function).
This is harder than it seems! Many other structs, like
git_transport_data, embed the filter struct. So they need to initialize
it themselves even if the rest of the enclosing struct is OK with
zero-initialization. I found all of the relevant spots by grepping
manually for declarations of list_objects_filter_options. And then doing
so recursively for structs which embed it, and ones which embed those,
and so on.
I'm pretty sure I got everything, but there's no change that would alert
the compiler if any topics in flight added new declarations. To catch
this case, we now double-check in the parsing function that things were
initialized as expected and BUG() if appropriate.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:07 +02:00
|
|
|
BUG("filter_options not properly initialized");
|
list_objects_filter_options: plug leak of filter_spec strings
The list_objects_filter_options struct contains a string_list to store
the filter_spec. Because we allow the options struct to be
zero-initialized by callers, the string_list's strdup_strings field is
generally not set.
Because we don't want to depend on the memory lifetimes of any strings
passed in to the list-objects API, everything we add to the string_list
is duplicated (either via xstrdup(), or via strbuf_detach()). So far so
good, but now we have a problem at cleanup time: when we clear the
list, the string_list API doesn't realize that it needs to free all of
those strings, and we leak them.
One option would be to set strdup_strings right before clearing the
list. But this is tricky for two reasons:
1. There's one spot, in partial_clone_get_default_filter_spec(),
that fails to duplicate its argument. We could fix that, but...
2. We clear the list in a surprising number of places. As you might
expect, we do so in list_objects_filter_release(). But we also
clear and rewrite it in expand_list_objects_filter_spec(),
list_objects_filter_spec(), and transform_to_combine_type().
We'd have to put the same hack in all of those spots.
Instead, let's just set strdup_strings before adding anything. That lets
us drop the extra manual xstrdup() calls, fixes the spot mentioned
in (1) above that _should_ be duplicating, and future-proofs further
calls. We do have to switch the strbuf_detach() calls to use the nodup
form, but that's an easy change, and the resulting code more clearly
shows the expected ownership transfer.
This also resolves a weird inconsistency: when we make a deep copy with
list_objects_filter_copy(), it initializes the copy's filter_spec with
string_list_init_dup(). So the copy frees its string_list memory
correctly, but accidentally leaks the extra manual-xstrdup()'d strings!
There is one hiccup, though. In an ideal world, everyone would allocate
the list_objects_filter_options struct with an initializer which used
STRING_LIST_INIT_DUP under the hood. But there are a bunch of existing
callers which think that zero-initializing is good enough. We can leave
them as-is by noting that the list is always initially populated via
parse_list_objects_filter(). So we can just initialize the
strdup_strings flag there.
This is arguably a band-aid, but it works reliably. And it doesn't make
anything harder if we want to switch all the callers later to a new
LIST_OBJECTS_FILTER_INIT.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-08 07:01:23 +02:00
|
|
|
|
2019-06-28 00:54:12 +02:00
|
|
|
if (!filter_options->choice) {
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
strbuf_addstr(&filter_options->filter_spec, arg);
|
2019-06-28 00:54:12 +02:00
|
|
|
|
|
|
|
parse_error = gently_parse_list_objects_filter(
|
|
|
|
filter_options, arg, &errbuf);
|
|
|
|
} else {
|
list-objects-filter: initialize sub-filter structs
Since commit c54980ab83 (list-objects-filter: convert filter_spec to a
strbuf, 2022-09-11), building with SANITIZE=undefined triggers an error
in t5616.
The problem is that we end up with a strbuf that has been
zero-initialized instead of via STRBUF_INIT. Feeding that strbuf to
strbuf_addbuf() in list_objects_filter_copy() means we will call memcpy
like:
memcpy(some_actual_buffer, NULL, 0);
This works on most systems because we're copying zero bytes, but it is
technically undefined behavior to ever pass NULL to memcpy.
Even though c54980ab83 is where the bug manifests, that is only because
we switched away from a string_list, which is OK with being
zero-initialized (though it may cause other problems by not duplicating
the strings, it happened to be OK in this instance).
The actual bug is caused by the commit before that, 2a01bdedf8
(list-objects-filter: add and use initializers, 2022-09-11). There we
consistently initialize the top-level filter structs, but we forgot the
dynamically allocated ones we stick in filter_options->sub when creating
combined filters.
Note that we need to fix two spots here: where we parse a "combine:"
filter, but also where we transform from a single-filter into a combined
one after seeing multiple "--filter" options. In the second spot, we'll
do some minor refactoring to avoid repeating our very-long array index.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-22 11:35:33 +02:00
|
|
|
struct list_objects_filter_options *sub;
|
|
|
|
|
2019-06-28 00:54:12 +02:00
|
|
|
/*
|
|
|
|
* Make filter_options an LOFC_COMBINE spec so we can trivially
|
|
|
|
* add subspecs to it.
|
|
|
|
*/
|
|
|
|
transform_to_combine_type(filter_options);
|
|
|
|
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
strbuf_addch(&filter_options->filter_spec, '+');
|
2019-06-28 00:54:12 +02:00
|
|
|
filter_spec_append_urlencode(filter_options, arg);
|
2019-06-28 00:54:13 +02:00
|
|
|
ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
|
|
|
|
filter_options->sub_alloc);
|
list-objects-filter: initialize sub-filter structs
Since commit c54980ab83 (list-objects-filter: convert filter_spec to a
strbuf, 2022-09-11), building with SANITIZE=undefined triggers an error
in t5616.
The problem is that we end up with a strbuf that has been
zero-initialized instead of via STRBUF_INIT. Feeding that strbuf to
strbuf_addbuf() in list_objects_filter_copy() means we will call memcpy
like:
memcpy(some_actual_buffer, NULL, 0);
This works on most systems because we're copying zero bytes, but it is
technically undefined behavior to ever pass NULL to memcpy.
Even though c54980ab83 is where the bug manifests, that is only because
we switched away from a string_list, which is OK with being
zero-initialized (though it may cause other problems by not duplicating
the strings, it happened to be OK in this instance).
The actual bug is caused by the commit before that, 2a01bdedf8
(list-objects-filter: add and use initializers, 2022-09-11). There we
consistently initialize the top-level filter structs, but we forgot the
dynamically allocated ones we stick in filter_options->sub when creating
combined filters.
Note that we need to fix two spots here: where we parse a "combine:"
filter, but also where we transform from a single-filter into a combined
one after seeing multiple "--filter" options. In the second spot, we'll
do some minor refactoring to avoid repeating our very-long array index.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-22 11:35:33 +02:00
|
|
|
sub = &filter_options->sub[filter_options->sub_nr - 1];
|
2019-06-28 00:54:12 +02:00
|
|
|
|
list-objects-filter: initialize sub-filter structs
Since commit c54980ab83 (list-objects-filter: convert filter_spec to a
strbuf, 2022-09-11), building with SANITIZE=undefined triggers an error
in t5616.
The problem is that we end up with a strbuf that has been
zero-initialized instead of via STRBUF_INIT. Feeding that strbuf to
strbuf_addbuf() in list_objects_filter_copy() means we will call memcpy
like:
memcpy(some_actual_buffer, NULL, 0);
This works on most systems because we're copying zero bytes, but it is
technically undefined behavior to ever pass NULL to memcpy.
Even though c54980ab83 is where the bug manifests, that is only because
we switched away from a string_list, which is OK with being
zero-initialized (though it may cause other problems by not duplicating
the strings, it happened to be OK in this instance).
The actual bug is caused by the commit before that, 2a01bdedf8
(list-objects-filter: add and use initializers, 2022-09-11). There we
consistently initialize the top-level filter structs, but we forgot the
dynamically allocated ones we stick in filter_options->sub when creating
combined filters.
Note that we need to fix two spots here: where we parse a "combine:"
filter, but also where we transform from a single-filter into a combined
one after seeing multiple "--filter" options. In the second spot, we'll
do some minor refactoring to avoid repeating our very-long array index.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-22 11:35:33 +02:00
|
|
|
list_objects_filter_init(sub);
|
|
|
|
parse_error = gently_parse_list_objects_filter(sub, arg,
|
|
|
|
&errbuf);
|
2019-06-28 00:54:12 +02:00
|
|
|
}
|
|
|
|
if (parse_error)
|
|
|
|
die("%s", errbuf.buf);
|
2017-11-21 21:58:50 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
int opt_parse_list_objects_filter(const struct option *opt,
|
|
|
|
const char *arg, int unset)
|
|
|
|
{
|
|
|
|
struct list_objects_filter_options *filter_options = opt->value;
|
|
|
|
|
2019-06-28 00:54:14 +02:00
|
|
|
if (unset || !arg)
|
2017-12-08 16:58:50 +01:00
|
|
|
list_objects_filter_set_no_filter(filter_options);
|
2019-06-28 00:54:14 +02:00
|
|
|
else
|
|
|
|
parse_list_objects_filter(filter_options, arg);
|
|
|
|
return 0;
|
2017-11-21 21:58:50 +01:00
|
|
|
}
|
2017-12-05 17:50:13 +01:00
|
|
|
|
2019-06-28 00:54:10 +02:00
|
|
|
const char *list_objects_filter_spec(struct list_objects_filter_options *filter)
|
2019-01-08 01:17:09 +01:00
|
|
|
{
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
if (!filter->filter_spec.len)
|
2019-06-28 00:54:10 +02:00
|
|
|
BUG("no filter_spec available for this filter");
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
return filter->filter_spec.buf;
|
2017-11-21 21:58:50 +01:00
|
|
|
}
|
2017-12-05 17:50:13 +01:00
|
|
|
|
2019-06-28 00:54:10 +02:00
|
|
|
const char *expand_list_objects_filter_spec(
|
|
|
|
struct list_objects_filter_options *filter)
|
2019-01-08 01:17:09 +01:00
|
|
|
{
|
2019-06-28 00:54:10 +02:00
|
|
|
if (filter->choice == LOFC_BLOB_LIMIT) {
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
strbuf_release(&filter->filter_spec);
|
|
|
|
strbuf_addf(&filter->filter_spec, "blob:limit=%lu",
|
2019-01-08 01:17:09 +01:00
|
|
|
filter->blob_limit_value);
|
2019-06-28 00:54:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return list_objects_filter_spec(filter);
|
2019-01-08 01:17:09 +01:00
|
|
|
}
|
|
|
|
|
2017-12-05 17:50:13 +01:00
|
|
|
void list_objects_filter_release(
|
|
|
|
struct list_objects_filter_options *filter_options)
|
|
|
|
{
|
2019-06-28 00:54:08 +02:00
|
|
|
size_t sub;
|
|
|
|
|
|
|
|
if (!filter_options)
|
|
|
|
return;
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
strbuf_release(&filter_options->filter_spec);
|
list-objects-filter: delay parsing of sparse oid
The list-objects-filter code has two steps to its initialization:
1. parse_list_objects_filter() makes sure the spec is a filter we know
about and is syntactically correct. This step is done by "rev-list"
or "upload-pack" that is going to apply a filter, but also by "git
clone" or "git fetch" before they send the spec across the wire.
2. list_objects_filter__init() runs the type-specific initialization
(using function pointers established in step 1). This happens at
the start of traverse_commit_list_filtered(), when we're about to
actually use the filter.
It's a good idea to parse as much as we can in step 1, in order to catch
problems early (e.g., a blob size limit that isn't a number). But one
thing we _shouldn't_ do is resolve any oids at that step (e.g., for
sparse-file contents specified by oid). In the case of a fetch, the oid
has to be resolved on the remote side.
The current code does resolve the oid during the parse phase, but
ignores any error (which we must do, because we might just be sending
the spec across the wire). This leads to two bugs:
- if we're not in a repository (e.g., because it's git-clone parsing
the spec), then we trigger a BUG() trying to resolve the name
- if we did hit the error case, we still have to notice that later and
bail. The code path in rev-list handles this, but the one in
upload-pack does not, leading to a segfault.
We can fix both by moving the oid resolution into the sparse-oid init
function. At that point we know we have a repository (because we're
about to traverse), and handling the error there fixes the segfault.
As a bonus, we can drop the NULL sparse_oid_value check in rev-list,
since this is now handled in the sparse-oid-filter init function.
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-15 18:12:44 +02:00
|
|
|
free(filter_options->sparse_oid_name);
|
2019-06-28 00:54:08 +02:00
|
|
|
for (sub = 0; sub < filter_options->sub_nr; sub++)
|
|
|
|
list_objects_filter_release(&filter_options->sub[sub]);
|
|
|
|
free(filter_options->sub);
|
list-objects-filter: add and use initializers
In 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec
strings, 2022-09-08), we noted that the filter_spec string_list was
inconsistent in how it handled memory ownership of strings stored in the
list. The fix there was a bit of a band-aid to set the "strdup_strings"
variable right before adding anything.
That works OK, and it lets the users of the API continue to
zero-initialize the struct. But it makes the code a bit hard to follow
and accident-prone, as any other spots appending the filter_spec need to
think about whether to set the strdup_strings value, too (there's one
such spot in partial_clone_get_default_filter_spec(), which is probably
a possible memory leak).
So let's do that full cleanup now. We'll introduce a
LIST_OBJECTS_FILTER_INIT macro and matching function, and use them as
appropriate (though it is for the "_options" struct, this matches the
corresponding list_objects_filter_release() function).
This is harder than it seems! Many other structs, like
git_transport_data, embed the filter struct. So they need to initialize
it themselves even if the rest of the enclosing struct is OK with
zero-initialization. I found all of the relevant spots by grepping
manually for declarations of list_objects_filter_options. And then doing
so recursively for structs which embed it, and ones which embed those,
and so on.
I'm pretty sure I got everything, but there's no change that would alert
the compiler if any topics in flight added new declarations. To catch
this case, we now double-check in the parsing function that things were
initialized as expected and BUG() if appropriate.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:07 +02:00
|
|
|
list_objects_filter_init(filter_options);
|
2017-12-05 17:50:13 +01:00
|
|
|
}
|
2017-12-08 16:58:45 +01:00
|
|
|
|
|
|
|
void partial_clone_register(
|
|
|
|
const char *remote,
|
2019-06-28 00:54:10 +02:00
|
|
|
struct list_objects_filter_options *filter_options)
|
2017-12-08 16:58:45 +01:00
|
|
|
{
|
2020-09-29 00:26:38 +02:00
|
|
|
struct promisor_remote *promisor_remote;
|
2019-06-25 15:40:31 +02:00
|
|
|
char *cfg_name;
|
2019-06-25 15:40:32 +02:00
|
|
|
char *filter_name;
|
2017-12-08 16:58:45 +01:00
|
|
|
|
2019-06-25 15:40:31 +02:00
|
|
|
/* Check if it is already registered */
|
2023-03-28 15:58:53 +02:00
|
|
|
if ((promisor_remote = repo_promisor_remote_find(the_repository, remote))) {
|
2020-09-29 00:26:38 +02:00
|
|
|
if (promisor_remote->partial_clone_filter)
|
|
|
|
/*
|
|
|
|
* Remote is already registered and a filter is already
|
|
|
|
* set, so we don't need to do anything here.
|
|
|
|
*/
|
|
|
|
return;
|
|
|
|
} else {
|
2020-06-05 11:10:01 +02:00
|
|
|
if (upgrade_repository_format(1) < 0)
|
|
|
|
die(_("unable to upgrade repository format to support partial clone"));
|
2017-12-08 16:58:45 +01:00
|
|
|
|
2019-06-25 15:40:31 +02:00
|
|
|
/* Add promisor config for the remote */
|
|
|
|
cfg_name = xstrfmt("remote.%s.promisor", remote);
|
|
|
|
git_config_set(cfg_name, "true");
|
|
|
|
free(cfg_name);
|
|
|
|
}
|
2017-12-08 16:58:45 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Record the initial filter-spec in the config as
|
|
|
|
* the default for subsequent fetches from this remote.
|
|
|
|
*/
|
2019-06-25 15:40:32 +02:00
|
|
|
filter_name = xstrfmt("remote.%s.partialclonefilter", remote);
|
2019-09-18 20:50:09 +02:00
|
|
|
/* NEEDSWORK: 'expand' result leaking??? */
|
|
|
|
git_config_set(filter_name,
|
|
|
|
expand_list_objects_filter_spec(filter_options));
|
2019-06-25 15:40:32 +02:00
|
|
|
free(filter_name);
|
2019-06-25 15:40:31 +02:00
|
|
|
|
|
|
|
/* Make sure the config info are reset */
|
2023-03-28 15:58:53 +02:00
|
|
|
repo_promisor_remote_reinit(the_repository);
|
2017-12-08 16:58:45 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void partial_clone_get_default_filter_spec(
|
2019-06-25 15:40:32 +02:00
|
|
|
struct list_objects_filter_options *filter_options,
|
|
|
|
const char *remote)
|
2017-12-08 16:58:45 +01:00
|
|
|
{
|
2023-03-28 15:58:53 +02:00
|
|
|
struct promisor_remote *promisor = repo_promisor_remote_find(the_repository,
|
|
|
|
remote);
|
2019-06-28 00:54:07 +02:00
|
|
|
struct strbuf errbuf = STRBUF_INIT;
|
2019-06-25 15:40:32 +02:00
|
|
|
|
2017-12-08 16:58:45 +01:00
|
|
|
/*
|
|
|
|
* Parse default value, but silently ignore it if it is invalid.
|
|
|
|
*/
|
list-objects-filter: handle null default filter spec
When we have a remote.*.promisor config variable, we know that we're in
a partial clone. Usually there's a matching remote.*.partialclonefilter
option, which tells us which filter to use with the remote. If that
option is missing, we skip setting up the filter at all. But something
funny happens: we stick a NULL entry into the string_list storing the
text filter spec.
This is a weird state, and could possibly segfault if anybody called
called list_objects_filter_spec(), etc. In practice, nobody does,
because filter->choice will still be LOFC_DISABLED, so code generally
realizes there's no filter to use. And the string_list itself is OK,
because it starts in non-dup mode until we actually parse a filter spec.
So it blindly stores the NULL without even looking at it.
But it's probably worth avoiding this confused state. It's an accident
waiting to happen, and it will be a problem if we replace the lazy
initialization from 7e2619d8ff (list_objects_filter_options: plug leak
of filter_spec strings, 2022-09-08) with a real initialization function.
The history is a little interesting here, as the bug was introduced
during the merge resolution in 627b826834 (Merge branch
'md/list-objects-filter-combo', 2019-09-18).
The original logic comes from cac1137dc4 (list-objects: check if filter
is NULL before using, 2018-06-11), where we had a single string via
core.partialCloneFilter, and a simple NULL check was sufficient. And it
even added a test in t0410 that covers this situation.
Later, that was expanded to allow per-remote filters in fa3d1b63e8
(promisor-remote: parse remote.*.partialclonefilter, 2019-06-25). After
that commit, we get a promisor struct with a partial_clone_filter
string, which could be NULL. The commit checks only that the struct
pointer is non-NULL, which is enough. It may pass NULL to
gently_parse_list_objects_filter(), but that function is smart enough to
consider it a noop.
But in parallel, cf9ceb5a12 (list-objects-filter-options: make
filter_spec a string_list, 2019-06-27) added a new line of code: before
we call gently_parse_list_objets_filter(), we append the filter spec to
the string_list. By itself that was OK, since we'd have returned early
if the string was NULL.
When the two were merged in 627b826834, the result is that we return
early only if the struct is NULL, but not the string. And we append to
the string_list, meaning we may append NULL.
The solution is to return early if either is NULL, as it would mean we
don't have a configured filter.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:00:45 +02:00
|
|
|
if (!promisor || !promisor->partial_clone_filter)
|
2018-06-11 23:51:26 +02:00
|
|
|
return;
|
2019-06-28 00:54:08 +02:00
|
|
|
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
strbuf_addstr(&filter_options->filter_spec,
|
|
|
|
promisor->partial_clone_filter);
|
2017-12-08 16:58:45 +01:00
|
|
|
gently_parse_list_objects_filter(filter_options,
|
2019-09-18 20:50:09 +02:00
|
|
|
promisor->partial_clone_filter,
|
2019-06-28 00:54:07 +02:00
|
|
|
&errbuf);
|
|
|
|
strbuf_release(&errbuf);
|
2017-12-08 16:58:45 +01:00
|
|
|
}
|
2022-03-09 17:01:32 +01:00
|
|
|
|
|
|
|
void list_objects_filter_copy(
|
|
|
|
struct list_objects_filter_options *dest,
|
|
|
|
const struct list_objects_filter_options *src)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* Copy everything. We will overwrite the pointers shortly. */
|
|
|
|
memcpy(dest, src, sizeof(struct list_objects_filter_options));
|
|
|
|
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
strbuf_init(&dest->filter_spec, 0);
|
|
|
|
strbuf_addbuf(&dest->filter_spec, &src->filter_spec);
|
2022-09-08 06:54:29 +02:00
|
|
|
dest->sparse_oid_name = xstrdup_or_null(src->sparse_oid_name);
|
2022-03-09 17:01:32 +01:00
|
|
|
|
|
|
|
ALLOC_ARRAY(dest->sub, dest->sub_alloc);
|
|
|
|
for (i = 0; i < src->sub_nr; i++)
|
|
|
|
list_objects_filter_copy(&dest->sub[i], &src->sub[i]);
|
|
|
|
}
|
list-objects-filter: add and use initializers
In 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec
strings, 2022-09-08), we noted that the filter_spec string_list was
inconsistent in how it handled memory ownership of strings stored in the
list. The fix there was a bit of a band-aid to set the "strdup_strings"
variable right before adding anything.
That works OK, and it lets the users of the API continue to
zero-initialize the struct. But it makes the code a bit hard to follow
and accident-prone, as any other spots appending the filter_spec need to
think about whether to set the strdup_strings value, too (there's one
such spot in partial_clone_get_default_filter_spec(), which is probably
a possible memory leak).
So let's do that full cleanup now. We'll introduce a
LIST_OBJECTS_FILTER_INIT macro and matching function, and use them as
appropriate (though it is for the "_options" struct, this matches the
corresponding list_objects_filter_release() function).
This is harder than it seems! Many other structs, like
git_transport_data, embed the filter struct. So they need to initialize
it themselves even if the rest of the enclosing struct is OK with
zero-initialization. I found all of the relevant spots by grepping
manually for declarations of list_objects_filter_options. And then doing
so recursively for structs which embed it, and ones which embed those,
and so on.
I'm pretty sure I got everything, but there's no change that would alert
the compiler if any topics in flight added new declarations. To catch
this case, we now double-check in the parsing function that things were
initialized as expected and BUG() if appropriate.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:07 +02:00
|
|
|
|
|
|
|
void list_objects_filter_init(struct list_objects_filter_options *filter_options)
|
|
|
|
{
|
|
|
|
struct list_objects_filter_options blank = LIST_OBJECTS_FILTER_INIT;
|
|
|
|
memcpy(filter_options, &blank, sizeof(*filter_options));
|
|
|
|
}
|