2017-11-21 21:58:50 +01:00
|
|
|
#ifndef LIST_OBJECTS_FILTER_OPTIONS_H
|
|
|
|
#define LIST_OBJECTS_FILTER_OPTIONS_H
|
|
|
|
|
2023-03-21 07:25:54 +01:00
|
|
|
#include "gettext.h"
|
2023-02-24 01:09:30 +01:00
|
|
|
#include "object.h"
|
2019-06-28 00:54:10 +02:00
|
|
|
#include "string-list.h"
|
2023-02-24 01:09:30 +01:00
|
|
|
#include "strbuf.h"
|
2017-11-21 21:58:50 +01:00
|
|
|
|
2023-03-19 17:27:12 +01:00
|
|
|
struct option;
|
|
|
|
|
2017-11-21 21:58:50 +01:00
|
|
|
/*
|
|
|
|
* The list of defined filters for list-objects.
|
|
|
|
*/
|
|
|
|
enum list_objects_filter_choice {
|
|
|
|
LOFC_DISABLED = 0,
|
|
|
|
LOFC_BLOB_NONE,
|
|
|
|
LOFC_BLOB_LIMIT,
|
2019-01-09 03:59:13 +01:00
|
|
|
LOFC_TREE_DEPTH,
|
2017-11-21 21:58:50 +01:00
|
|
|
LOFC_SPARSE_OID,
|
2021-04-19 13:46:53 +02:00
|
|
|
LOFC_OBJECT_TYPE,
|
2019-06-28 00:54:08 +02:00
|
|
|
LOFC_COMBINE,
|
2017-11-21 21:58:50 +01:00
|
|
|
LOFC__COUNT /* must be last */
|
|
|
|
};
|
|
|
|
|
2020-07-31 22:26:26 +02:00
|
|
|
/*
|
|
|
|
* Returns a configuration key suitable for describing the given object filter,
|
|
|
|
* e.g.: "blob:none", "combine", etc.
|
|
|
|
*/
|
|
|
|
const char *list_object_filter_config_name(enum list_objects_filter_choice c);
|
|
|
|
|
2017-11-21 21:58:50 +01:00
|
|
|
struct list_objects_filter_options {
|
|
|
|
/*
|
|
|
|
* 'filter_spec' is the raw argument value given on the command line
|
|
|
|
* or protocol request. (The part after the "--keyword=".) For
|
2019-01-08 01:17:09 +01:00
|
|
|
* commands that launch filtering sub-processes, or for communication
|
|
|
|
* over the network, don't use this value; use the result of
|
|
|
|
* expand_list_objects_filter_spec() instead.
|
2019-06-28 00:54:10 +02:00
|
|
|
* To get the raw filter spec given by the user, use the result of
|
|
|
|
* list_objects_filter_spec().
|
2017-11-21 21:58:50 +01:00
|
|
|
*/
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
struct strbuf filter_spec;
|
2017-11-21 21:58:50 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* 'choice' is determined by parsing the filter-spec. This indicates
|
|
|
|
* the filtering algorithm to use.
|
|
|
|
*/
|
|
|
|
enum list_objects_filter_choice choice;
|
|
|
|
|
2017-12-08 16:58:50 +01:00
|
|
|
/*
|
|
|
|
* Choice is LOFC_DISABLED because "--no-filter" was requested.
|
|
|
|
*/
|
|
|
|
unsigned int no_filter : 1;
|
|
|
|
|
2017-11-21 21:58:50 +01:00
|
|
|
/*
|
2019-06-28 00:54:08 +02:00
|
|
|
* BEGIN choice-specific parsed values from within the filter-spec. Only
|
|
|
|
* some values will be defined for any given choice.
|
2017-11-21 21:58:50 +01:00
|
|
|
*/
|
2019-06-28 00:54:08 +02:00
|
|
|
|
list-objects-filter: delay parsing of sparse oid
The list-objects-filter code has two steps to its initialization:
1. parse_list_objects_filter() makes sure the spec is a filter we know
about and is syntactically correct. This step is done by "rev-list"
or "upload-pack" that is going to apply a filter, but also by "git
clone" or "git fetch" before they send the spec across the wire.
2. list_objects_filter__init() runs the type-specific initialization
(using function pointers established in step 1). This happens at
the start of traverse_commit_list_filtered(), when we're about to
actually use the filter.
It's a good idea to parse as much as we can in step 1, in order to catch
problems early (e.g., a blob size limit that isn't a number). But one
thing we _shouldn't_ do is resolve any oids at that step (e.g., for
sparse-file contents specified by oid). In the case of a fetch, the oid
has to be resolved on the remote side.
The current code does resolve the oid during the parse phase, but
ignores any error (which we must do, because we might just be sending
the spec across the wire). This leads to two bugs:
- if we're not in a repository (e.g., because it's git-clone parsing
the spec), then we trigger a BUG() trying to resolve the name
- if we did hit the error case, we still have to notice that later and
bail. The code path in rev-list handles this, but the one in
upload-pack does not, leading to a segfault.
We can fix both by moving the oid resolution into the sparse-oid init
function. At that point we know we have a repository (because we're
about to traverse), and handling the error there fixes the segfault.
As a bonus, we can drop the NULL sparse_oid_value check in rev-list,
since this is now handled in the sparse-oid-filter init function.
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-15 18:12:44 +02:00
|
|
|
char *sparse_oid_name;
|
2017-11-21 21:58:50 +01:00
|
|
|
unsigned long blob_limit_value;
|
2019-01-09 03:59:13 +01:00
|
|
|
unsigned long tree_exclude_depth;
|
2021-04-19 13:46:53 +02:00
|
|
|
enum object_type object_type;
|
2019-06-28 00:54:08 +02:00
|
|
|
|
|
|
|
/* LOFC_COMBINE values */
|
|
|
|
|
|
|
|
/* This array contains all the subfilters which this filter combines. */
|
|
|
|
size_t sub_nr, sub_alloc;
|
|
|
|
struct list_objects_filter_options *sub;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* END choice-specific parsed values.
|
|
|
|
*/
|
2017-11-21 21:58:50 +01:00
|
|
|
};
|
|
|
|
|
list-objects-filter: convert filter_spec to a strbuf
Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:
A strbuf would seem to be a more natural choice for this object, but
it unfortunately requires initialization besides just zero'ing out
the memory. This results in all container structs, and all
containers of those structs, etc., to also require initialization.
Initializing them all would be more cumbersome that simply using a
string_list, which behaves properly when its contents are zero'd.
Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.
This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:31 +02:00
|
|
|
#define LIST_OBJECTS_FILTER_INIT { .filter_spec = STRBUF_INIT }
|
list-objects-filter: add and use initializers
In 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec
strings, 2022-09-08), we noted that the filter_spec string_list was
inconsistent in how it handled memory ownership of strings stored in the
list. The fix there was a bit of a band-aid to set the "strdup_strings"
variable right before adding anything.
That works OK, and it lets the users of the API continue to
zero-initialize the struct. But it makes the code a bit hard to follow
and accident-prone, as any other spots appending the filter_spec need to
think about whether to set the strdup_strings value, too (there's one
such spot in partial_clone_get_default_filter_spec(), which is probably
a possible memory leak).
So let's do that full cleanup now. We'll introduce a
LIST_OBJECTS_FILTER_INIT macro and matching function, and use them as
appropriate (though it is for the "_options" struct, this matches the
corresponding list_objects_filter_release() function).
This is harder than it seems! Many other structs, like
git_transport_data, embed the filter struct. So they need to initialize
it themselves even if the rest of the enclosing struct is OK with
zero-initialization. I found all of the relevant spots by grepping
manually for declarations of list_objects_filter_options. And then doing
so recursively for structs which embed it, and ones which embed those,
and so on.
I'm pretty sure I got everything, but there's no change that would alert
the compiler if any topics in flight added new declarations. To catch
this case, we now double-check in the parsing function that things were
initialized as expected and BUG() if appropriate.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-11 07:03:07 +02:00
|
|
|
void list_objects_filter_init(struct list_objects_filter_options *filter_options);
|
|
|
|
|
2022-03-09 17:01:39 +01:00
|
|
|
/*
|
|
|
|
* Parse value of the argument to the "filter" keyword.
|
|
|
|
* On the command line this looks like:
|
|
|
|
* --filter=<arg>
|
|
|
|
* and in the pack protocol as:
|
|
|
|
* "filter" SP <arg>
|
|
|
|
*
|
|
|
|
* The filter keyword will be used by many commands.
|
|
|
|
* See Documentation/rev-list-options.txt for allowed values for <arg>.
|
|
|
|
*
|
|
|
|
* Capture the given arg as the "filter_spec". This can be forwarded to
|
|
|
|
* subordinate commands when necessary (although it's better to pass it through
|
|
|
|
* expand_list_objects_filter_spec() first). We also "intern" the arg for the
|
|
|
|
* convenience of the current command.
|
|
|
|
*/
|
|
|
|
int gently_parse_list_objects_filter(
|
|
|
|
struct list_objects_filter_options *filter_options,
|
|
|
|
const char *arg,
|
|
|
|
struct strbuf *errbuf);
|
|
|
|
|
2019-06-28 00:54:12 +02:00
|
|
|
void list_objects_filter_die_if_populated(
|
|
|
|
struct list_objects_filter_options *filter_options);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Parses the filter spec string given by arg and either (1) simply places the
|
|
|
|
* result in filter_options if it is not yet populated or (2) combines it with
|
|
|
|
* the filter already in filter_options if it is already populated. In the case
|
|
|
|
* of (2), the filter specs are combined as if specified with 'combine:'.
|
|
|
|
*
|
|
|
|
* Dies and prints a user-facing message if an error occurs.
|
|
|
|
*/
|
2019-06-28 00:54:14 +02:00
|
|
|
void parse_list_objects_filter(
|
2017-11-21 21:58:50 +01:00
|
|
|
struct list_objects_filter_options *filter_options,
|
|
|
|
const char *arg);
|
|
|
|
|
pack-objects: lazily set up "struct rev_info", don't leak
In the preceding [1] (pack-objects: move revs out of
get_object_list(), 2022-03-22) the "repo_init_revisions()" was moved
to cmd_pack_objects() so that it unconditionally took place for all
invocations of "git pack-objects".
We'd thus start leaking memory, which is easily reproduced in
e.g. git.git by feeding e83c5163316 (Initial revision of "git", the
information manager from hell, 2005-04-07) to "git pack-objects";
$ echo e83c5163316f89bfbde7d9ab23ca2e25604af290 | ./git pack-objects initial
[...]
==19130==ERROR: LeakSanitizer: detected memory leaks
Direct leak of 7120 byte(s) in 1 object(s) allocated from:
#0 0x455308 in __interceptor_malloc (/home/avar/g/git/git+0x455308)
#1 0x75b399 in do_xmalloc /home/avar/g/git/wrapper.c:41:8
#2 0x75b356 in xmalloc /home/avar/g/git/wrapper.c:62:9
#3 0x5d7609 in prep_parse_options /home/avar/g/git/diff.c:5647:2
#4 0x5d415a in repo_diff_setup /home/avar/g/git/diff.c:4621:2
#5 0x6dffbb in repo_init_revisions /home/avar/g/git/revision.c:1853:2
#6 0x4f599d in cmd_pack_objects /home/avar/g/git/builtin/pack-objects.c:3980:2
#7 0x4592ca in run_builtin /home/avar/g/git/git.c:465:11
#8 0x457d81 in handle_builtin /home/avar/g/git/git.c:718:3
#9 0x458ca5 in run_argv /home/avar/g/git/git.c:785:4
#10 0x457b40 in cmd_main /home/avar/g/git/git.c:916:19
#11 0x562259 in main /home/avar/g/git/common-main.c:56:11
#12 0x7fce792ac7ec in __libc_start_main csu/../csu/libc-start.c:332:16
#13 0x4300f9 in _start (/home/avar/g/git/git+0x4300f9)
SUMMARY: LeakSanitizer: 7120 byte(s) leaked in 1 allocation(s).
Aborted
Narrowly fixing that commit would have been easy, just add call
repo_init_revisions() right before get_object_list(), which is
effectively what was done before that commit.
But an unstated constraint when setting it up early is that it was
needed for the subsequent [2] (pack-objects: parse --filter directly
into revs.filter, 2022-03-22), i.e. we might have a --filter
command-line option, and need to either have the "struct rev_info"
setup when we encounter that option, or later.
Let's just change the control flow so that we'll instead set up the
"struct rev_info" only when we need it. Doing so leads to a bit more
verbosity, but it's a lot clearer what we're doing and why.
An earlier version of this commit[3] went behind
opt_parse_list_objects_filter()'s back by faking up a "struct option"
before calling it. Let's avoid that and instead create a blessed API
for this pattern.
We could furthermore combine the two get_object_list() invocations
here by having repo_init_revisions() invoked on &pfd.revs, but I think
clearly separating the two makes the flow clearer. Likewise
redundantly but explicitly (i.e. redundant v.s. a "{ 0 }") "0" to
"have_revs" early in cmd_pack_objects().
While we're at it add parentheses around the arguments to the OPT_*
macros in in list-objects-filter-options.h, as we need to change those
lines anyway. It doesn't matter in this case, but is good general
practice.
1. https://lore.kernel.org/git/619b757d98465dbc4995bdc11a5282fbfcbd3daa.1647970119.git.gitgitgadget@gmail.com
2. https://lore.kernel.org/git/97de926904988b89b5663bd4c59c011a1723a8f5.1647970119.git.gitgitgadget@gmail.com
3. https://lore.kernel.org/git/patch-1.1-193534b0f07-20220325T121715Z-avarab@gmail.com/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-03-28 17:43:18 +02:00
|
|
|
/**
|
|
|
|
* The opt->value to opt_parse_list_objects_filter() is either a
|
|
|
|
* "struct list_objects_filter_option *" when using
|
|
|
|
* OPT_PARSE_LIST_OBJECTS_FILTER().
|
|
|
|
*/
|
2017-11-21 21:58:50 +01:00
|
|
|
int opt_parse_list_objects_filter(const struct option *opt,
|
|
|
|
const char *arg, int unset);
|
|
|
|
|
|
|
|
#define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
|
2022-11-29 13:26:44 +01:00
|
|
|
OPT_CALLBACK(0, "filter", (fo), N_("args"), \
|
|
|
|
N_("object filtering"), opt_parse_list_objects_filter)
|
2017-11-21 21:58:50 +01:00
|
|
|
|
2019-01-08 01:17:09 +01:00
|
|
|
/*
|
|
|
|
* Translates abbreviated numbers in the filter's filter_spec into their
|
|
|
|
* fully-expanded forms (e.g., "limit:blob=1k" becomes "limit:blob=1024").
|
2019-06-28 00:54:10 +02:00
|
|
|
* Returns a string owned by the list_objects_filter_options object.
|
2019-01-08 01:17:09 +01:00
|
|
|
*
|
2019-06-28 00:54:10 +02:00
|
|
|
* This form should be used instead of the raw list_objects_filter_spec()
|
|
|
|
* value when communicating with a remote process or subprocess.
|
2019-01-08 01:17:09 +01:00
|
|
|
*/
|
2019-06-28 00:54:10 +02:00
|
|
|
const char *expand_list_objects_filter_spec(
|
|
|
|
struct list_objects_filter_options *filter);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns the filter spec string more or less in the form as the user
|
|
|
|
* entered it. This form of the filter_spec can be used in user-facing
|
|
|
|
* messages. Returns a string owned by the list_objects_filter_options
|
|
|
|
* object.
|
|
|
|
*/
|
|
|
|
const char *list_objects_filter_spec(
|
|
|
|
struct list_objects_filter_options *filter);
|
2019-01-08 01:17:09 +01:00
|
|
|
|
2017-12-05 17:50:13 +01:00
|
|
|
void list_objects_filter_release(
|
|
|
|
struct list_objects_filter_options *filter_options);
|
|
|
|
|
2017-12-08 16:58:50 +01:00
|
|
|
static inline void list_objects_filter_set_no_filter(
|
|
|
|
struct list_objects_filter_options *filter_options)
|
|
|
|
{
|
|
|
|
list_objects_filter_release(filter_options);
|
|
|
|
filter_options->no_filter = 1;
|
|
|
|
}
|
|
|
|
|
2017-12-08 16:58:45 +01:00
|
|
|
void partial_clone_register(
|
|
|
|
const char *remote,
|
2019-06-28 00:54:10 +02:00
|
|
|
struct list_objects_filter_options *filter_options);
|
2017-12-08 16:58:45 +01:00
|
|
|
void partial_clone_get_default_filter_spec(
|
2019-06-25 15:40:32 +02:00
|
|
|
struct list_objects_filter_options *filter_options,
|
|
|
|
const char *remote);
|
2017-12-08 16:58:45 +01:00
|
|
|
|
2022-03-09 17:01:32 +01:00
|
|
|
void list_objects_filter_copy(
|
|
|
|
struct list_objects_filter_options *dest,
|
|
|
|
const struct list_objects_filter_options *src);
|
|
|
|
|
2017-11-21 21:58:50 +01:00
|
|
|
#endif /* LIST_OBJECTS_FILTER_OPTIONS_H */
|