Merge branch 'jc/cat-file-batch-commands'

"git cat-file" learns "--batch-command" mode, which is a more
flexible interface than the existing "--batch" or "--batch-check"
modes, to allow different kinds of inquiries to be made.

* jc/cat-file-batch-commands:
  cat-file: add --batch-command mode
  cat-file: add remove_timestamp helper
  cat-file: introduce batch_mode enum to replace print_contents
  cat-file: rename cmdmode to transform_mode
This commit is contained in:
Junio C Hamano 2022-03-09 13:38:24 -08:00
commit d169d51504
3 changed files with 333 additions and 22 deletions

View File

@ -96,6 +96,33 @@ OPTIONS
need to specify the path, separated by whitespace. See the
section `BATCH OUTPUT` below for details.
--batch-command::
--batch-command=<format>::
Enter a command mode that reads commands and arguments from stdin. May
only be combined with `--buffer`, `--textconv` or `--filters`. In the
case of `--textconv` or `--filters`, the input lines also need to specify
the path, separated by whitespace. See the section `BATCH OUTPUT` below
for details.
+
`--batch-command` recognizes the following commands:
+
--
contents <object>::
Print object contents for object reference `<object>`. This corresponds to
the output of `--batch`.
info <object>::
Print object info for object reference `<object>`. This corresponds to the
output of `--batch-check`.
flush::
Used with `--buffer` to execute all preceding commands that were issued
since the beginning or since the last flush was issued. When `--buffer`
is used, no output will come until a `flush` is issued. When `--buffer`
is not used, commands are flushed each time without issuing `flush`.
--
+
--batch-all-objects::
Instead of reading a list of objects on stdin, perform the
requested batch operation on all objects in the repository and
@ -110,7 +137,7 @@ OPTIONS
that a process can interactively read and write from
`cat-file`. With this option, the output uses normal stdio
buffering; this is much more efficient when invoking
`--batch-check` on a large number of objects.
`--batch-check` or `--batch-command` on a large number of objects.
--unordered::
When `--batch-all-objects` is in use, visit objects in an
@ -202,6 +229,13 @@ from stdin, one per line, and print information about them. By default,
the whole line is considered as an object, as if it were fed to
linkgit:git-rev-parse[1].
When `--batch-command` is given, `cat-file` will read commands from stdin,
one per line, and print information based on the command given. With
`--batch-command`, the `info` command followed by an object will print
information about the object the same way `--batch-check` would, and the
`contents` command followed by an object prints contents in the same way
`--batch` would.
You can specify the information shown for each object by using a custom
`<format>`. The `<format>` is copied literally to stdout for each
object, with placeholders of the form `%(atom)` expanded, followed by a
@ -237,9 +271,9 @@ newline. The available atoms are:
If no format is specified, the default format is `%(objectname)
%(objecttype) %(objectsize)`.
If `--batch` is specified, the object information is followed by the
object contents (consisting of `%(objectsize)` bytes), followed by a
newline.
If `--batch` is specified, or if `--batch-command` is used with the `contents`
command, the object information is followed by the object contents (consisting
of `%(objectsize)` bytes), followed by a newline.
For example, `--batch` without a custom format would produce:

View File

@ -17,14 +17,20 @@
#include "object-store.h"
#include "promisor-remote.h"
enum batch_mode {
BATCH_MODE_CONTENTS,
BATCH_MODE_INFO,
BATCH_MODE_QUEUE_AND_DISPATCH,
};
struct batch_options {
int enabled;
int follow_symlinks;
int print_contents;
enum batch_mode batch_mode;
int buffer_output;
int all_objects;
int unordered;
int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */
int transform_mode; /* may be 'w' or 'c' for --filters or --textconv */
const char *format;
};
@ -302,19 +308,19 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
if (data->type == OBJ_BLOB) {
if (opt->buffer_output)
fflush(stdout);
if (opt->cmdmode) {
if (opt->transform_mode) {
char *contents;
unsigned long size;
if (!data->rest)
die("missing path for '%s'", oid_to_hex(oid));
if (opt->cmdmode == 'w') {
if (opt->transform_mode == 'w') {
if (filter_object(data->rest, 0100644, oid,
&contents, &size))
die("could not convert '%s' %s",
oid_to_hex(oid), data->rest);
} else if (opt->cmdmode == 'c') {
} else if (opt->transform_mode == 'c') {
enum object_type type;
if (!textconv_object(the_repository,
data->rest, 0100644, oid,
@ -326,7 +332,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
die("could not convert '%s' %s",
oid_to_hex(oid), data->rest);
} else
BUG("invalid cmdmode: %c", opt->cmdmode);
BUG("invalid transform_mode: %c", opt->transform_mode);
batch_write(opt, contents, size);
free(contents);
} else {
@ -386,7 +392,7 @@ static void batch_object_write(const char *obj_name,
strbuf_addch(scratch, '\n');
batch_write(opt, scratch->buf, scratch->len);
if (opt->print_contents) {
if (opt->batch_mode == BATCH_MODE_CONTENTS) {
print_object_or_die(opt, data);
batch_write(opt, "\n", 1);
}
@ -508,6 +514,135 @@ static int batch_unordered_packed(const struct object_id *oid,
data);
}
/*
 * Handler for one --batch-command command. It receives the batch options,
 * the argument text that followed the command name, the scratch output
 * buffer and the shared expand_data.
 */
typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *,
struct strbuf *, struct expand_data *);
/*
 * A command that has been read but not yet executed; entries are collected
 * by batch_objects_command() while --buffer is in effect and run later by
 * dispatch_calls().
 */
struct queued_cmd {
/* handler to invoke when the queue is dispatched */
parse_cmd_fn_t fn;
/*
 * Copy of the command's argument made with xstrdup_or_null(); owned by
 * the queue entry and released by free_cmds().
 */
char *line;
};
/*
 * Handler for the "contents <object>" command: process `line` as one object
 * request with the batch mode set to BATCH_MODE_CONTENTS.
 */
static void parse_cmd_contents(struct batch_options *opt,
const char *line,
struct strbuf *output,
struct expand_data *data)
{
/* Set before the call; batch_one_object() is handed this same opt. */
opt->batch_mode = BATCH_MODE_CONTENTS;
batch_one_object(line, output, opt, data);
}
/*
 * Handler for the "info <object>" command: process `line` as one object
 * request with the batch mode set to BATCH_MODE_INFO.
 */
static void parse_cmd_info(struct batch_options *opt,
const char *line,
struct strbuf *output,
struct expand_data *data)
{
/* Set before the call; batch_one_object() is handed this same opt. */
opt->batch_mode = BATCH_MODE_INFO;
batch_one_object(line, output, opt, data);
}
/*
 * Run every queued command in the order it was queued, then flush stdout.
 *
 * opt, output and data are passed through unchanged to each handler.
 * cmd points at the first of nr queued entries; nr may be 0, in which case
 * only the flush happens.
 *
 * Dies with "flush is only for --buffer mode" when --buffer is not in
 * effect, which is how a "flush" command without --buffer is rejected.
 *
 * The count is a size_t because the caller tracks the queue length as a
 * size_t; taking it as int would silently narrow large counts.
 */
static void dispatch_calls(struct batch_options *opt,
			   struct strbuf *output,
			   struct expand_data *data,
			   struct queued_cmd *cmd,
			   size_t nr)
{
	size_t i;

	if (!opt->buffer_output)
		die(_("flush is only for --buffer mode"));

	for (i = 0; i < nr; i++)
		cmd[i].fn(opt, cmd[i].line, output, data);

	fflush(stdout);
}
/*
 * Release the argument string of each of the first *nr queued commands and
 * reset *nr to 0. The array itself is not freed, so the caller can keep
 * reusing it.
 */
static void free_cmds(struct queued_cmd *cmd, size_t *nr)
{
	size_t idx = 0;

	while (idx < *nr) {
		FREE_AND_NULL(cmd[idx].line);
		idx++;
	}

	*nr = 0;
}
/*
 * Commands understood by --batch-command. An entry with takes_args set
 * must be followed by a space and an argument; an entry without it must
 * stand alone on its line. "flush" has no handler here because
 * batch_objects_command() recognizes it by name.
 */
static const struct parse_cmd {
	const char *name;
	parse_cmd_fn_t fn;
	unsigned takes_args;
} commands[] = {
	{ .name = "contents", .fn = parse_cmd_contents, .takes_args = 1 },
	{ .name = "info",     .fn = parse_cmd_info,     .takes_args = 1 },
	{ .name = "flush",    .fn = NULL,               .takes_args = 0 },
};
/*
 * Main loop of --batch-command: read one command per line from stdin and
 * either run it at once or, with --buffer, queue it until a "flush".
 *
 * opt, output and data are passed through to the command handlers.
 *
 * Dies on an empty line, on leading whitespace, on an unknown command, on
 * a command that needs an argument but has none, and on an argument given
 * to a command that takes none.
 *
 * At end of input any commands still queued are dispatched, unless the
 * GIT_TEST_CAT_FILE_NO_FLUSH_ON_EXIT environment variable is true.
 */
static void batch_objects_command(struct batch_options *opt,
				  struct strbuf *output,
				  struct expand_data *data)
{
	struct strbuf input = STRBUF_INIT;
	struct queued_cmd *queued_cmd = NULL;
	size_t alloc = 0, nr = 0;

	while (!strbuf_getline(&input, stdin)) {
		size_t i;
		const struct parse_cmd *cmd = NULL;
		const char *p = NULL, *cmd_end;

		if (!input.len)
			die(_("empty command in input"));

		if (isspace(*input.buf))
			die(_("whitespace before command: '%s'"), input.buf);

		for (i = 0; i < ARRAY_SIZE(commands); i++) {
			if (!skip_prefix(input.buf, commands[i].name, &cmd_end))
				continue;

			/*
			 * The name has to be a whole word. Without this,
			 * "infoX" would be diagnosed as "info requires
			 * arguments" instead of as an unknown command, and
			 * a command whose name extends another one could
			 * never be reached.
			 */
			if (*cmd_end && *cmd_end != ' ')
				continue;

			cmd = &commands[i];
			if (cmd->takes_args) {
				if (*cmd_end != ' ')
					die(_("%s requires arguments"),
					    commands[i].name);

				/* the argument starts right after the space */
				p = cmd_end + 1;
			} else if (*cmd_end) {
				die(_("%s takes no arguments"),
				    commands[i].name);
			}

			break;
		}

		if (!cmd)
			die(_("unknown command: '%s'"), input.buf);

		if (!strcmp(cmd->name, "flush")) {
			/* dispatch_calls() dies here when --buffer is off */
			dispatch_calls(opt, output, data, queued_cmd, nr);
			free_cmds(queued_cmd, &nr);
		} else if (!opt->buffer_output) {
			cmd->fn(opt, p, output, data);
		} else {
			struct queued_cmd call = {0};

			/* input is reused for the next line, so keep a copy */
			ALLOC_GROW(queued_cmd, nr + 1, alloc);
			call.fn = cmd->fn;
			call.line = xstrdup_or_null(p);
			queued_cmd[nr++] = call;
		}
	}

	if (opt->buffer_output &&
	    nr &&
	    !git_env_bool("GIT_TEST_CAT_FILE_NO_FLUSH_ON_EXIT", 0))
		dispatch_calls(opt, output, data, queued_cmd, nr);

	/*
	 * Always release what is still queued: when the final dispatch is
	 * skipped the duplicated argument strings would otherwise leak.
	 */
	free_cmds(queued_cmd, &nr);
	free(queued_cmd);
	strbuf_release(&input);
}
static int batch_objects(struct batch_options *opt)
{
struct strbuf input = STRBUF_INIT;
@ -529,14 +664,14 @@ static int batch_objects(struct batch_options *opt)
strbuf_expand(&output, opt->format, expand_format, &data);
data.mark_query = 0;
strbuf_release(&output);
if (opt->cmdmode)
if (opt->transform_mode)
data.split_on_whitespace = 1;
/*
* If we are printing out the object, then always fill in the type,
* since we will want to decide whether or not to stream.
*/
if (opt->print_contents)
if (opt->batch_mode == BATCH_MODE_CONTENTS)
data.info.typep = &data.type;
if (opt->all_objects) {
@ -590,6 +725,11 @@ static int batch_objects(struct batch_options *opt)
save_warning = warn_on_object_refname_ambiguity;
warn_on_object_refname_ambiguity = 0;
if (opt->batch_mode == BATCH_MODE_QUEUE_AND_DISPATCH) {
batch_objects_command(opt, &output, &data);
goto cleanup;
}
while (strbuf_getline(&input, stdin) != EOF) {
if (data.split_on_whitespace) {
/*
@ -608,6 +748,7 @@ static int batch_objects(struct batch_options *opt)
batch_one_object(input.buf, &output, opt, &data);
}
cleanup:
strbuf_release(&input);
strbuf_release(&output);
warn_on_object_refname_ambiguity = save_warning;
@ -635,7 +776,16 @@ static int batch_option_callback(const struct option *opt,
}
bo->enabled = 1;
bo->print_contents = !strcmp(opt->long_name, "batch");
if (!strcmp(opt->long_name, "batch"))
bo->batch_mode = BATCH_MODE_CONTENTS;
else if (!strcmp(opt->long_name, "batch-check"))
bo->batch_mode = BATCH_MODE_INFO;
else if (!strcmp(opt->long_name, "batch-command"))
bo->batch_mode = BATCH_MODE_QUEUE_AND_DISPATCH;
else
BUG("%s given to batch-option-callback", opt->long_name);
bo->format = arg;
return 0;
@ -654,7 +804,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
N_("git cat-file <type> <object>"),
N_("git cat-file (-e | -p) <object>"),
N_("git cat-file (-t | -s) [--allow-unknown-type] <object>"),
N_("git cat-file (--batch | --batch-check) [--batch-all-objects]\n"
N_("git cat-file (--batch | --batch-check | --batch-command) [--batch-all-objects]\n"
" [--buffer] [--follow-symlinks] [--unordered]\n"
" [--textconv | --filters]"),
N_("git cat-file (--textconv | --filters)\n"
@ -683,6 +833,10 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
N_("like --batch, but don't emit <contents>"),
PARSE_OPT_OPTARG | PARSE_OPT_NONEG,
batch_option_callback),
OPT_CALLBACK_F(0, "batch-command", &batch, N_("format"),
N_("read commands from stdin"),
PARSE_OPT_OPTARG | PARSE_OPT_NONEG,
batch_option_callback),
OPT_CMDMODE(0, "batch-all-objects", &opt,
N_("with --batch[-check]: ignores stdin, batches all known objects"), 'b'),
/* Batch-specific options */
@ -742,7 +896,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
/* Return early if we're in batch mode? */
if (batch.enabled) {
if (opt_cw)
batch.cmdmode = opt;
batch.transform_mode = opt;
else if (opt && opt != 'b')
usage_msg_optf(_("'-%c' is incompatible with batch mode"),
usage, options, opt);

View File

@ -105,13 +105,18 @@ strlen () {
}
maybe_remove_timestamp () {
if test -z "$2"; then
echo_without_newline "$1"
else
echo_without_newline "$(printf '%s\n' "$1" | sed -e 's/ [0-9][0-9]* [-+][0-9][0-9][0-9][0-9]$//')"
fi
if test -z "$2"; then
echo_without_newline "$1"
else
echo_without_newline "$(printf '%s\n' "$1" | remove_timestamp)"
fi
}
# Filter stdin to stdout, deleting from the end of each line a trailing
# " <digits> <sign><four digits>" sequence (for example " 1234567890 +0000").
# Lines that do not end that way pass through unchanged.
remove_timestamp () {
sed -e 's/ [0-9][0-9]* [-+][0-9][0-9][0-9][0-9]$//'
}
run_tests () {
type=$1
sha1=$2
@ -177,12 +182,36 @@ $content"
test_cmp expect actual
'
for opt in --buffer --no-buffer
do
test -z "$content" ||
test_expect_success "--batch-command $opt output of $type content is correct" '
maybe_remove_timestamp "$batch_output" $no_ts >expect &&
maybe_remove_timestamp "$(test_write_lines "contents $sha1" |
git cat-file --batch-command $opt)" $no_ts >actual &&
test_cmp expect actual
'
test_expect_success "--batch-command $opt output of $type info is correct" '
echo "$sha1 $type $size" >expect &&
test_write_lines "info $sha1" |
git cat-file --batch-command $opt >actual &&
test_cmp expect actual
'
done
test_expect_success "custom --batch-check format" '
echo "$type $sha1" >expect &&
echo $sha1 | git cat-file --batch-check="%(objecttype) %(objectname)" >actual &&
test_cmp expect actual
'
test_expect_success "custom --batch-command format" '
echo "$type $sha1" >expect &&
echo "info $sha1" | git cat-file --batch-command="%(objecttype) %(objectname)" >actual &&
test_cmp expect actual
'
test_expect_success '--batch-check with %(rest)' '
echo "$type this is some extra content" >expect &&
echo "$sha1 this is some extra content" |
@ -224,6 +253,22 @@ test_expect_success "setup" '
run_tests 'blob' $hello_sha1 $hello_size "$hello_content" "$hello_content"
test_expect_success '--batch-command --buffer with flush for blob info' '
echo "$hello_sha1 blob $hello_size" >expect &&
test_write_lines "info $hello_sha1" "flush" |
GIT_TEST_CAT_FILE_NO_FLUSH_ON_EXIT=1 \
git cat-file --batch-command --buffer >actual &&
test_cmp expect actual
'
test_expect_success '--batch-command --buffer without flush for blob info' '
touch output &&
test_write_lines "info $hello_sha1" |
GIT_TEST_CAT_FILE_NO_FLUSH_ON_EXIT=1 \
git cat-file --batch-command --buffer >>output &&
test_must_be_empty output
'
test_expect_success '--batch-check without %(rest) considers whole line' '
echo "$hello_sha1 blob $hello_size" >expect &&
git update-index --add --cacheinfo 100644 $hello_sha1 "white space" &&
@ -267,7 +312,7 @@ test_expect_success \
"Reach a blob from a tag pointing to it" \
"test '$hello_content' = \"\$(git cat-file blob $tag_sha1)\""
for batch in batch batch-check
for batch in batch batch-check batch-command
do
for opt in t s e p
do
@ -373,6 +418,49 @@ test_expect_success "--batch-check with multiple sha1s gives correct format" '
"$(echo_without_newline "$batch_check_input" | git cat-file --batch-check)"
'
test_expect_success '--batch-command with multiple info calls gives correct format' '
cat >expect <<-EOF &&
$hello_sha1 blob $hello_size
$tree_sha1 tree $tree_size
$commit_sha1 commit $commit_size
$tag_sha1 tag $tag_size
deadbeef missing
EOF
git cat-file --batch-command --buffer >actual <<-EOF &&
info $hello_sha1
info $tree_sha1
info $commit_sha1
info $tag_sha1
info deadbeef
EOF
test_cmp expect actual
'
test_expect_success '--batch-command with multiple command calls gives correct format' '
remove_timestamp >expect <<-EOF &&
$hello_sha1 blob $hello_size
$hello_content
$commit_sha1 commit $commit_size
$commit_content
$tag_sha1 tag $tag_size
$tag_content
deadbeef missing
EOF
git cat-file --batch-command --buffer >actual_raw <<-EOF &&
contents $hello_sha1
contents $commit_sha1
contents $tag_sha1
contents deadbeef
flush
EOF
remove_timestamp <actual_raw >actual &&
test_cmp expect actual
'
test_expect_success 'setup blobs which are likely to delta' '
test-tool genrandom foo 10240 >foo &&
{ cat foo && echo plus; } >foo-plus &&
@ -963,5 +1051,40 @@ test_expect_success 'cat-file --batch-all-objects --batch-check ignores replace'
echo "$orig commit $orig_size" >expect &&
test_cmp expect actual
'
test_expect_success 'batch-command empty command' '
echo "" >cmd &&
test_expect_code 128 git cat-file --batch-command <cmd 2>err &&
grep "^fatal:.*empty command in input.*" err
'
test_expect_success 'batch-command whitespace before command' '
echo " info deadbeef" >cmd &&
test_expect_code 128 git cat-file --batch-command <cmd 2>err &&
grep "^fatal:.*whitespace before command.*" err
'
test_expect_success 'batch-command unknown command' '
echo unknown_command >cmd &&
test_expect_code 128 git cat-file --batch-command <cmd 2>err &&
grep "^fatal:.*unknown command.*" err
'
test_expect_success 'batch-command missing arguments' '
echo "info" >cmd &&
test_expect_code 128 git cat-file --batch-command <cmd 2>err &&
grep "^fatal:.*info requires arguments.*" err
'
test_expect_success 'batch-command flush with arguments' '
echo "flush arg" >cmd &&
test_expect_code 128 git cat-file --batch-command --buffer <cmd 2>err &&
grep "^fatal:.*flush takes no arguments.*" err
'
test_expect_success 'batch-command flush without --buffer' '
echo "flush" >cmd &&
test_expect_code 128 git cat-file --batch-command <cmd 2>err &&
grep "^fatal:.*flush is only for --buffer mode.*" err
'
test_done