cat-file: only split on whitespace when %(rest) is used
Commit c334b87b
(cat-file: split --batch input lines on whitespace, 2013-07-11) taught `cat-file --batch-check` to split input lines on the first whitespace, and stash everything after the first token into the %(rest) output format element. It claimed: Object names cannot contain spaces, so any input with spaces would have resulted in a "missing" line. But that is not correct. Refs, object sha1s, and various peeling suffixes cannot contain spaces, but some object names can. In particular: 1. Tree paths like "[<tree>]:path with whitespace" 2. Reflog specifications like "@{2 days ago}" 3. Commit searches like "rev^{/grep me}" or ":/grep me" To remain backwards compatible, we cannot split on whitespace by default, hence we will ship 1.8.4 with the commit reverted. Resurrect its attempt but in a weaker form; only do the splitting when "%(rest)" is used in the output format. Since that element did not exist at all before c334b87,
old scripts cannot be affected. The existence of object names with spaces does mean that you cannot reliably do: echo ":path with space and other data" | git cat-file --batch-check="%(objectname) %(rest)" as it would split the path and feed only ":path" to get_sha1. But that command is nonsensical. If you wanted to see "and other data" in "%(rest)", git cannot possibly know where the filename ends and the "rest" begins. It might be more robust to have something like "-z" to separate the input elements. But this patch is still a reasonable step before having that. It makes the easy cases easy; people who do not care about %(rest) do not have to consider it, and the %(rest) code handles the spaces and newlines of "rev-list --objects" correctly. Hard cases remain hard but possible (if you might get whitespace in your input, you do not get to use %(rest) and must split and join the output yourself using more flexible tools). And most importantly, it does not preclude us from having different splitting rules later if a "-z" (or similar) option is added. So we can make the hard cases easier later, if we choose to. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
062aeee8aa
commit
97be04077f
@ -86,10 +86,9 @@ BATCH OUTPUT
|
||||
------------
|
||||
|
||||
If `--batch` or `--batch-check` is given, `cat-file` will read objects
|
||||
from stdin, one per line, and print information about them.
|
||||
|
||||
Each line is considered as a whole object name, and is parsed as if
|
||||
given to linkgit:git-rev-parse[1].
|
||||
from stdin, one per line, and print information about them. By default,
|
||||
the whole line is considered as an object, as if it were fed to
|
||||
linkgit:git-rev-parse[1].
|
||||
|
||||
You can specify the information shown for each object by using a custom
|
||||
`<format>`. The `<format>` is copied literally to stdout for each
|
||||
@ -110,6 +109,13 @@ newline. The available atoms are:
|
||||
The size, in bytes, that the object takes up on disk. See the
|
||||
note about on-disk sizes in the `CAVEATS` section below.
|
||||
|
||||
`rest`::
|
||||
If this atom is used in the output string, input lines are split
|
||||
at the first whitespace boundary. All characters before that
|
||||
whitespace are considered to be the object name; characters
|
||||
after that first run of whitespace (i.e., the "rest" of the
|
||||
line) are output in place of the `%(rest)` atom.
|
||||
|
||||
If no format is specified, the default format is `%(objectname)
|
||||
%(objecttype) %(objectsize)`.
|
||||
|
||||
|
@ -119,6 +119,7 @@ struct expand_data {
|
||||
enum object_type type;
|
||||
unsigned long size;
|
||||
unsigned long disk_size;
|
||||
const char *rest;
|
||||
|
||||
/*
|
||||
* If mark_query is true, we do not expand anything, but rather
|
||||
@ -126,6 +127,13 @@ struct expand_data {
|
||||
*/
|
||||
int mark_query;
|
||||
|
||||
/*
|
||||
* Whether to split the input on whitespace before feeding it to
|
||||
* get_sha1; this is decided during the mark_query phase based on
|
||||
* whether we have a %(rest) token in our format.
|
||||
*/
|
||||
int split_on_whitespace;
|
||||
|
||||
/*
|
||||
* After a mark_query run, this object_info is set up to be
|
||||
* passed to sha1_object_info_extended. It will point to the data
|
||||
@ -163,6 +171,11 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len,
|
||||
data->info.disk_sizep = &data->disk_size;
|
||||
else
|
||||
strbuf_addf(sb, "%lu", data->disk_size);
|
||||
} else if (is_atom("rest", atom, len)) {
|
||||
if (data->mark_query)
|
||||
data->split_on_whitespace = 1;
|
||||
else if (data->rest)
|
||||
strbuf_addstr(sb, data->rest);
|
||||
} else
|
||||
die("unknown format element: %.*s", len, atom);
|
||||
}
|
||||
@ -273,7 +286,23 @@ static int batch_objects(struct batch_options *opt)
|
||||
warn_on_object_refname_ambiguity = 0;
|
||||
|
||||
while (strbuf_getline(&buf, stdin, '\n') != EOF) {
|
||||
int error = batch_one_object(buf.buf, opt, &data);
|
||||
int error;
|
||||
|
||||
if (data.split_on_whitespace) {
|
||||
/*
|
||||
* Split at first whitespace, tying off the beginning
|
||||
* of the string and saving the remainder (or NULL) in
|
||||
* data.rest.
|
||||
*/
|
||||
char *p = strpbrk(buf.buf, " \t");
|
||||
if (p) {
|
||||
while (*p && strchr(" \t", *p))
|
||||
*p++ = '\0';
|
||||
}
|
||||
data.rest = p;
|
||||
}
|
||||
|
||||
error = batch_one_object(buf.buf, opt, &data);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
@ -78,6 +78,13 @@ $content"
|
||||
echo $sha1 | git cat-file --batch-check="%(objecttype) %(objectname)" >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_expect_success '--batch-check with %(rest)' '
|
||||
echo "$type this is some extra content" >expect &&
|
||||
echo "$sha1 this is some extra content" |
|
||||
git cat-file --batch-check="%(objecttype) %(rest)" >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
}
|
||||
|
||||
hello_content="Hello World"
|
||||
@ -91,6 +98,14 @@ test_expect_success "setup" '
|
||||
|
||||
run_tests 'blob' $hello_sha1 $hello_size "$hello_content" "$hello_content"
|
||||
|
||||
test_expect_success '--batch-check without %(rest) considers whole line' '
|
||||
echo "$hello_sha1 blob $hello_size" >expect &&
|
||||
git update-index --add --cacheinfo 100644 $hello_sha1 "white space" &&
|
||||
test_when_finished "git update-index --remove \"white space\"" &&
|
||||
echo ":white space" | git cat-file --batch-check >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
tree_sha1=$(git write-tree)
|
||||
tree_size=33
|
||||
tree_pretty_content="100644 blob $hello_sha1 hello"
|
||||
|
Loading…
Reference in New Issue
Block a user