Merge branch 'jk/cat-file-batch-optim'

Rework the reverted change to `cat-file --batch-check`.

* jk/cat-file-batch-optim:
  cat-file: only split on whitespace when %(rest) is used
2013-09-09 14:33:07 -07:00 · 2013-09-09 14:33:07 -07:00 · 24703ead4b
commit 24703ead4b
parent 118b9d5836 97be04077f
3 changed files with 55 additions and 5 deletions
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -86,10 +86,9 @@ BATCH OUTPUT
 ------------

 If `--batch` or `--batch-check` is given, `cat-file` will read objects
-from stdin, one per line, and print information about them.
-
-Each line is considered as a whole object name, and is parsed as if
-given to linkgit:git-rev-parse[1].
+from stdin, one per line, and print information about them. By default,
+the whole line is considered as an object, as if it were fed to
+linkgit:git-rev-parse[1].

 You can specify the information shown for each object by using a custom
 `<format>`. The `<format>` is copied literally to stdout for each
@@ -110,6 +109,13 @@ newline. The available atoms are:
 	The size, in bytes, that the object takes up on disk. See the
 	note about on-disk sizes in the `CAVEATS` section below.

+`rest`::
+	If this atom is used in the output string, input lines are split
+	at the first whitespace boundary. All characters before that
+	whitespace are considered to be the object name; characters
+	after that first run of whitespace (i.e., the "rest" of the
+	line) are output in place of the `%(rest)` atom.
+
 If no format is specified, the default format is `%(objectname)
 %(objecttype) %(objectsize)`.

--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -119,6 +119,7 @@ struct expand_data {
 	enum object_type type;
 	unsigned long size;
 	unsigned long disk_size;
+	const char *rest;

 	/*
 	 * If mark_query is true, we do not expand anything, but rather
@@ -126,6 +127,13 @@ struct expand_data {
 	 */
 	int mark_query;

+	/*
+	 * Whether to split the input on whitespace before feeding it to
+	 * get_sha1; this is decided during the mark_query phase based on
+	 * whether we have a %(rest) token in our format.
+	 */
+	int split_on_whitespace;
+
 	/*
 	 * After a mark_query run, this object_info is set up to be
 	 * passed to sha1_object_info_extended. It will point to the data
@@ -163,6 +171,11 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len,
 			data->info.disk_sizep = &data->disk_size;
 		else
 			strbuf_addf(sb, "%lu", data->disk_size);
+	} else if (is_atom("rest", atom, len)) {
+		if (data->mark_query)
+			data->split_on_whitespace = 1;
+		else if (data->rest)
+			strbuf_addstr(sb, data->rest);
 	} else
 		die("unknown format element: %.*s", len, atom);
 }
@@ -273,7 +286,23 @@ static int batch_objects(struct batch_options *opt)
 	warn_on_object_refname_ambiguity = 0;

 	while (strbuf_getline(&buf, stdin, '\n') != EOF) {
-		int error = batch_one_object(buf.buf, opt, &data);
+		int error;
+
+		if (data.split_on_whitespace) {
+			/*
+			 * Split at first whitespace, tying off the beginning
+			 * of the string and saving the remainder (or NULL) in
+			 * data.rest.
+			 */
+			char *p = strpbrk(buf.buf, " \t");
+			if (p) {
+				while (*p && strchr(" \t", *p))
+					*p++ = '\0';
+			}
+			data.rest = p;
+		}
+
+		error = batch_one_object(buf.buf, opt, &data);
 		if (error)
 			return error;
 	}
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -78,6 +78,13 @@ $content"
 	echo $sha1 | git cat-file --batch-check="%(objecttype) %(objectname)" >actual &&
 	test_cmp expect actual
    '
+
+    test_expect_success '--batch-check with %(rest)' '
+	echo "$type this is some extra content" >expect &&
+	echo "$sha1    this is some extra content" |
+		git cat-file --batch-check="%(objecttype) %(rest)" >actual &&
+	test_cmp expect actual
+    '
 }

 hello_content="Hello World"
@@ -91,6 +98,14 @@ test_expect_success "setup" '

 run_tests 'blob' $hello_sha1 $hello_size "$hello_content" "$hello_content"

+test_expect_success '--batch-check without %(rest) considers whole line' '
+	echo "$hello_sha1 blob $hello_size" >expect &&
+	git update-index --add --cacheinfo 100644 $hello_sha1 "white space" &&
+	test_when_finished "git update-index --remove \"white space\"" &&
+	echo ":white space" | git cat-file --batch-check >actual &&
+	test_cmp expect actual
+'
+
 tree_sha1=$(git write-tree)
 tree_size=33
 tree_pretty_content="100644 blob $hello_sha1	hello"