Merge branch 'jk/in-pack-size-measurement'

"git cat-file --batch-check=<format>" is added, primarily to allow
on-disk footprint of objects in packfiles (often they are a lot
smaller than their true size, when expressed as deltas) to be
reported.

* jk/in-pack-size-measurement:
  pack-revindex: radix-sort the revindex
  pack-revindex: use unsigned to store number of objects
  cat-file: split --batch input lines on whitespace
  cat-file: add %(objectsize:disk) format atom
  cat-file: add --batch-check=<format>
  cat-file: refactor --batch option parsing
  cat-file: teach --batch to stream blob objects
  t1006: modernize output comparisons
  teach sha1_object_info_extended a "disk_size" query
  zero-initialize object_info structs
Junio C Hamano 2013-07-18 12:59:41 -07:00
commit 802f878b86
7 changed files with 390 additions and 106 deletions

View File

@@ -58,12 +58,16 @@ OPTIONS
to apply the filter to the content recorded in the index at <path>.
--batch::
Print the SHA-1, type, size, and contents of each object provided on
stdin. May not be combined with any other options or arguments.
--batch=<format>::
Print object information and contents for each object provided
on stdin. May not be combined with any other options or arguments.
See the section `BATCH OUTPUT` below for details.
--batch-check::
Print the SHA-1, type, and size of each object provided on stdin. May not
be combined with any other options or arguments.
--batch-check=<format>::
Print object information for each object provided on stdin. May
not be combined with any other options or arguments. See the
section `BATCH OUTPUT` below for details.
OUTPUT
------
@@ -78,28 +82,81 @@ If '-p' is specified, the contents of <object> are pretty-printed.
If <type> is specified, the raw (though uncompressed) contents of the <object>
will be returned.
If '--batch' is specified, output of the following form is printed for each
object specified on stdin:
BATCH OUTPUT
------------
If `--batch` or `--batch-check` is given, `cat-file` will read objects
from stdin, one per line, and print information about them.
Each line is split at the first whitespace boundary. All characters
before that whitespace are considered as a whole object name, and are
parsed as if given to linkgit:git-rev-parse[1]. Characters after that
whitespace can be accessed using the `%(rest)` atom (see below).
You can specify the information shown for each object by using a custom
`<format>`. The `<format>` is copied literally to stdout for each
object, with placeholders of the form `%(atom)` expanded, followed by a
newline. The available atoms are:
`objectname`::
The 40-hex object name of the object.
`objecttype`::
The type of the object (the same as `cat-file -t` reports).
`objectsize`::
The size, in bytes, of the object (the same as `cat-file -s`
reports).
`objectsize:disk`::
The size, in bytes, that the object takes up on disk. See the
note about on-disk sizes in the `CAVEATS` section below.
`rest`::
The text (if any) found after the first run of whitespace on the
input line (i.e., the "rest" of the line).
If no format is specified, the default format is `%(objectname)
%(objecttype) %(objectsize)`.
If `--batch` is specified, the object information is followed by the
object contents (consisting of `%(objectsize)` bytes), followed by a
newline.
For example, `--batch` without a custom format would produce:
------------
<sha1> SP <type> SP <size> LF
<contents> LF
------------
If '--batch-check' is specified, output of the following form is printed for
each object specified on stdin:
Whereas `--batch-check='%(objectname) %(objecttype)'` would produce:
------------
<sha1> SP <type> SP <size> LF
<sha1> SP <type> LF
------------
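As another example, asking for both sizes with
`--batch-check='%(objectname) %(objectsize) %(objectsize:disk)'` would
produce:
------------
<sha1> SP <size> SP <disk-size> LF
------------
For packed objects stored as deltas, `<disk-size>` is usually much
smaller than `<size>`; see the `CAVEATS` section below before drawing
conclusions from it.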
For both '--batch' and '--batch-check', output of the following form is printed
for each object specified on stdin that does not exist in the repository:
If a name is specified on stdin that cannot be resolved to an object in
the repository, then `cat-file` will ignore any custom format and print:
------------
<object> SP missing LF
------------
CAVEATS
-------
Note that the sizes of objects on disk are reported accurately, but care
should be taken in drawing conclusions about which refs or objects are
responsible for disk usage. The size of a packed non-delta object may be
much larger than the size of objects which delta against it, but the
choice of which object is the base and which is the delta is arbitrary
and is subject to change during a repack. Note also that multiple copies
of an object may be present in the object database; in this case, it is
undefined which copy's size will be reported.
GIT
---
Part of the linkgit:git[1] suite

View File

@@ -13,9 +13,6 @@
#include "userdiff.h"
#include "streaming.h"
#define BATCH 1
#define BATCH_CHECK 2
static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
{
unsigned char sha1[20];
@@ -117,54 +114,174 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
return 0;
}
static int batch_one_object(const char *obj_name, int print_contents)
{
struct expand_data {
unsigned char sha1[20];
enum object_type type = 0;
enum object_type type;
unsigned long size;
void *contents = NULL;
unsigned long disk_size;
const char *rest;
/*
* If mark_query is true, we do not expand anything, but rather
* just mark the object_info with items we wish to query.
*/
int mark_query;
/*
* After a mark_query run, this object_info is set up to be
* passed to sha1_object_info_extended. It will point to the data
* elements above, so you can retrieve the response from there.
*/
struct object_info info;
};
static int is_atom(const char *atom, const char *s, int slen)
{
int alen = strlen(atom);
return alen == slen && !memcmp(atom, s, alen);
}
static void expand_atom(struct strbuf *sb, const char *atom, int len,
void *vdata)
{
struct expand_data *data = vdata;
if (is_atom("objectname", atom, len)) {
if (!data->mark_query)
strbuf_addstr(sb, sha1_to_hex(data->sha1));
} else if (is_atom("objecttype", atom, len)) {
if (!data->mark_query)
strbuf_addstr(sb, typename(data->type));
} else if (is_atom("objectsize", atom, len)) {
if (data->mark_query)
data->info.sizep = &data->size;
else
strbuf_addf(sb, "%lu", data->size);
} else if (is_atom("objectsize:disk", atom, len)) {
if (data->mark_query)
data->info.disk_sizep = &data->disk_size;
else
strbuf_addf(sb, "%lu", data->disk_size);
} else if (is_atom("rest", atom, len)) {
if (!data->mark_query && data->rest)
strbuf_addstr(sb, data->rest);
} else
die("unknown format element: %.*s", len, atom);
}
static size_t expand_format(struct strbuf *sb, const char *start, void *data)
{
const char *end;
if (*start != '(')
return 0;
end = strchr(start + 1, ')');
if (!end)
die("format element '%s' does not end in ')'", start);
expand_atom(sb, start + 1, end - start - 1, data);
return end - start + 1;
}
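/*
 * Condensed sketch of the intended calling sequence (it mirrors
 * batch_objects() and batch_one_object() below; "expand_demo" itself is
 * hypothetical and not part of this patch): run the format once with
 * mark_query set to prime data.info, resolve the object, then run it
 * again to produce the output line.
 */
static void expand_demo(const char *obj_name, const char *fmt)
{
	struct expand_data data;
	struct strbuf out = STRBUF_INIT;

	memset(&data, 0, sizeof(data));
	data.mark_query = 1;
	strbuf_expand(&out, fmt, expand_format, &data);	/* fills data.info */
	strbuf_reset(&out);
	data.mark_query = 0;

	if (get_sha1(obj_name, data.sha1))
		die("not a valid object name: %s", obj_name);
	data.type = sha1_object_info_extended(data.sha1, &data.info);
	if (data.type <= 0)
		die("unable to get object info for %s", obj_name);

	strbuf_expand(&out, fmt, expand_format, &data);
	strbuf_addch(&out, '\n');
	write_or_die(1, out.buf, out.len);
	strbuf_release(&out);
}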
static void print_object_or_die(int fd, const unsigned char *sha1,
enum object_type type, unsigned long size)
{
if (type == OBJ_BLOB) {
if (stream_blob_to_fd(fd, sha1, NULL, 0) < 0)
die("unable to stream %s to stdout", sha1_to_hex(sha1));
}
else {
enum object_type rtype;
unsigned long rsize;
void *contents;
contents = read_sha1_file(sha1, &rtype, &rsize);
if (!contents)
die("object %s disappeared", sha1_to_hex(sha1));
if (rtype != type)
die("object %s changed type!?", sha1_to_hex(sha1));
if (rsize != size)
die("object %s change size!?", sha1_to_hex(sha1));
write_or_die(fd, contents, size);
free(contents);
}
}
struct batch_options {
int enabled;
int print_contents;
const char *format;
};
static int batch_one_object(const char *obj_name, struct batch_options *opt,
struct expand_data *data)
{
struct strbuf buf = STRBUF_INIT;
if (!obj_name)
return 1;
if (get_sha1(obj_name, sha1)) {
if (get_sha1(obj_name, data->sha1)) {
printf("%s missing\n", obj_name);
fflush(stdout);
return 0;
}
if (print_contents == BATCH)
contents = read_sha1_file(sha1, &type, &size);
else
type = sha1_object_info(sha1, &size);
if (type <= 0) {
data->type = sha1_object_info_extended(data->sha1, &data->info);
if (data->type <= 0) {
printf("%s missing\n", obj_name);
fflush(stdout);
if (print_contents == BATCH)
free(contents);
return 0;
}
printf("%s %s %lu\n", sha1_to_hex(sha1), typename(type), size);
fflush(stdout);
strbuf_expand(&buf, opt->format, expand_format, data);
strbuf_addch(&buf, '\n');
write_or_die(1, buf.buf, buf.len);
strbuf_release(&buf);
if (print_contents == BATCH) {
write_or_die(1, contents, size);
printf("\n");
fflush(stdout);
free(contents);
if (opt->print_contents) {
print_object_or_die(1, data->sha1, data->type, data->size);
write_or_die(1, "\n", 1);
}
return 0;
}
static int batch_objects(int print_contents)
static int batch_objects(struct batch_options *opt)
{
struct strbuf buf = STRBUF_INIT;
struct expand_data data;
if (!opt->format)
opt->format = "%(objectname) %(objecttype) %(objectsize)";
/*
* Expand once with our special mark_query flag, which will prime the
* object_info to be handed to sha1_object_info_extended for each
* object.
*/
memset(&data, 0, sizeof(data));
data.mark_query = 1;
strbuf_expand(&buf, opt->format, expand_format, &data);
data.mark_query = 0;
while (strbuf_getline(&buf, stdin, '\n') != EOF) {
int error = batch_one_object(buf.buf, print_contents);
char *p;
int error;
/*
* Split at first whitespace, tying off the beginning of the
* string and saving the remainder (or NULL) in data.rest.
*/
p = strpbrk(buf.buf, " \t");
if (p) {
while (*p && strchr(" \t", *p))
*p++ = '\0';
}
data.rest = p;
error = batch_one_object(buf.buf, opt, &data);
if (error)
return error;
}
@@ -186,10 +303,29 @@ static int git_cat_file_config(const char *var, const char *value, void *cb)
return git_default_config(var, value, cb);
}
static int batch_option_callback(const struct option *opt,
const char *arg,
int unset)
{
struct batch_options *bo = opt->value;
if (unset) {
memset(bo, 0, sizeof(*bo));
return 0;
}
bo->enabled = 1;
bo->print_contents = !strcmp(opt->long_name, "batch");
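	/*
	 * With PARSE_OPT_OPTARG, "arg" is NULL when the option is given
	 * without "=<format>"; batch_objects() then falls back to the
	 * default "%(objectname) %(objecttype) %(objectsize)" format.
	 */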
bo->format = arg;
return 0;
}
int cmd_cat_file(int argc, const char **argv, const char *prefix)
{
int opt = 0, batch = 0;
int opt = 0;
const char *exp_type = NULL, *obj_name = NULL;
struct batch_options batch = {0};
const struct option options[] = {
OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
@@ -200,12 +336,12 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
OPT_SET_INT(0, "textconv", &opt,
N_("for blob objects, run textconv on object's content"), 'c'),
OPT_SET_INT(0, "batch", &batch,
N_("show info and content of objects fed from the standard input"),
BATCH),
OPT_SET_INT(0, "batch-check", &batch,
N_("show info about objects fed from the standard input"),
BATCH_CHECK),
{ OPTION_CALLBACK, 0, "batch", &batch, "format",
N_("show info and content of objects fed from the standard input"),
PARSE_OPT_OPTARG, batch_option_callback },
{ OPTION_CALLBACK, 0, "batch-check", &batch, "format",
N_("show info about objects fed from the standard input"),
PARSE_OPT_OPTARG, batch_option_callback },
OPT_END()
};
@@ -222,19 +358,19 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
else
usage_with_options(cat_file_usage, options);
}
if (!opt && !batch) {
if (!opt && !batch.enabled) {
if (argc == 2) {
exp_type = argv[0];
obj_name = argv[1];
} else
usage_with_options(cat_file_usage, options);
}
if (batch && (opt || argc)) {
if (batch.enabled && (opt || argc)) {
usage_with_options(cat_file_usage, options);
}
if (batch)
return batch_objects(batch);
if (batch.enabled)
return batch_objects(&batch);
return cat_one_file(opt, exp_type, obj_name);
}

View File

@@ -1130,6 +1130,7 @@ extern int unpack_object_header(struct packed_git *, struct pack_window **, off_
struct object_info {
/* Request */
unsigned long *sizep;
unsigned long *disk_sizep;
/* Response */
enum {

View File

@@ -59,11 +59,101 @@ static void init_pack_revindex(void)
/* revindex elements are lazily initialized */
}
static int cmp_offset(const void *a_, const void *b_)
/*
* This is a least-significant-digit radix sort.
*
* It sorts each of the "n" items in "entries" by its offset field. The "max"
* parameter must be at least as large as the largest offset in the array,
* and lets us quit the sort early.
*/
static void sort_revindex(struct revindex_entry *entries, unsigned n, off_t max)
{
const struct revindex_entry *a = a_;
const struct revindex_entry *b = b_;
return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
/*
* We use a "digit" size of 16 bits. That keeps our memory
* usage reasonable, and we can generally (for a 4G or smaller
* packfile) quit after two rounds of radix-sorting.
*/
#define DIGIT_SIZE (16)
#define BUCKETS (1 << DIGIT_SIZE)
/*
* We want to know the bucket that a[i] will go into when we are using
* the digit that is N bits from the (least significant) end.
*/
#define BUCKET_FOR(a, i, bits) (((a)[(i)].offset >> (bits)) & (BUCKETS-1))
/*
* We need O(n) temporary storage. Rather than do an extra copy of the
* partial results into "entries", we sort back and forth between the
* real array and temporary storage. In each iteration of the loop, we
* keep track of them with alias pointers, always sorting from "from"
* to "to".
*/
struct revindex_entry *tmp = xmalloc(n * sizeof(*tmp));
struct revindex_entry *from = entries, *to = tmp;
int bits;
unsigned *pos = xmalloc(BUCKETS * sizeof(*pos));
/*
* If (max >> bits) is zero, then we know that the radix digit we are
* on (and any higher) will be zero for all entries, and our loop will
* be a no-op, as everybody lands in the same zero-th bucket.
*/
for (bits = 0; max >> bits; bits += DIGIT_SIZE) {
struct revindex_entry *swap;
unsigned i;
memset(pos, 0, BUCKETS * sizeof(*pos));
/*
* We want pos[i] to store the index of the last element that
* will go in bucket "i" (actually one past the last element).
* To do this, we first count the items that will go in each
* bucket, which gives us a relative offset from the last
* bucket. We can then cumulatively add the index from the
* previous bucket to get the true index.
*/
for (i = 0; i < n; i++)
pos[BUCKET_FOR(from, i, bits)]++;
for (i = 1; i < BUCKETS; i++)
pos[i] += pos[i-1];
/*
* Now we can drop the elements into their correct buckets (in
* our temporary array). We iterate the pos counter backwards
* to avoid using an extra index to count up. And since we are
* going backwards there, we must also go backwards through the
* array itself, to keep the sort stable.
*
* Note that we use an unsigned iterator to make sure we can
* handle 2^32-1 objects, even on a 32-bit system. But this
* means we cannot use the more obvious "i >= 0" loop condition
* for counting backwards, and must instead check for
* wrap-around with UINT_MAX.
*/
for (i = n - 1; i != UINT_MAX; i--)
to[--pos[BUCKET_FOR(from, i, bits)]] = from[i];
/*
* Now "to" contains the most sorted list, so we swap "from" and
* "to" for the next iteration.
*/
swap = from;
from = to;
to = swap;
}
/*
* If we ended with our data in the original array, great. If not,
* we have to move it back from the temporary storage.
*/
if (from != entries)
memcpy(entries, tmp, n * sizeof(*entries));
free(tmp);
free(pos);
#undef BUCKET_FOR
#undef BUCKETS
#undef DIGIT_SIZE
}
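/*
 * Illustration (a hypothetical helper, not part of this patch): the same
 * least-significant-digit radix sort, reduced to plain unsigned integers
 * with an 8-bit digit, so the count / prefix-sum / stable-placement steps
 * can be followed in isolation.
 */
static void radix_sort_u32(unsigned *v, unsigned n, unsigned max)
{
	unsigned *tmp = xmalloc(n * sizeof(*tmp));
	unsigned *from = v, *to = tmp;
	unsigned pos[256];
	unsigned bits;

	/*
	 * As above, stop once the remaining digits are all zero; the extra
	 * "bits < 32" check also keeps the shift below the width of the
	 * type.
	 */
	for (bits = 0; bits < 32 && (max >> bits); bits += 8) {
		unsigned i, *swap;

		memset(pos, 0, sizeof(pos));
		/* count how many keys land in each of the 256 buckets */
		for (i = 0; i < n; i++)
			pos[(from[i] >> bits) & 0xff]++;
		/* turn the counts into one-past-the-end bucket indices */
		for (i = 1; i < 256; i++)
			pos[i] += pos[i - 1];
		/* place the keys, walking backwards to keep the sort stable */
		for (i = n - 1; i != UINT_MAX; i--)
			to[--pos[(from[i] >> bits) & 0xff]] = from[i];

		swap = from;
		from = to;
		to = swap;
	}

	if (from != v)
		memcpy(v, tmp, n * sizeof(*v));
	free(tmp);
}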
/*
@@ -72,8 +162,8 @@ static int cmp_offset(const void *a_, const void *b_)
static void create_pack_revindex(struct pack_revindex *rix)
{
struct packed_git *p = rix->p;
int num_ent = p->num_objects;
int i;
unsigned num_ent = p->num_objects;
unsigned i;
const char *index = p->index_data;
rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
@@ -108,13 +198,13 @@ static void create_pack_revindex(struct pack_revindex *rix)
*/
rix->revindex[num_ent].offset = p->pack_size - 20;
rix->revindex[num_ent].nr = -1;
qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset);
sort_revindex(rix->revindex, num_ent, p->pack_size);
}
struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs)
{
int num;
int lo, hi;
unsigned lo, hi;
struct pack_revindex *rix;
struct revindex_entry *revindex;
@@ -132,7 +222,7 @@ struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs)
lo = 0;
hi = p->num_objects + 1;
do {
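		/*
		 * The midpoint is computed as lo + (hi - lo) / 2 rather than
		 * (lo + hi) / 2 so that the sum cannot wrap around for very
		 * large unsigned values of lo and hi; hi >= lo holds
		 * throughout the search, so the subtraction is safe.
		 */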
int mi = (lo + hi) / 2;
unsigned mi = lo + (hi - lo) / 2;
if (revindex[mi].offset == ofs) {
return revindex + mi;
} else if (ofs < revindex[mi].offset)

View File

@@ -1697,7 +1697,8 @@ static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
#define POI_STACK_PREALLOC 64
static int packed_object_info(struct packed_git *p, off_t obj_offset,
unsigned long *sizep, int *rtype)
unsigned long *sizep, int *rtype,
unsigned long *disk_sizep)
{
struct pack_window *w_curs = NULL;
unsigned long size;
@@ -1731,6 +1732,11 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
}
}
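	/*
	 * The revindex is sorted by pack offset and ends with a sentinel
	 * entry at pack_size - 20 (just before the trailing pack checksum),
	 * so the distance from this entry to the next one is exactly the
	 * number of bytes the object occupies on disk, whether it is stored
	 * whole or as a delta.
	 */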
if (disk_sizep) {
struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
*disk_sizep = revidx[1].offset - obj_offset;
}
while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
off_t base_offset;
/* Push the object we're going to leave behind */
@@ -2357,7 +2363,8 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
}
static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep)
static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep,
unsigned long *disk_sizep)
{
int status;
unsigned long mapsize, size;
@@ -2368,6 +2375,8 @@ static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *size
map = map_sha1_file(sha1, &mapsize);
if (!map)
return -1;
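	/*
	 * For a loose object, the on-disk footprint is simply the size of
	 * the deflated object file we just mapped.
	 */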
if (disk_sizep)
*disk_sizep = mapsize;
if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
status = error("unable to unpack %s header",
sha1_to_hex(sha1));
@@ -2391,13 +2400,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
if (co) {
if (oi->sizep)
*(oi->sizep) = co->size;
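		/*
		 * Cached (pretend) objects live only in memory and have no
		 * on-disk representation, so report a disk size of zero.
		 */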
if (oi->disk_sizep)
*(oi->disk_sizep) = 0;
oi->whence = OI_CACHED;
return co->type;
}
if (!find_pack_entry(sha1, &e)) {
/* Most likely it's a loose object. */
status = sha1_loose_object_info(sha1, oi->sizep);
status = sha1_loose_object_info(sha1, oi->sizep, oi->disk_sizep);
if (status >= 0) {
oi->whence = OI_LOOSE;
return status;
@@ -2409,7 +2420,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
return status;
}
status = packed_object_info(e.p, e.offset, oi->sizep, &rtype);
status = packed_object_info(e.p, e.offset, oi->sizep, &rtype,
oi->disk_sizep);
if (status < 0) {
mark_bad_packed_object(e.p, sha1);
status = sha1_object_info_extended(sha1, oi);
@@ -2428,7 +2440,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
{
struct object_info oi;
struct object_info oi = {0};
oi.sizep = sizep;
return sha1_object_info_extended(sha1, &oi);

View File

@@ -135,7 +135,7 @@ struct git_istream *open_istream(const unsigned char *sha1,
struct stream_filter *filter)
{
struct git_istream *st;
struct object_info oi;
struct object_info oi = {0};
const unsigned char *real = lookup_replace_object(sha1);
enum input_source src = istream_source(real, type, &oi);

View File

@@ -36,66 +36,54 @@ $content"
'
test_expect_success "Type of $type is correct" '
test $type = "$(git cat-file -t $sha1)"
echo $type >expect &&
git cat-file -t $sha1 >actual &&
test_cmp expect actual
'
test_expect_success "Size of $type is correct" '
test $size = "$(git cat-file -s $sha1)"
echo $size >expect &&
git cat-file -s $sha1 >actual &&
test_cmp expect actual
'
test -z "$content" ||
test_expect_success "Content of $type is correct" '
expect="$(maybe_remove_timestamp "$content" $no_ts)"
actual="$(maybe_remove_timestamp "$(git cat-file $type $sha1)" $no_ts)"
if test "z$expect" = "z$actual"
then
: happy
else
echo "Oops: expected $expect"
echo "but got $actual"
false
fi
maybe_remove_timestamp "$content" $no_ts >expect &&
maybe_remove_timestamp "$(git cat-file $type $sha1)" $no_ts >actual &&
test_cmp expect actual
'
test_expect_success "Pretty content of $type is correct" '
expect="$(maybe_remove_timestamp "$pretty_content" $no_ts)"
actual="$(maybe_remove_timestamp "$(git cat-file -p $sha1)" $no_ts)"
if test "z$expect" = "z$actual"
then
: happy
else
echo "Oops: expected $expect"
echo "but got $actual"
false
fi
maybe_remove_timestamp "$pretty_content" $no_ts >expect &&
maybe_remove_timestamp "$(git cat-file -p $sha1)" $no_ts >actual &&
test_cmp expect actual
'
test -z "$content" ||
test_expect_success "--batch output of $type is correct" '
expect="$(maybe_remove_timestamp "$batch_output" $no_ts)"
actual="$(maybe_remove_timestamp "$(echo $sha1 | git cat-file --batch)" $no_ts)"
if test "z$expect" = "z$actual"
then
: happy
else
echo "Oops: expected $expect"
echo "but got $actual"
false
fi
maybe_remove_timestamp "$batch_output" $no_ts >expect &&
maybe_remove_timestamp "$(echo $sha1 | git cat-file --batch)" $no_ts >actual &&
test_cmp expect actual
'
test_expect_success "--batch-check output of $type is correct" '
expect="$sha1 $type $size"
actual="$(echo_without_newline $sha1 | git cat-file --batch-check)"
if test "z$expect" = "z$actual"
then
: happy
else
echo "Oops: expected $expect"
echo "but got $actual"
false
fi
echo "$sha1 $type $size" >expect &&
echo_without_newline $sha1 | git cat-file --batch-check >actual &&
test_cmp expect actual
'
test_expect_success "custom --batch-check format" '
echo "$type $sha1" >expect &&
echo $sha1 | git cat-file --batch-check="%(objecttype) %(objectname)" >actual &&
test_cmp expect actual
'
test_expect_success '--batch-check with %(rest)' '
echo "$type this is some extra content" >expect &&
echo "$sha1 this is some extra content" |
git cat-file --batch-check="%(objecttype) %(rest)" >actual &&
test_cmp expect actual
'
}