cat-file: provide %(deltabase) batch format
It can be useful for debugging or analysis to see which objects are stored as deltas on top of others, and which objects serve as their delta bases. This information is available by running `git verify-pack`, but that is extremely expensive (and its output is harder than necessary to parse). Instead, let's make it available as a cat-file query format, which makes it fast and simple to get the bases for a subset of the objects. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
5d642e7506
commit
65ea9c3c3d
@ -109,6 +109,11 @@ newline. The available atoms are:
|
|||||||
The size, in bytes, that the object takes up on disk. See the
|
The size, in bytes, that the object takes up on disk. See the
|
||||||
note about on-disk sizes in the `CAVEATS` section below.
|
note about on-disk sizes in the `CAVEATS` section below.
|
||||||
|
|
||||||
|
`deltabase`::
|
||||||
|
If the object is stored as a delta on-disk, this expands to the
|
||||||
|
40-hex sha1 of the delta base object. Otherwise, expands to the
|
||||||
|
null sha1 (40 zeroes). See `CAVEATS` below.
|
||||||
|
|
||||||
`rest`::
|
`rest`::
|
||||||
If this atom is used in the output string, input lines are split
|
If this atom is used in the output string, input lines are split
|
||||||
at the first whitespace boundary. All characters before that
|
at the first whitespace boundary. All characters before that
|
||||||
@ -152,10 +157,11 @@ should be taken in drawing conclusions about which refs or objects are
|
|||||||
responsible for disk usage. The size of a packed non-delta object may be
|
responsible for disk usage. The size of a packed non-delta object may be
|
||||||
much larger than the size of objects which delta against it, but the
|
much larger than the size of objects which delta against it, but the
|
||||||
choice of which object is the base and which is the delta is arbitrary
|
choice of which object is the base and which is the delta is arbitrary
|
||||||
and is subject to change during a repack. Note also that multiple copies
|
and is subject to change during a repack.
|
||||||
of an object may be present in the object database; in this case, it is
|
|
||||||
undefined which copy's size will be reported.
|
|
||||||
|
|
||||||
|
Note also that multiple copies of an object may be present in the object
|
||||||
|
database; in this case, it is undefined which copy's size or delta base
|
||||||
|
will be reported.
|
||||||
|
|
||||||
GIT
|
GIT
|
||||||
---
|
---
|
||||||
|
@ -118,6 +118,7 @@ struct expand_data {
|
|||||||
unsigned long size;
|
unsigned long size;
|
||||||
unsigned long disk_size;
|
unsigned long disk_size;
|
||||||
const char *rest;
|
const char *rest;
|
||||||
|
unsigned char delta_base_sha1[20];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If mark_query is true, we do not expand anything, but rather
|
* If mark_query is true, we do not expand anything, but rather
|
||||||
@ -174,6 +175,11 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len,
|
|||||||
data->split_on_whitespace = 1;
|
data->split_on_whitespace = 1;
|
||||||
else if (data->rest)
|
else if (data->rest)
|
||||||
strbuf_addstr(sb, data->rest);
|
strbuf_addstr(sb, data->rest);
|
||||||
|
} else if (is_atom("deltabase", atom, len)) {
|
||||||
|
if (data->mark_query)
|
||||||
|
data->info.delta_base_sha1 = data->delta_base_sha1;
|
||||||
|
else
|
||||||
|
strbuf_addstr(sb, sha1_to_hex(data->delta_base_sha1));
|
||||||
} else
|
} else
|
||||||
die("unknown format element: %.*s", len, atom);
|
die("unknown format element: %.*s", len, atom);
|
||||||
}
|
}
|
||||||
|
@ -240,4 +240,38 @@ test_expect_success "--batch-check with multiple sha1s gives correct format" '
|
|||||||
"$(echo_without_newline "$batch_check_input" | git cat-file --batch-check)"
|
"$(echo_without_newline "$batch_check_input" | git cat-file --batch-check)"
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'setup blobs which are likely to delta' '
|
||||||
|
test-genrandom foo 10240 >foo &&
|
||||||
|
{ cat foo; echo plus; } >foo-plus &&
|
||||||
|
git add foo foo-plus &&
|
||||||
|
git commit -m foo &&
|
||||||
|
cat >blobs <<-\EOF
|
||||||
|
HEAD:foo
|
||||||
|
HEAD:foo-plus
|
||||||
|
EOF
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'confirm that neither loose blob is a delta' '
|
||||||
|
cat >expect <<-EOF
|
||||||
|
$_z40
|
||||||
|
$_z40
|
||||||
|
EOF
|
||||||
|
git cat-file --batch-check="%(deltabase)" <blobs >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
# To avoid relying too much on the current delta heuristics,
|
||||||
|
# we will check only that one of the two objects is a delta
|
||||||
|
# against the other, but not the order. We can do so by just
|
||||||
|
# asking for the base of both, and checking whether either
|
||||||
|
# sha1 appears in the output.
|
||||||
|
test_expect_success '%(deltabase) reports packed delta bases' '
|
||||||
|
git repack -ad &&
|
||||||
|
git cat-file --batch-check="%(deltabase)" <blobs >actual &&
|
||||||
|
{
|
||||||
|
grep "$(git rev-parse HEAD:foo)" actual ||
|
||||||
|
grep "$(git rev-parse HEAD:foo-plus)" actual
|
||||||
|
}
|
||||||
|
'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
Loading…
Reference in New Issue
Block a user