builtin/describe.c: describe a blob

Sometimes users are given a hash of an object and they want to
identify it further (e.g., use verify-pack to find the largest blobs,
but what are these? See also [1].)

When describing commits, we try to anchor them to tags or refs, as these
are conceptually on a higher level than the commit. And if there is no ref
or tag that matches exactly, we're out of luck.  So we employ a heuristic
to make up a name for the commit. These names are ambiguous, there might
be different tags or refs to anchor to, and there might be different
paths in the DAG to travel to arrive at the commit precisely.

When describing a blob, we want to describe the blob from a higher layer
as well, which is a tuple of (commit, deep/path) as the tree objects
involved are rather uninteresting.  The same blob can be referenced by
multiple commits, so how do we decide which commit to use?  This patch
implements a rather naive approach on this: As there are no back pointers
from blobs to commits in which the blob occurs, we'll start walking from
any tips available, listing the blobs in commit order, and once we
find the blob, we'll take the first commit that listed the blob. For
example

  git describe --tags v0.99:Makefile
  conversion-901-g7672db20c2:Makefile

tells us the Makefile as it was in v0.99 was introduced in commit 7672db20.

The walking is performed in reverse order to show the introduction of a
blob rather than its last occurrence.

[1] https://stackoverflow.com/questions/223678/which-commit-has-this-blob

Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Stefan Beller 2017-11-15 18:00:39 -08:00 committed by Junio C Hamano
parent 4dbc59a4cc
commit 644eb60bd0
3 changed files with 107 additions and 7 deletions

View File

@ -3,14 +3,14 @@ git-describe(1)
NAME NAME
---- ----
git-describe - Describe a commit using the most recent tag reachable from it git-describe - Give an object a human readable name based on an available ref
SYNOPSIS SYNOPSIS
-------- --------
[verse] [verse]
'git describe' [--all] [--tags] [--contains] [--abbrev=<n>] [<commit-ish>...] 'git describe' [--all] [--tags] [--contains] [--abbrev=<n>] [<commit-ish>...]
'git describe' [--all] [--tags] [--contains] [--abbrev=<n>] --dirty[=<mark>] 'git describe' [--all] [--tags] [--contains] [--abbrev=<n>] --dirty[=<mark>]
'git describe' <blob>
DESCRIPTION DESCRIPTION
----------- -----------
@ -24,6 +24,12 @@ By default (without --all or --tags) `git describe` only shows
annotated tags. For more information about creating annotated tags annotated tags. For more information about creating annotated tags
see the -a and -s options to linkgit:git-tag[1]. see the -a and -s options to linkgit:git-tag[1].
If the given object refers to a blob, it will be described
as `<commit-ish>:<path>`, such that the blob can be found
at `<path>` in the `<commit-ish>`, which itself describes the
first commit in which this blob occurs in a reverse revision walk
from HEAD.
OPTIONS OPTIONS
------- -------
<commit-ish>...:: <commit-ish>...::
@ -186,6 +192,14 @@ selected and output. Here fewest commits different is defined as
the number of commits which would be shown by `git log tag..input` the number of commits which would be shown by `git log tag..input`
will be the smallest number of commits possible. will be the smallest number of commits possible.
BUGS
----
Tree objects, as well as tag objects not pointing at commits, cannot be described.
When describing blobs, the lightweight tags pointing at blobs are ignored,
but the blob is still described as <commit-ish>:<path> despite the lightweight
tag being favorable.
GIT GIT
--- ---
Part of the linkgit:git[1] suite Part of the linkgit:git[1] suite

View File

@ -3,6 +3,7 @@
#include "lockfile.h" #include "lockfile.h"
#include "commit.h" #include "commit.h"
#include "tag.h" #include "tag.h"
#include "blob.h"
#include "refs.h" #include "refs.h"
#include "builtin.h" #include "builtin.h"
#include "exec_cmd.h" #include "exec_cmd.h"
@ -11,8 +12,9 @@
#include "hashmap.h" #include "hashmap.h"
#include "argv-array.h" #include "argv-array.h"
#include "run-command.h" #include "run-command.h"
#include "revision.h"
#include "list-objects.h"
#define SEEN (1u << 0)
#define MAX_TAGS (FLAG_BITS - 1) #define MAX_TAGS (FLAG_BITS - 1)
static const char * const describe_usage[] = { static const char * const describe_usage[] = {
@ -434,6 +436,53 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst)
strbuf_addstr(dst, suffix); strbuf_addstr(dst, suffix);
} }
/*
 * State shared between the traverse_commit_list() callbacks while
 * searching for a blob. Field order matters: describe_blob() fills
 * this struct with a positional initializer.
 */
struct process_commit_data {
/* oid of the commit most recently passed to process_commit() */
struct object_id current_commit;
/* oid that process_object() compares each visited object against */
struct object_id looking_for;
/* output buffer; a non-zero length means a description was already written */
struct strbuf *dst;
/* the walk in progress; process_object() empties its commit list on a match */
struct rev_info *revs;
};
/*
 * Commit callback for the blob search: record the oid of the commit
 * being visited so that process_object() can describe it if one of the
 * objects listed afterwards turns out to be the blob we want.
 */
static void process_commit(struct commit *commit, void *data)
{
	struct process_commit_data *walk = data;

	walk->current_commit = commit->object.oid;
}
/*
 * Object callback for the blob search. When `obj` is the object we are
 * looking for and nothing has been written to the output buffer yet,
 * describe the most recently recorded commit and append ":<path>".
 */
static void process_object(struct object *obj, const char *path, void *data)
{
	struct process_commit_data *walk = data;

	/* Not the wanted object, or an answer was already produced. */
	if (oidcmp(&walk->looking_for, &obj->oid) || walk->dst->len)
		return;

	/* Reset walk state before describe_commit() runs on the found commit. */
	reset_revision_walk();
	describe_commit(&walk->current_commit, walk->dst);
	strbuf_addf(walk->dst, ":%s", path);

	/* Drop the commits still queued (presumably to end the walk early). */
	free_commit_list(walk->revs->commits);
	walk->revs->commits = NULL;
}
static void describe_blob(struct object_id oid, struct strbuf *dst)
{
struct rev_info revs;
struct argv_array args = ARGV_ARRAY_INIT;
struct process_commit_data pcd = { null_oid, oid, dst, &revs};
argv_array_pushl(&args, "internal: The first arg is not parsed",
"--objects", "--in-commit-order", "--reverse", "HEAD",
NULL);
init_revisions(&revs, NULL);
if (setup_revisions(args.argc, args.argv, &revs, NULL) > 1)
BUG("setup_revisions could not handle all args?");
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
traverse_commit_list(&revs, process_commit, process_object, &pcd);
reset_revision_walk();
}
static void describe(const char *arg, int last_one) static void describe(const char *arg, int last_one)
{ {
struct object_id oid; struct object_id oid;
@ -445,11 +494,14 @@ static void describe(const char *arg, int last_one)
if (get_oid(arg, &oid)) if (get_oid(arg, &oid))
die(_("Not a valid object name %s"), arg); die(_("Not a valid object name %s"), arg);
cmit = lookup_commit_reference(&oid); cmit = lookup_commit_reference_gently(&oid, 1);
if (!cmit)
die(_("%s is not a valid '%s' object"), arg, commit_type);
describe_commit(&oid, &sb); if (cmit)
describe_commit(&oid, &sb);
else if (lookup_blob(&oid))
describe_blob(oid, &sb);
else
die(_("%s is neither a commit nor blob"), arg);
puts(sb.buf); puts(sb.buf);

View File

@ -310,6 +310,40 @@ test_expect_success 'describe ignoring a broken submodule' '
grep broken out grep broken out
' '
test_expect_success 'describe a blob at a directly tagged commit' '
echo "make it a unique blob" >file &&
git add file && git commit -m "content in file" &&
git tag -a -m "latest annotated tag" unique-file &&
git describe HEAD:file >actual &&
echo "unique-file:file" >expect &&
test_cmp expect actual
'
test_expect_success 'describe a blob with its first introduction' '
git commit --allow-empty -m "empty commit" &&
git rm file &&
git commit -m "delete blob" &&
git revert HEAD &&
git commit --allow-empty -m "empty commit" &&
git describe HEAD:file >actual &&
echo "unique-file:file" >expect &&
test_cmp expect actual
'
test_expect_success 'describe directly tagged blob' '
git tag test-blob unique-file:file &&
git describe test-blob >actual &&
echo "unique-file:file" >expect &&
# suboptimal: we rather want to see "test-blob"
test_cmp expect actual
'
test_expect_success 'describe tag object' '
git tag test-blob-1 -a -m msg unique-file:file &&
test_must_fail git describe test-blob-1 2>actual &&
test_i18ngrep "fatal: test-blob-1 is neither a commit nor blob" actual
'
test_expect_failure ULIMIT_STACK_SIZE 'name-rev works in a deep repo' ' test_expect_failure ULIMIT_STACK_SIZE 'name-rev works in a deep repo' '
i=1 && i=1 &&
while test $i -lt 8000 while test $i -lt 8000