2006-04-29 08:20:52 +02:00
|
|
|
/*
|
|
|
|
* Builtin "git diff"
|
|
|
|
*
|
|
|
|
* Copyright (c) 2006 Junio C Hamano
|
|
|
|
*/
|
2019-01-24 09:29:12 +01:00
|
|
|
#define USE_THE_INDEX_COMPATIBILITY_MACROS
|
2006-04-29 08:20:52 +02:00
|
|
|
#include "cache.h"
|
2017-06-14 20:07:36 +02:00
|
|
|
#include "config.h"
|
git diff: improve range handling
When git diff is given a symmetric difference A...B, it chooses
some merge base from the two specified commits (as documented).
This fails, however, if there is *no* merge base: instead, you
see the differences between A and B, which is certainly not what
is expected.
Moreover, if additional revisions are specified on the command
line ("git diff A...B C"), the results get a bit weird:
* If there is a symmetric difference merge base, this is used
as the left side of the diff. The last final ref is used as
the right side.
* If there is no merge base, the symmetric status is completely
lost. We will produce a combined diff instead.
Similar weirdness occurs if you use, e.g., "git diff C A...B D".
Likewise, using multiple two-dot ranges, or tossing extra
revision specifiers into the command line with two-dot ranges,
or mixing two and three dot ranges, all produce nonsense.
To avoid all this, add a routine to catch the range cases and
verify that the arguments make sense. As a side effect,
produce a warning showing *which* merge base is being used when
there are multiple choices; die if there is no merge base.
Signed-off-by: Chris Torek <chris.torek@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-12 18:19:59 +02:00
|
|
|
#include "ewah/ewok.h"
|
2014-10-01 12:28:42 +02:00
|
|
|
#include "lockfile.h"
|
2008-02-18 08:26:03 +01:00
|
|
|
#include "color.h"
|
2006-04-29 08:20:52 +02:00
|
|
|
#include "commit.h"
|
|
|
|
#include "blob.h"
|
|
|
|
#include "tag.h"
|
|
|
|
#include "diff.h"
|
|
|
|
#include "diffcore.h"
|
|
|
|
#include "revision.h"
|
|
|
|
#include "log-tree.h"
|
|
|
|
#include "builtin.h"
|
2010-08-06 00:40:48 +02:00
|
|
|
#include "submodule.h"
|
2020-03-30 16:03:46 +02:00
|
|
|
#include "oid-array.h"
|
2006-04-29 08:20:52 +02:00
|
|
|
|
2013-12-11 10:58:42 +01:00
|
|
|
#define DIFF_NO_INDEX_EXPLICIT 1
|
|
|
|
#define DIFF_NO_INDEX_IMPLICIT 2
|
|
|
|
|
2006-04-29 08:20:52 +02:00
|
|
|
static const char builtin_diff_usage[] =
|
2020-06-12 18:20:00 +02:00
|
|
|
"git diff [<options>] [<commit>] [--] [<path>...]\n"
|
|
|
|
" or: git diff [<options>] --cached [<commit>] [--] [<path>...]\n"
|
|
|
|
" or: git diff [<options>] <commit> [<commit>...] <commit> [--] [<path>...]\n"
|
|
|
|
" or: git diff [<options>] <commit>...<commit>] [--] [<path>...]\n"
|
|
|
|
" or: git diff [<options>] <blob> <blob>]\n"
|
|
|
|
" or: git diff [<options>] --no-index [--] <path> <path>]\n"
|
|
|
|
COMMON_DIFF_OPTIONS_HELP;
|
2006-04-29 08:20:52 +02:00
|
|
|
|
2017-05-19 14:59:15 +02:00
|
|
|
static const char *blob_path(struct object_array_entry *entry)
|
|
|
|
{
|
|
|
|
return entry->path ? entry->path : entry->name;
|
|
|
|
}
|
|
|
|
|
2006-04-29 08:20:52 +02:00
|
|
|
static void stuff_change(struct diff_options *opt,
|
|
|
|
unsigned old_mode, unsigned new_mode,
|
2017-03-26 18:01:26 +02:00
|
|
|
const struct object_id *old_oid,
|
|
|
|
const struct object_id *new_oid,
|
|
|
|
int old_oid_valid,
|
|
|
|
int new_oid_valid,
|
2017-05-19 14:58:05 +02:00
|
|
|
const char *old_path,
|
|
|
|
const char *new_path)
|
2006-04-29 08:20:52 +02:00
|
|
|
{
|
|
|
|
struct diff_filespec *one, *two;
|
|
|
|
|
2017-03-26 18:01:26 +02:00
|
|
|
if (!is_null_oid(old_oid) && !is_null_oid(new_oid) &&
|
convert "oidcmp() == 0" to oideq()
Using the more restrictive oideq() should, in the long run,
give the compiler more opportunities to optimize these
callsites. For now, this conversion should be a complete
noop with respect to the generated code.
The result is also perhaps a little more readable, as it
avoids the "zero is equal" idiom. Since it's so prevalent in
C, I think seasoned programmers tend not to even notice it
anymore, but it can sometimes make for awkward double
negations (e.g., we can drop a few !!oidcmp() instances
here).
This patch was generated almost entirely by the included
coccinelle patch. This mechanical conversion should be
completely safe, because we check explicitly for cases where
oidcmp() is compared to 0, which is what oideq() is doing
under the hood. Note that we don't have to catch "!oidcmp()"
separately; coccinelle's standard isomorphisms make sure the
two are treated equivalently.
I say "almost" because I did hand-edit the coccinelle output
to fix up a few style violations (it mostly keeps the
original formatting, but sometimes unwraps long lines).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-28 23:22:40 +02:00
|
|
|
oideq(old_oid, new_oid) && (old_mode == new_mode))
|
2006-04-29 08:20:52 +02:00
|
|
|
return;
|
|
|
|
|
2017-10-31 19:19:11 +01:00
|
|
|
if (opt->flags.reverse_diff) {
|
2017-01-28 22:40:58 +01:00
|
|
|
SWAP(old_mode, new_mode);
|
2017-03-26 18:01:26 +02:00
|
|
|
SWAP(old_oid, new_oid);
|
2017-05-19 14:58:05 +02:00
|
|
|
SWAP(old_path, new_path);
|
2006-04-29 08:20:52 +02:00
|
|
|
}
|
diff --relative: output paths as relative to the current subdirectory
This adds --relative option to the diff family. When you start
from a subdirectory:
$ git diff --relative
shows only the diff that is inside your current subdirectory,
and without $prefix part. People who usually live in
subdirectories may like it.
There are a few things I should also mention about the change:
- This works not just with diff but also works with the log
family of commands, but the history pruning is not affected.
In other words, if you go to a subdirectory, you can say:
$ git log --relative -p
but it will show the log message even for commits that do not
touch the current directory. You can limit it by giving
pathspec yourself:
$ git log --relative -p .
This originally was not a conscious design choice, but we
have a way to affect diff pathspec and pruning pathspec
independently. IOW "git log --full-diff -p ." tells it to
prune history to commits that affect the current subdirectory
but show the changes with full context. I think it makes
more sense to leave pruning independent from --relative than
the obvious alternative of always pruning with the current
subdirectory, which would break the symmetry.
- Because this works also with the log family, you could
format-patch a single change, limiting the effect to your
subdirectory, like so:
$ cd gitk-git
$ git format-patch -1 --relative 911f1eb
But because that is a special purpose usage, this option will
never become the default, with or without repository or user
preference configuration. The risk of producing a partial
patch and sending it out by mistake is too great if we did
so.
- This is inherently incompatible with --no-index, which is a
bolted-on hack that does not have much to do with git
itself. I didn't bother checking and erroring out on the
combined use of the options, but probably I should.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-12 23:26:02 +01:00
|
|
|
|
|
|
|
if (opt->prefix &&
|
2017-05-19 14:58:05 +02:00
|
|
|
(strncmp(old_path, opt->prefix, opt->prefix_length) ||
|
|
|
|
strncmp(new_path, opt->prefix, opt->prefix_length)))
|
diff --relative: output paths as relative to the current subdirectory
This adds --relative option to the diff family. When you start
from a subdirectory:
$ git diff --relative
shows only the diff that is inside your current subdirectory,
and without $prefix part. People who usually live in
subdirectories may like it.
There are a few things I should also mention about the change:
- This works not just with diff but also works with the log
family of commands, but the history pruning is not affected.
In other words, if you go to a subdirectory, you can say:
$ git log --relative -p
but it will show the log message even for commits that do not
touch the current directory. You can limit it by giving
pathspec yourself:
$ git log --relative -p .
This originally was not a conscious design choice, but we
have a way to affect diff pathspec and pruning pathspec
independently. IOW "git log --full-diff -p ." tells it to
prune history to commits that affect the current subdirectory
but show the changes with full context. I think it makes
more sense to leave pruning independent from --relative than
the obvious alternative of always pruning with the current
subdirectory, which would break the symmetry.
- Because this works also with the log family, you could
format-patch a single change, limiting the effect to your
subdirectory, like so:
$ cd gitk-git
$ git format-patch -1 --relative 911f1eb
But because that is a special purpose usage, this option will
never become the default, with or without repository or user
preference configuration. The risk of producing a partial
patch and sending it out by mistake is too great if we did
so.
- This is inherently incompatible with --no-index, which is a
bolted-on hack that does not have much to do with git
itself. I didn't bother checking and erroring out on the
combined use of the options, but probably I should.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-12 23:26:02 +01:00
|
|
|
return;
|
|
|
|
|
2017-05-19 14:58:05 +02:00
|
|
|
one = alloc_filespec(old_path);
|
|
|
|
two = alloc_filespec(new_path);
|
2017-05-30 19:30:50 +02:00
|
|
|
fill_filespec(one, old_oid, old_oid_valid, old_mode);
|
|
|
|
fill_filespec(two, new_oid, new_oid_valid, new_mode);
|
2006-04-29 08:20:52 +02:00
|
|
|
|
|
|
|
diff_queue(&diff_queued_diff, one, two);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int builtin_diff_b_f(struct rev_info *revs,
|
|
|
|
int argc, const char **argv,
|
2017-05-19 14:57:30 +02:00
|
|
|
struct object_array_entry **blob)
|
2006-04-29 08:20:52 +02:00
|
|
|
{
|
|
|
|
/* Blob vs file in the working tree*/
|
|
|
|
struct stat st;
|
2013-11-20 02:26:41 +01:00
|
|
|
const char *path;
|
2006-04-29 08:20:52 +02:00
|
|
|
|
2006-06-24 19:23:06 +02:00
|
|
|
if (argc > 1)
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
|
2013-11-20 02:26:41 +01:00
|
|
|
GUARD_PATHSPEC(&revs->prune_data, PATHSPEC_FROMTOP | PATHSPEC_LITERAL);
|
|
|
|
path = revs->prune_data.items[0].match;
|
|
|
|
|
2006-04-29 08:20:52 +02:00
|
|
|
if (lstat(path, &st))
|
2011-02-23 00:41:50 +01:00
|
|
|
die_errno(_("failed to stat '%s'"), path);
|
2006-04-29 08:20:52 +02:00
|
|
|
if (!(S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)))
|
2011-02-23 00:41:50 +01:00
|
|
|
die(_("'%s': not a regular file or symlink"), path);
|
2007-04-22 18:44:00 +02:00
|
|
|
|
2008-08-19 05:08:09 +02:00
|
|
|
diff_set_mnemonic_prefix(&revs->diffopt, "o/", "w/");
|
|
|
|
|
2017-05-19 14:57:30 +02:00
|
|
|
if (blob[0]->mode == S_IFINVALID)
|
|
|
|
blob[0]->mode = canon_mode(st.st_mode);
|
2007-04-22 18:44:00 +02:00
|
|
|
|
2006-04-29 08:20:52 +02:00
|
|
|
stuff_change(&revs->diffopt,
|
2017-05-19 14:57:30 +02:00
|
|
|
blob[0]->mode, canon_mode(st.st_mode),
|
|
|
|
&blob[0]->item->oid, &null_oid,
|
diff: do not use null sha1 as a sentinel value
The diff code represents paths using the diff_filespec
struct. This struct has a sha1 to represent the sha1 of the
content at that path, as well as a sha1_valid member which
indicates whether its sha1 field is actually useful. If
sha1_valid is not true, then the filespec represents a
working tree file (e.g., for the no-index case, or for when
the index is not up-to-date).
The diff_filespec is only used internally, though. At the
interfaces to the diff subsystem, callers feed the sha1
directly, and we create a diff_filespec from it. It's at
that point that we look at the sha1 and decide whether it is
valid or not; callers may pass the null sha1 as a sentinel
value to indicate that it is not.
We should not typically see the null sha1 coming from any
other source (e.g., in the index itself, or from a tree).
However, a corrupt tree might have a null sha1, which would
cause "diff --patch" to accidentally diff the working tree
version of a file instead of treating it as a blob.
This patch extends the edges of the diff interface to accept
a "sha1_valid" flag whenever we accept a sha1, and to use
that flag when creating a filespec. In some cases, this
means passing the flag through several layers, making the
code change larger than would be desirable.
One alternative would be to simply die() upon seeing
corrupted trees with null sha1s. However, this fix more
directly addresses the problem (while bogus sha1s in a tree
are probably a bad thing, it is really the sentinel
confusion sending us down the wrong code path that is what
makes it devastating). And it means that git is more capable
of examining and debugging these corrupted trees. For
example, you can still "diff --raw" such a tree to find out
when the bogus entry was introduced; you just cannot do a
"--patch" diff (just as you could not with any other
corrupted tree, as we do not have any content to diff).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-28 17:03:01 +02:00
|
|
|
1, 0,
|
2017-05-19 14:59:34 +02:00
|
|
|
blob[0]->path ? blob[0]->path : path,
|
|
|
|
path);
|
2006-04-29 08:20:52 +02:00
|
|
|
diffcore_std(&revs->diffopt);
|
|
|
|
diff_flush(&revs->diffopt);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int builtin_diff_blobs(struct rev_info *revs,
|
|
|
|
int argc, const char **argv,
|
2017-05-19 14:57:30 +02:00
|
|
|
struct object_array_entry **blob)
|
2006-04-29 08:20:52 +02:00
|
|
|
{
|
2019-02-03 00:16:45 +01:00
|
|
|
const unsigned mode = canon_mode(S_IFREG | 0644);
|
2006-04-29 08:20:52 +02:00
|
|
|
|
2006-06-24 19:23:06 +02:00
|
|
|
if (argc > 1)
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
|
2017-05-19 14:57:30 +02:00
|
|
|
if (blob[0]->mode == S_IFINVALID)
|
|
|
|
blob[0]->mode = mode;
|
2007-04-22 18:44:00 +02:00
|
|
|
|
2017-05-19 14:57:30 +02:00
|
|
|
if (blob[1]->mode == S_IFINVALID)
|
|
|
|
blob[1]->mode = mode;
|
2007-04-22 18:44:00 +02:00
|
|
|
|
2006-04-29 08:20:52 +02:00
|
|
|
stuff_change(&revs->diffopt,
|
2017-05-19 14:57:30 +02:00
|
|
|
blob[0]->mode, blob[1]->mode,
|
|
|
|
&blob[0]->item->oid, &blob[1]->item->oid,
|
diff: do not use null sha1 as a sentinel value
The diff code represents paths using the diff_filespec
struct. This struct has a sha1 to represent the sha1 of the
content at that path, as well as a sha1_valid member which
indicates whether its sha1 field is actually useful. If
sha1_valid is not true, then the filespec represents a
working tree file (e.g., for the no-index case, or for when
the index is not up-to-date).
The diff_filespec is only used internally, though. At the
interfaces to the diff subsystem, callers feed the sha1
directly, and we create a diff_filespec from it. It's at
that point that we look at the sha1 and decide whether it is
valid or not; callers may pass the null sha1 as a sentinel
value to indicate that it is not.
We should not typically see the null sha1 coming from any
other source (e.g., in the index itself, or from a tree).
However, a corrupt tree might have a null sha1, which would
cause "diff --patch" to accidentally diff the working tree
version of a file instead of treating it as a blob.
This patch extends the edges of the diff interface to accept
a "sha1_valid" flag whenever we accept a sha1, and to use
that flag when creating a filespec. In some cases, this
means passing the flag through several layers, making the
code change larger than would be desirable.
One alternative would be to simply die() upon seeing
corrupted trees with null sha1s. However, this fix more
directly addresses the problem (while bogus sha1s in a tree
are probably a bad thing, it is really the sentinel
confusion sending us down the wrong code path that is what
makes it devastating). And it means that git is more capable
of examining and debugging these corrupted trees. For
example, you can still "diff --raw" such a tree to find out
when the bogus entry was introduced; you just cannot do a
"--patch" diff (just as you could not with any other
corrupted tree, as we do not have any content to diff).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-28 17:03:01 +02:00
|
|
|
1, 1,
|
2017-05-19 14:59:15 +02:00
|
|
|
blob_path(blob[0]), blob_path(blob[1]));
|
2006-04-29 08:20:52 +02:00
|
|
|
diffcore_std(&revs->diffopt);
|
|
|
|
diff_flush(&revs->diffopt);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int builtin_diff_index(struct rev_info *revs,
|
|
|
|
int argc, const char **argv)
|
|
|
|
{
|
|
|
|
int cached = 0;
|
|
|
|
while (1 < argc) {
|
|
|
|
const char *arg = argv[1];
|
2008-10-29 17:15:36 +01:00
|
|
|
if (!strcmp(arg, "--cached") || !strcmp(arg, "--staged"))
|
2006-04-29 08:20:52 +02:00
|
|
|
cached = 1;
|
|
|
|
else
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
argv++; argc--;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Make sure there is one revision (i.e. pending object),
|
|
|
|
* and there is no revision filtering parameters.
|
|
|
|
*/
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
if (revs->pending.nr != 1 ||
|
2006-04-29 08:20:52 +02:00
|
|
|
revs->max_count != -1 || revs->min_age != -1 ||
|
|
|
|
revs->max_age != -1)
|
|
|
|
usage(builtin_diff_usage);
|
2012-10-30 10:50:42 +01:00
|
|
|
if (!cached) {
|
|
|
|
setup_work_tree();
|
2013-07-14 10:35:49 +02:00
|
|
|
if (read_cache_preload(&revs->diffopt.pathspec) < 0) {
|
2012-10-30 10:50:42 +01:00
|
|
|
perror("read_cache_preload");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else if (read_cache() < 0) {
|
|
|
|
perror("read_cache");
|
2007-02-10 03:51:40 +01:00
|
|
|
return -1;
|
|
|
|
}
|
2006-04-29 08:20:52 +02:00
|
|
|
return run_diff_index(revs, cached);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int builtin_diff_tree(struct rev_info *revs,
|
|
|
|
int argc, const char **argv,
|
2013-05-25 11:08:03 +02:00
|
|
|
struct object_array_entry *ent0,
|
|
|
|
struct object_array_entry *ent1)
|
2006-04-29 08:20:52 +02:00
|
|
|
{
|
2017-03-26 18:01:26 +02:00
|
|
|
const struct object_id *(oid[2]);
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
int swap = 0;
|
2006-06-24 19:23:06 +02:00
|
|
|
|
|
|
|
if (argc > 1)
|
|
|
|
usage(builtin_diff_usage);
|
2006-04-29 10:24:49 +02:00
|
|
|
|
2013-05-25 11:08:03 +02:00
|
|
|
/*
|
|
|
|
* We saw two trees, ent0 and ent1. If ent1 is uninteresting,
|
|
|
|
* swap them.
|
2006-04-29 10:24:49 +02:00
|
|
|
*/
|
2013-05-25 11:08:03 +02:00
|
|
|
if (ent1->item->flags & UNINTERESTING)
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
swap = 1;
|
2017-03-26 18:01:26 +02:00
|
|
|
oid[swap] = &ent0->item->oid;
|
|
|
|
oid[1 - swap] = &ent1->item->oid;
|
2017-05-30 19:31:03 +02:00
|
|
|
diff_tree_oid(oid[0], oid[1], "", &revs->diffopt);
|
2006-04-29 08:20:52 +02:00
|
|
|
log_tree_diff_flush(revs);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-04-29 10:24:49 +02:00
|
|
|
static int builtin_diff_combined(struct rev_info *revs,
|
|
|
|
int argc, const char **argv,
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
struct object_array_entry *ent,
|
2006-04-29 10:24:49 +02:00
|
|
|
int ents)
|
|
|
|
{
|
2017-03-31 03:40:00 +02:00
|
|
|
struct oid_array parents = OID_ARRAY_INIT;
|
2006-04-29 10:24:49 +02:00
|
|
|
int i;
|
|
|
|
|
2006-06-24 19:23:06 +02:00
|
|
|
if (argc > 1)
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
|
2006-04-29 10:24:49 +02:00
|
|
|
if (!revs->dense_combined_merges && !revs->combine_merges)
|
|
|
|
revs->dense_combined_merges = revs->combine_merges = 1;
|
2011-12-17 11:15:48 +01:00
|
|
|
for (i = 1; i < ents; i++)
|
2017-03-31 03:40:00 +02:00
|
|
|
oid_array_append(&parents, &ent[i].item->oid);
|
2020-09-29 13:31:22 +02:00
|
|
|
diff_tree_combined(&ent[0].item->oid, &parents, revs);
|
2017-03-31 03:40:00 +02:00
|
|
|
oid_array_clear(&parents);
|
2006-04-29 10:24:49 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-08-31 22:13:42 +02:00
|
|
|
static void refresh_index_quietly(void)
|
|
|
|
{
|
2017-10-05 22:32:04 +02:00
|
|
|
struct lock_file lock_file = LOCK_INIT;
|
2007-08-31 22:13:42 +02:00
|
|
|
int fd;
|
|
|
|
|
2017-10-05 22:32:04 +02:00
|
|
|
fd = hold_locked_index(&lock_file, 0);
|
2007-08-31 22:13:42 +02:00
|
|
|
if (fd < 0)
|
|
|
|
return;
|
|
|
|
discard_cache();
|
|
|
|
read_cache();
|
|
|
|
refresh_cache(REFRESH_QUIET|REFRESH_UNMERGED);
|
2019-01-12 03:13:27 +01:00
|
|
|
repo_update_index_if_able(the_repository, &lock_file);
|
2007-08-31 22:13:42 +02:00
|
|
|
}
|
|
|
|
|
2008-05-24 07:28:56 +02:00
|
|
|
static int builtin_diff_files(struct rev_info *revs, int argc, const char **argv)
|
|
|
|
{
|
|
|
|
unsigned int options = 0;
|
|
|
|
|
|
|
|
while (1 < argc && argv[1][0] == '-') {
|
|
|
|
if (!strcmp(argv[1], "--base"))
|
|
|
|
revs->max_count = 1;
|
|
|
|
else if (!strcmp(argv[1], "--ours"))
|
|
|
|
revs->max_count = 2;
|
|
|
|
else if (!strcmp(argv[1], "--theirs"))
|
|
|
|
revs->max_count = 3;
|
|
|
|
else if (!strcmp(argv[1], "-q"))
|
|
|
|
options |= DIFF_SILENT_ON_REMOVED;
|
2009-08-06 12:47:21 +02:00
|
|
|
else if (!strcmp(argv[1], "-h"))
|
|
|
|
usage(builtin_diff_usage);
|
2008-05-24 07:28:56 +02:00
|
|
|
else
|
2011-02-23 00:41:50 +01:00
|
|
|
return error(_("invalid option: %s"), argv[1]);
|
2008-05-24 07:28:56 +02:00
|
|
|
argv++; argc--;
|
|
|
|
}
|
|
|
|
|
2008-09-18 09:32:37 +02:00
|
|
|
/*
|
|
|
|
* "diff --base" should not combine merges because it was not
|
|
|
|
* asked to. "diff -c" should not densify (if the user wants
|
|
|
|
* dense one, --cc can be explicitly asked for, or just rely
|
|
|
|
* on the default).
|
|
|
|
*/
|
|
|
|
if (revs->max_count == -1 && !revs->combine_merges &&
|
2008-05-24 07:28:56 +02:00
|
|
|
(revs->diffopt.output_format & DIFF_FORMAT_PATCH))
|
|
|
|
revs->combine_merges = revs->dense_combined_merges = 1;
|
|
|
|
|
2008-08-28 15:02:12 +02:00
|
|
|
setup_work_tree();
|
2013-07-14 10:35:49 +02:00
|
|
|
if (read_cache_preload(&revs->diffopt.pathspec) < 0) {
|
2008-11-14 01:36:30 +01:00
|
|
|
perror("read_cache_preload");
|
2008-05-24 07:28:56 +02:00
|
|
|
return -1;
|
|
|
|
}
|
2011-03-22 22:17:30 +01:00
|
|
|
return run_diff_files(revs, options);
|
2008-05-24 07:28:56 +02:00
|
|
|
}
|
|
|
|
|
git diff: improve range handling
When git diff is given a symmetric difference A...B, it chooses
some merge base from the two specified commits (as documented).
This fails, however, if there is *no* merge base: instead, you
see the differences between A and B, which is certainly not what
is expected.
Moreover, if additional revisions are specified on the command
line ("git diff A...B C"), the results get a bit weird:
* If there is a symmetric difference merge base, this is used
as the left side of the diff. The last final ref is used as
the right side.
* If there is no merge base, the symmetric status is completely
lost. We will produce a combined diff instead.
Similar weirdness occurs if you use, e.g., "git diff C A...B D".
Likewise, using multiple two-dot ranges, or tossing extra
revision specifiers into the command line with two-dot ranges,
or mixing two and three dot ranges, all produce nonsense.
To avoid all this, add a routine to catch the range cases and
verify that the arguments make sense. As a side effect,
produce a warning showing *which* merge base is being used when
there are multiple choices; die if there is no merge base.
Signed-off-by: Chris Torek <chris.torek@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-12 18:19:59 +02:00
|
|
|
/*
 * State describing a symmetric-difference ("A...B") argument.
 */
struct symdiff {
	/*
	 * Non-NULL for an actual symmetric diff: marks the pending
	 * object indices the caller should ignore.  NULL otherwise.
	 */
	struct bitmap *skip;
	/* Set when there are multiple merge bases. */
	int warn;
	/* Names to use in the warning about multiple merge bases. */
	const char *base;
	const char *left;
	const char *right;
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check for symmetric-difference arguments, and if present, arrange
|
|
|
|
* everything we need to know to handle them correctly. As a bonus,
|
|
|
|
* weed out all bogus range-based revision specifications, e.g.,
|
|
|
|
* "git diff A..B C..D" or "git diff A..B C" get rejected.
|
|
|
|
*
|
|
|
|
* For an actual symmetric diff, *symdiff is set this way:
|
|
|
|
*
|
|
|
|
* - its skip is non-NULL and marks *all* rev->pending.objects[i]
|
|
|
|
* indices that the caller should ignore (extra merge bases, of
|
|
|
|
* which there might be many, and A in A...B). Note that the
|
|
|
|
* chosen merge base and right side are NOT marked.
|
|
|
|
* - warn is set if there are multiple merge bases.
|
|
|
|
* - base, left, and right point to the names to use in a
|
|
|
|
* warning about multiple merge bases.
|
|
|
|
*
|
|
|
|
* If there is no symmetric diff argument, sym->skip is NULL and
|
|
|
|
* sym->warn is cleared. The remaining fields are not set.
|
|
|
|
*/
|
|
|
|
static void symdiff_prepare(struct rev_info *rev, struct symdiff *sym)
|
|
|
|
{
|
|
|
|
int i, is_symdiff = 0, basecount = 0, othercount = 0;
|
|
|
|
int lpos = -1, rpos = -1, basepos = -1;
|
|
|
|
struct bitmap *map = NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the whence fields to find merge bases and left and
|
|
|
|
* right parts of symmetric difference, so that we do not
|
|
|
|
* depend on the order that revisions are parsed. If there
|
|
|
|
* are any revs that aren't from these sources, we have a
|
|
|
|
* "git diff C A...B" or "git diff A...B C" case. Or we
|
|
|
|
* could even get "git diff A...B C...E", for instance.
|
|
|
|
*
|
|
|
|
* If we don't have just one merge base, we pick one
|
|
|
|
* at random.
|
|
|
|
*
|
|
|
|
* NB: REV_CMD_LEFT, REV_CMD_RIGHT are also used for A..B,
|
|
|
|
* so we must check for SYMMETRIC_LEFT too. The two arrays
|
|
|
|
* rev->pending.objects and rev->cmdline.rev are parallel.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < rev->cmdline.nr; i++) {
|
|
|
|
struct object *obj = rev->pending.objects[i].item;
|
|
|
|
switch (rev->cmdline.rev[i].whence) {
|
|
|
|
case REV_CMD_MERGE_BASE:
|
|
|
|
if (basepos < 0)
|
|
|
|
basepos = i;
|
|
|
|
basecount++;
|
|
|
|
break; /* do mark all bases */
|
|
|
|
case REV_CMD_LEFT:
|
|
|
|
if (lpos >= 0)
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
lpos = i;
|
|
|
|
if (obj->flags & SYMMETRIC_LEFT) {
|
|
|
|
is_symdiff = 1;
|
|
|
|
break; /* do mark A */
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
case REV_CMD_RIGHT:
|
|
|
|
if (rpos >= 0)
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
rpos = i;
|
|
|
|
continue; /* don't mark B */
|
|
|
|
case REV_CMD_PARENTS_ONLY:
|
|
|
|
case REV_CMD_REF:
|
|
|
|
case REV_CMD_REV:
|
|
|
|
othercount++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (map == NULL)
|
|
|
|
map = bitmap_new();
|
|
|
|
bitmap_set(map, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Forbid any additional revs for both A...B and A..B.
|
|
|
|
*/
|
|
|
|
if (lpos >= 0 && othercount > 0)
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
|
|
|
|
if (!is_symdiff) {
|
|
|
|
bitmap_free(map);
|
|
|
|
sym->warn = 0;
|
|
|
|
sym->skip = NULL;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
sym->left = rev->pending.objects[lpos].name;
|
|
|
|
sym->right = rev->pending.objects[rpos].name;
|
|
|
|
if (basecount == 0)
|
|
|
|
die(_("%s...%s: no merge base"), sym->left, sym->right);
|
diff: check for merge bases before assigning sym->base
In symdiff_prepare(), we iterate over the set of parsed objects to pick
out any symmetric differences, including the left, right, and base
elements. We assign the results into pointers in a "struct symdiff", and
then complain if we didn't find a base, like so:
sym->left = rev->pending.objects[lpos].name;
sym->right = rev->pending.objects[rpos].name;
sym->base = rev->pending.objects[basepos].name;
if (basecount == 0)
die(_("%s...%s: no merge base"), sym->left, sym->right);
But the least lines are backwards. If basecount is 0, then basepos will
be -1, and we will access memory outside of the pending array. This
isn't usually that big a deal, since we don't do anything besides a
single pointer-sized read before exiting anyway, but it does violate the
C standard, and of course memory-checking tools like ASan complain.
Let's put the basecount check first. Note that we haveto split it from
the other assignments, since the die() relies on sym->left and
sym->right having been assigned (this isn't strictly necessary, but is
easier to read than dereferencing the pending array again).
Reported-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-08 06:38:19 +02:00
|
|
|
sym->base = rev->pending.objects[basepos].name;
|
git diff: improve range handling
When git diff is given a symmetric difference A...B, it chooses
some merge base from the two specified commits (as documented).
This fails, however, if there is *no* merge base: instead, you
see the differences between A and B, which is certainly not what
is expected.
Moreover, if additional revisions are specified on the command
line ("git diff A...B C"), the results get a bit weird:
* If there is a symmetric difference merge base, this is used
as the left side of the diff. The last final ref is used as
the right side.
* If there is no merge base, the symmetric status is completely
lost. We will produce a combined diff instead.
Similar weirdness occurs if you use, e.g., "git diff C A...B D".
Likewise, using multiple two-dot ranges, or tossing extra
revision specifiers into the command line with two-dot ranges,
or mixing two and three dot ranges, all produce nonsense.
To avoid all this, add a routine to catch the range cases and
verify that that the arguments make sense. As a side effect,
produce a warning showing *which* merge base is being used when
there are multiple choices; die if there is no merge base.
Signed-off-by: Chris Torek <chris.torek@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-12 18:19:59 +02:00
|
|
|
bitmap_unset(map, basepos); /* unmark the base we want */
|
|
|
|
sym->warn = basecount > 1;
|
|
|
|
sym->skip = map;
|
|
|
|
}
|
|
|
|
|
2006-07-29 07:44:25 +02:00
|
|
|
int cmd_diff(int argc, const char **argv, const char *prefix)
|
2006-04-29 08:20:52 +02:00
|
|
|
{
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
int i;
|
2006-04-29 08:20:52 +02:00
|
|
|
struct rev_info rev;
|
2013-05-25 11:08:04 +02:00
|
|
|
struct object_array ent = OBJECT_ARRAY_INIT;
|
|
|
|
int blobs = 0, paths = 0;
|
2017-05-19 14:57:30 +02:00
|
|
|
struct object_array_entry *blob[2];
|
2013-12-11 10:58:42 +01:00
|
|
|
int nongit = 0, no_index = 0;
|
2007-03-14 01:17:04 +01:00
|
|
|
int result = 0;
|
git diff: improve range handling
When git diff is given a symmetric difference A...B, it chooses
some merge base from the two specified commits (as documented).
This fails, however, if there is *no* merge base: instead, you
see the differences between A and B, which is certainly not what
is expected.
Moreover, if additional revisions are specified on the command
line ("git diff A...B C"), the results get a bit weird:
* If there is a symmetric difference merge base, this is used
as the left side of the diff. The last final ref is used as
the right side.
* If there is no merge base, the symmetric status is completely
lost. We will produce a combined diff instead.
Similar weirdness occurs if you use, e.g., "git diff C A...B D".
Likewise, using multiple two-dot ranges, or tossing extra
revision specifiers into the command line with two-dot ranges,
or mixing two and three dot ranges, all produce nonsense.
To avoid all this, add a routine to catch the range cases and
verify that the arguments make sense. As a side effect,
produce a warning showing *which* merge base is being used when
there are multiple choices; die if there is no merge base.
Signed-off-by: Chris Torek <chris.torek@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-12 18:19:59 +02:00
|
|
|
struct symdiff sdiff;
|
2006-04-29 08:20:52 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We could get N tree-ish in the rev.pending_objects list.
|
builtin/diff: update usage comment
A comment in cmd_diff() states that if one tree-ish and no blobs are
provided, (the "N=1, M=0" case), it will provide a diff between the tree
and the cache. This is incorrect because a diff happens between the
tree-ish and the working tree. Remove the `--cached` in the comment so
that the correct behavior is shown. Add a new section describing the
"N=1, M=0, --cached" behavior.
Next, describe the "N=0, M=0, --cached" case, similar to the above since
it is undocumented.
Finally, fix some spacing issues. Add spaces between each section for
consistency and readability. Also, change tabs within the comment into
spaces.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-18 12:43:34 +02:00
|
|
|
* Also there could be M blobs there, and P pathspecs. --cached may
|
|
|
|
* also be present.
|
2006-04-29 08:20:52 +02:00
|
|
|
*
|
|
|
|
* N=0, M=0:
|
builtin/diff: update usage comment
A comment in cmd_diff() states that if one tree-ish and no blobs are
provided, (the "N=1, M=0" case), it will provide a diff between the tree
and the cache. This is incorrect because a diff happens between the
tree-ish and the working tree. Remove the `--cached` in the comment so
that the correct behavior is shown. Add a new section describing the
"N=1, M=0, --cached" behavior.
Next, describe the "N=0, M=0, --cached" case, similar to the above since
it is undocumented.
Finally, fix some spacing issues. Add spaces between each section for
consistency and readability. Also, change tabs within the comment into
spaces.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-18 12:43:34 +02:00
|
|
|
* cache vs files (diff-files)
|
|
|
|
*
|
|
|
|
* N=0, M=0, --cached:
|
|
|
|
* HEAD vs cache (diff-index --cached)
|
|
|
|
*
|
2006-04-29 08:20:52 +02:00
|
|
|
* N=0, M=2:
|
|
|
|
* compare two random blobs. P must be zero.
|
builtin/diff: update usage comment
A comment in cmd_diff() states that if one tree-ish and no blobs are
provided, (the "N=1, M=0" case), it will provide a diff between the tree
and the cache. This is incorrect because a diff happens between the
tree-ish and the working tree. Remove the `--cached` in the comment so
that the correct behavior is shown. Add a new section describing the
"N=1, M=0, --cached" behavior.
Next, describe the "N=0, M=0, --cached" case, similar to the above since
it is undocumented.
Finally, fix some spacing issues. Add spaces between each section for
consistency and readability. Also, change tabs within the comment into
spaces.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-18 12:43:34 +02:00
|
|
|
*
|
2006-04-29 08:20:52 +02:00
|
|
|
* N=0, M=1, P=1:
|
builtin/diff: update usage comment
A comment in cmd_diff() states that if one tree-ish and no blobs are
provided, (the "N=1, M=0" case), it will provide a diff between the tree
and the cache. This is incorrect because a diff happens between the
tree-ish and the working tree. Remove the `--cached` in the comment so
that the correct behavior is shown. Add a new section describing the
"N=1, M=0, --cached" behavior.
Next, describe the "N=0, M=0, --cached" case, similar to the above since
it is undocumented.
Finally, fix some spacing issues. Add spaces between each section for
consistency and readability. Also, change tabs within the comment into
spaces.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-18 12:43:34 +02:00
|
|
|
* compare a blob with a working tree file.
|
2006-04-29 08:20:52 +02:00
|
|
|
*
|
|
|
|
* N=1, M=0:
|
builtin/diff: update usage comment
A comment in cmd_diff() states that if one tree-ish and no blobs are
provided, (the "N=1, M=0" case), it will provide a diff between the tree
and the cache. This is incorrect because a diff happens between the
tree-ish and the working tree. Remove the `--cached` in the comment so
that the correct behavior is shown. Add a new section describing the
"N=1, M=0, --cached" behavior.
Next, describe the "N=0, M=0, --cached" case, similar to the above since
it is undocumented.
Finally, fix some spacing issues. Add spaces between each section for
consistency and readability. Also, change tabs within the comment into
spaces.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-18 12:43:34 +02:00
|
|
|
* tree vs files (diff-index)
|
2006-04-29 08:20:52 +02:00
|
|
|
*
|
builtin/diff: update usage comment
A comment in cmd_diff() states that if one tree-ish and no blobs are
provided, (the "N=1, M=0" case), it will provide a diff between the tree
and the cache. This is incorrect because a diff happens between the
tree-ish and the working tree. Remove the `--cached` in the comment so
that the correct behavior is shown. Add a new section describing the
"N=1, M=0, --cached" behavior.
Next, describe the "N=0, M=0, --cached" case, similar to the above since
it is undocumented.
Finally, fix some spacing issues. Add spaces between each section for
consistency and readability. Also, change tabs within the comment into
spaces.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-18 12:43:34 +02:00
|
|
|
* N=1, M=0, --cached:
|
2006-04-29 08:20:52 +02:00
|
|
|
* tree vs cache (diff-index --cached)
|
|
|
|
*
|
|
|
|
* N=2, M=0:
|
|
|
|
* tree vs tree (diff-tree)
|
|
|
|
*
|
2008-05-24 07:28:56 +02:00
|
|
|
* N=0, M=0, P=2:
|
|
|
|
* compare two filesystem entities (aka --no-index).
|
|
|
|
*
|
2006-04-29 08:20:52 +02:00
|
|
|
* Other cases are errors.
|
|
|
|
*/
|
2006-05-04 08:54:34 +02:00
|
|
|
|
2013-12-11 10:58:42 +01:00
|
|
|
/* Were we asked to do --no-index explicitly? */
|
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
if (!strcmp(argv[i], "--")) {
|
|
|
|
i++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!strcmp(argv[i], "--no-index"))
|
|
|
|
no_index = DIFF_NO_INDEX_EXPLICIT;
|
|
|
|
if (argv[i][0] != '-')
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
diff: always try to set up the repository
If we see an explicit "--no-index", we do not bother calling
setup_git_directory_gently() at all. This means that we may
miss out on reading repo-specific config.
It's arguable whether this is correct or not. If we were
designing from scratch, making "git diff --no-index"
completely ignore the repository makes some sense. But we
are nowhere near scratch, so let's look at the existing
behavior:
1. If you're in the top-level of a repository and run an
explicit "diff --no-index", the config subsystem falls
back to reading ".git/config", and we will respect repo
config.
2. If you're in a subdirectory of a repository, then we
still try to read ".git/config", but it generally
doesn't exist. So "diff --no-index" there does not
respect repo config.
3. If you have $GIT_DIR set in the environment, we read
and respect $GIT_DIR/config,
4. If you run "git diff /tmp/foo /tmp/bar" to get an
implicit no-index, we _do_ run the repository setup,
and set $GIT_DIR (or respect an existing $GIT_DIR
variable). We find the repo config no matter where we
started, and respect it.
So we already respect the repository config in a number of
common cases, and case (2) is the only one that does not.
And at least one of our tests, t4034, depends on case (1)
behaving as it does now (though it is just incidental, not
an explicit test for this behavior).
So let's bring case (2) in line with the others by always
running the repository setup, even with an explicit
"--no-index". We shouldn't need to change anything else, as the
implicit case already handles the prefix.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-13 05:23:36 +02:00
|
|
|
prefix = setup_git_directory_gently(&nongit);
|
2013-12-11 10:58:43 +01:00
|
|
|
|
diff: always try to set up the repository
If we see an explicit "--no-index", we do not bother calling
setup_git_directory_gently() at all. This means that we may
miss out on reading repo-specific config.
It's arguable whether this is correct or not. If we were
designing from scratch, making "git diff --no-index"
completely ignore the repository makes some sense. But we
are nowhere near scratch, so let's look at the existing
behavior:
1. If you're in the top-level of a repository and run an
explicit "diff --no-index", the config subsystem falls
back to reading ".git/config", and we will respect repo
config.
2. If you're in a subdirectory of a repository, then we
still try to read ".git/config", but it generally
doesn't exist. So "diff --no-index" there does not
respect repo config.
3. If you have $GIT_DIR set in the environment, we read
and respect $GIT_DIR/config,
4. If you run "git diff /tmp/foo /tmp/bar" to get an
implicit no-index, we _do_ run the repository setup,
and set $GIT_DIR (or respect an existing $GIT_DIR
variable). We find the repo config no matter where we
started, and respect it.
So we already respect the repository config in a number of
common cases, and case (2) is the only one that does not.
And at least one of our tests, t4034, depends on case (1)
behaving as it does now (though it is just incidental, not
an explicit test for this behavior).
So let's bring case (2) in line with the others by always
running the repository setup, even with an explicit
"--no-index". We shouldn't need to change anything else, as the
implicit case already handles the prefix.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-13 05:23:36 +02:00
|
|
|
if (!no_index) {
|
2016-09-13 05:23:27 +02:00
|
|
|
/*
|
|
|
|
* Treat git diff with at least one path outside of the
|
|
|
|
* repo the same as if the command would have been executed
|
|
|
|
* outside of a git repository. In this case it behaves
|
|
|
|
* the same way as "git diff --no-index <a> <b>", which acts
|
|
|
|
* as a colourful "diff" replacement.
|
|
|
|
*/
|
|
|
|
if (nongit || ((argc == i + 2) &&
|
|
|
|
(!path_inside_repo(prefix, argv[i]) ||
|
|
|
|
!path_inside_repo(prefix, argv[i + 1]))))
|
|
|
|
no_index = DIFF_NO_INDEX_IMPLICIT;
|
|
|
|
}
|
2013-12-11 10:58:42 +01:00
|
|
|
|
2016-02-25 09:59:21 +01:00
|
|
|
init_diff_ui_defaults();
|
2008-05-14 19:46:53 +02:00
|
|
|
git_config(git_diff_ui_config, NULL);
|
2016-05-13 22:41:02 +02:00
|
|
|
precompose_argv(argc, argv);
|
2008-02-18 08:26:03 +01:00
|
|
|
|
2018-09-21 17:57:38 +02:00
|
|
|
repo_init_revisions(the_repository, &rev, prefix);
|
2008-05-24 07:28:56 +02:00
|
|
|
|
2019-02-16 07:57:56 +01:00
|
|
|
/* Set up defaults that will apply to both no-index and regular diffs. */
|
diff --stat: use the full terminal width
Default to the real terminal width for diff --stat output, instead
of the hard-coded 80 columns.
Some projects (especially in Java), have long filename paths, with
nested directories or long individual filenames. When files are
renamed, the filename part in stat output can be almost useless. If
the middle part between { and } is long (because the file was moved to
a completely different directory), then most of the path would be
truncated.
It makes sense to detect and use the full terminal width and display
full filenames if possible.
There are commands like diff, show, and log, which can adapt the output
to the terminal width. There are also commands like format-patch,
whose output should be independent of the terminal width. Since it is
safer to use the 80-column default, the real terminal width is only
used if requested by the calling code by setting diffopts.stat_width=-1.
Normally this value is 0, and can be set by the user only to a
non-negative value, so -1 is safe to use internally.
This patch only changes the diff builtin to use the full terminal width.
Signed-off-by: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-03-01 13:26:39 +01:00
|
|
|
rev.diffopt.stat_width = -1;
|
2012-03-01 13:26:46 +01:00
|
|
|
rev.diffopt.stat_graph_width = -1;
|
2017-10-31 19:19:11 +01:00
|
|
|
rev.diffopt.flags.allow_external = 1;
|
|
|
|
rev.diffopt.flags.allow_textconv = 1;
|
2008-11-26 18:58:41 +01:00
|
|
|
|
2019-02-16 07:57:56 +01:00
|
|
|
/* If this is a no-index diff, just run it and exit there. */
|
|
|
|
if (no_index)
|
2019-04-25 09:41:12 +02:00
|
|
|
exit(diff_no_index(&rev, no_index == DIFF_NO_INDEX_IMPLICIT,
|
|
|
|
argc, argv));
|
|
|
|
|
2019-02-16 07:57:56 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Otherwise, we are doing the usual "git" diff; set up any
|
|
|
|
* further defaults that apply to regular diffs.
|
|
|
|
*/
|
|
|
|
rev.diffopt.skip_stat_unmatch = !!diff_auto_refresh_index;
|
|
|
|
|
2018-05-26 14:08:44 +02:00
|
|
|
/*
|
|
|
|
* Default to intent-to-add entries invisible in the
|
|
|
|
* index. This makes them show up as new files in diff-files
|
|
|
|
* and not at all in diff-cached.
|
|
|
|
*/
|
|
|
|
rev.diffopt.ita_invisible_in_index = 1;
|
|
|
|
|
2008-05-24 07:28:56 +02:00
|
|
|
if (nongit)
|
2011-02-23 00:41:50 +01:00
|
|
|
die(_("Not a git repository"));
|
2008-05-24 07:28:56 +02:00
|
|
|
argc = setup_revisions(argc, argv, &rev, NULL);
|
2006-07-02 07:15:40 +02:00
|
|
|
if (!rev.diffopt.output_format) {
|
2006-06-24 19:24:14 +02:00
|
|
|
rev.diffopt.output_format = DIFF_FORMAT_PATCH;
|
2012-08-03 14:16:24 +02:00
|
|
|
diff_setup_done(&rev.diffopt);
|
2006-07-02 07:15:40 +02:00
|
|
|
}
|
2008-11-26 18:58:41 +01:00
|
|
|
|
2017-10-31 19:19:11 +01:00
|
|
|
rev.diffopt.flags.recursive = 1;
|
2006-06-24 19:24:14 +02:00
|
|
|
|
fix pager.diff with diff --no-index
git-diff does not rely on the git wrapper to setup its
pager; instead, it sets it up on its own after seeing
whether --quiet or --exit-code has been specified. After
diff_no_index was split off from cmd_diff, commit b3fde6c
(git diff --no-index: default to page like other diff
frontends, 2008-05-26) duplicated the one-liner from
cmd_diff to turn on the pager.
Later, commit 8f0359f (Allow pager of diff command be
enabled/disabled, 2008-07-21) taught the version in
cmd_diff to respect the pager.diff config, but the version
in diff_no_index was left behind. This meant that
git -c pager.diff=0 diff a b
would not use a pager, but
git -c pager.diff=0 diff --no-index a b
would. Let's fix it by factoring out a common function.
While we're there, let's update the antiquated comment,
which claims that the pager interferes with propagating the
exit code; this has not been the case since ea27a18 (spawn
pager via run_command interface, 2008-07-22).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-06-15 22:29:48 +02:00
|
|
|
setup_diff_pager(&rev.diffopt);
|
2007-08-12 19:46:55 +02:00
|
|
|
|
2008-05-24 07:28:56 +02:00
|
|
|
/*
|
|
|
|
* Do we have --cached and not have a pending object, then
|
2006-04-29 08:20:52 +02:00
|
|
|
* default to HEAD by hand. Eek.
|
|
|
|
*/
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
if (!rev.pending.nr) {
|
2006-04-29 08:20:52 +02:00
|
|
|
int i;
|
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
const char *arg = argv[i];
|
|
|
|
if (!strcmp(arg, "--"))
|
|
|
|
break;
|
2008-10-29 17:15:36 +01:00
|
|
|
else if (!strcmp(arg, "--cached") ||
|
|
|
|
!strcmp(arg, "--staged")) {
|
2007-12-11 19:09:04 +01:00
|
|
|
add_head_to_pending(&rev);
|
2011-02-03 07:23:34 +01:00
|
|
|
if (!rev.pending.nr) {
|
|
|
|
struct tree *tree;
|
2018-06-29 03:21:56 +02:00
|
|
|
tree = lookup_tree(the_repository,
|
|
|
|
the_repository->hash_algo->empty_tree);
|
2011-02-03 07:23:34 +01:00
|
|
|
add_pending_object(&rev, &tree->object, "HEAD");
|
|
|
|
}
|
2006-04-29 08:20:52 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
git diff: improve range handling
When git diff is given a symmetric difference A...B, it chooses
some merge base from the two specified commits (as documented).
This fails, however, if there is *no* merge base: instead, you
see the differences between A and B, which is certainly not what
is expected.
Moreover, if additional revisions are specified on the command
line ("git diff A...B C"), the results get a bit weird:
* If there is a symmetric difference merge base, this is used
as the left side of the diff. The last final ref is used as
the right side.
* If there is no merge base, the symmetric status is completely
lost. We will produce a combined diff instead.
Similar weirdness occurs if you use, e.g., "git diff C A...B D".
Likewise, using multiple two-dot ranges, or tossing extra
revision specifiers into the command line with two-dot ranges,
or mixing two and three dot ranges, all produce nonsense.
To avoid all this, add a routine to catch the range cases and
verify that the arguments make sense. As a side effect,
produce a warning showing *which* merge base is being used when
there are multiple choices; die if there is no merge base.
Signed-off-by: Chris Torek <chris.torek@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-12 18:19:59 +02:00
|
|
|
symdiff_prepare(&rev, &sdiff);
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
for (i = 0; i < rev.pending.nr; i++) {
|
2013-05-25 11:08:05 +02:00
|
|
|
struct object_array_entry *entry = &rev.pending.objects[i];
|
|
|
|
struct object *obj = entry->item;
|
|
|
|
const char *name = entry->name;
|
2006-04-29 08:20:52 +02:00
|
|
|
int flags = (obj->flags & UNINTERESTING);
|
|
|
|
if (!obj->parsed)
|
2018-06-29 03:21:51 +02:00
|
|
|
obj = parse_object(the_repository, &obj->oid);
|
2018-06-29 03:22:05 +02:00
|
|
|
obj = deref_tag(the_repository, obj, NULL, 0);
|
2006-04-29 08:20:52 +02:00
|
|
|
if (!obj)
|
2011-02-23 00:41:50 +01:00
|
|
|
die(_("invalid object '%s' given."), name);
|
2006-07-12 05:45:31 +02:00
|
|
|
if (obj->type == OBJ_COMMIT)
|
2018-04-06 21:09:38 +02:00
|
|
|
obj = &get_commit_tree(((struct commit *)obj))->object;
|
2013-05-25 11:08:06 +02:00
|
|
|
|
2006-07-12 05:45:31 +02:00
|
|
|
if (obj->type == OBJ_TREE) {
|
git diff: improve range handling
When git diff is given a symmetric difference A...B, it chooses
some merge base from the two specified commits (as documented).
This fails, however, if there is *no* merge base: instead, you
see the differences between A and B, which is certainly not what
is expected.
Moreover, if additional revisions are specified on the command
line ("git diff A...B C"), the results get a bit weird:
* If there is a symmetric difference merge base, this is used
as the left side of the diff. The last final ref is used as
the right side.
* If there is no merge base, the symmetric status is completely
lost. We will produce a combined diff instead.
Similar weirdness occurs if you use, e.g., "git diff C A...B D".
Likewise, using multiple two-dot ranges, or tossing extra
revision specifiers into the command line with two-dot ranges,
or mixing two and three dot ranges, all produce nonsense.
To avoid all this, add a routine to catch the range cases and
verify that the arguments make sense. As a side effect,
produce a warning showing *which* merge base is being used when
there are multiple choices; die if there is no merge base.
Signed-off-by: Chris Torek <chris.torek@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-12 18:19:59 +02:00
|
|
|
if (sdiff.skip && bitmap_get(sdiff.skip, i))
|
|
|
|
continue;
|
2006-04-29 08:20:52 +02:00
|
|
|
obj->flags |= flags;
|
2013-05-25 11:08:04 +02:00
|
|
|
add_object_array(obj, name, &ent);
|
2013-05-25 11:08:06 +02:00
|
|
|
} else if (obj->type == OBJ_BLOB) {
|
2006-04-29 08:20:52 +02:00
|
|
|
if (2 <= blobs)
|
2011-02-23 00:41:50 +01:00
|
|
|
die(_("more than two blobs given: '%s'"), name);
|
2017-05-19 14:57:30 +02:00
|
|
|
blob[blobs] = entry;
|
2006-04-29 08:20:52 +02:00
|
|
|
blobs++;
|
2006-05-04 08:54:34 +02:00
|
|
|
|
2013-05-25 11:08:06 +02:00
|
|
|
} else {
|
|
|
|
die(_("unhandled object '%s' given."), name);
|
2006-04-29 08:20:52 +02:00
|
|
|
}
|
|
|
|
}
|
2013-11-20 02:26:41 +01:00
|
|
|
if (rev.prune_data.nr)
|
2010-12-17 13:43:06 +01:00
|
|
|
paths += rev.prune_data.nr;
|
2006-04-29 08:20:52 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Now, do the arguments look reasonable?
|
|
|
|
*/
|
2013-05-25 11:08:04 +02:00
|
|
|
if (!ent.nr) {
|
2006-04-29 08:20:52 +02:00
|
|
|
switch (blobs) {
|
|
|
|
case 0:
|
2008-05-24 07:28:56 +02:00
|
|
|
result = builtin_diff_files(&rev, argc, argv);
|
2006-04-29 08:20:52 +02:00
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
if (paths != 1)
|
|
|
|
usage(builtin_diff_usage);
|
2013-11-20 02:26:41 +01:00
|
|
|
result = builtin_diff_b_f(&rev, argc, argv, blob);
|
2006-04-29 08:20:52 +02:00
|
|
|
break;
|
|
|
|
case 2:
|
2006-04-29 10:24:49 +02:00
|
|
|
if (paths)
|
|
|
|
usage(builtin_diff_usage);
|
2007-03-14 01:17:04 +01:00
|
|
|
result = builtin_diff_blobs(&rev, argc, argv, blob);
|
2006-04-29 08:20:52 +02:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (blobs)
|
|
|
|
usage(builtin_diff_usage);
|
2013-05-25 11:08:04 +02:00
|
|
|
else if (ent.nr == 1)
|
2007-03-14 01:17:04 +01:00
|
|
|
result = builtin_diff_index(&rev, argc, argv);
|
git diff: improve range handling
When git diff is given a symmetric difference A...B, it chooses
some merge base from the two specified commits (as documented).
This fails, however, if there is *no* merge base: instead, you
see the differences between A and B, which is certainly not what
is expected.
Moreover, if additional revisions are specified on the command
line ("git diff A...B C"), the results get a bit weird:
* If there is a symmetric difference merge base, this is used
as the left side of the diff. The last final ref is used as
the right side.
* If there is no merge base, the symmetric status is completely
lost. We will produce a combined diff instead.
Similar weirdness occurs if you use, e.g., "git diff C A...B D".
Likewise, using multiple two-dot ranges, or tossing extra
revision specifiers into the command line with two-dot ranges,
or mixing two and three dot ranges, all produce nonsense.
To avoid all this, add a routine to catch the range cases and
verify that the arguments make sense. As a side effect,
produce a warning showing *which* merge base is being used when
there are multiple choices; die if there is no merge base.
Signed-off-by: Chris Torek <chris.torek@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-12 18:19:59 +02:00
|
|
|
else if (ent.nr == 2) {
|
|
|
|
if (sdiff.warn)
|
|
|
|
warning(_("%s...%s: multiple merge bases, using %s"),
|
|
|
|
sdiff.left, sdiff.right, sdiff.base);
|
2013-05-25 11:08:04 +02:00
|
|
|
result = builtin_diff_tree(&rev, argc, argv,
|
|
|
|
&ent.objects[0], &ent.objects[1]);
|
2010-07-13 02:27:46 +02:00
|
|
|
} else
|
2007-03-14 01:17:04 +01:00
|
|
|
result = builtin_diff_combined(&rev, argc, argv,
|
2013-05-25 11:08:04 +02:00
|
|
|
ent.objects, ent.nr);
|
2007-12-14 08:40:27 +01:00
|
|
|
result = diff_result_code(&rev.diffopt, result);
|
2007-08-31 22:13:42 +02:00
|
|
|
if (1 < rev.diffopt.skip_stat_unmatch)
|
|
|
|
refresh_index_quietly();
|
2017-10-01 19:42:08 +02:00
|
|
|
UNLEAK(rev);
|
|
|
|
UNLEAK(ent);
|
|
|
|
UNLEAK(blob);
|
2007-03-14 01:17:04 +01:00
|
|
|
return result;
|
2006-04-29 08:20:52 +02:00
|
|
|
}
|