2006-04-29 08:20:52 +02:00
|
|
|
/*
|
|
|
|
* Builtin "git diff"
|
|
|
|
*
|
|
|
|
* Copyright (c) 2006 Junio C Hamano
|
|
|
|
*/
|
|
|
|
#include "cache.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "blob.h"
|
|
|
|
#include "tag.h"
|
|
|
|
#include "diff.h"
|
|
|
|
#include "diffcore.h"
|
|
|
|
#include "revision.h"
|
|
|
|
#include "log-tree.h"
|
|
|
|
#include "builtin.h"
|
|
|
|
|
|
|
|
/* NEEDSWORK: struct object has place for name but we _do_
|
|
|
|
* know mode when we extracted the blob out of a tree, which
|
|
|
|
* we currently lose.
|
|
|
|
*/
|
|
|
|
struct blobinfo {
|
|
|
|
unsigned char sha1[20];
|
|
|
|
const char *name;
|
|
|
|
};
|
|
|
|
|
|
|
|
static const char builtin_diff_usage[] =
|
|
|
|
"diff <options> <rev>{0,2} -- <path>*";
|
|
|
|
|
|
|
|
static int builtin_diff_files(struct rev_info *revs,
|
|
|
|
int argc, const char **argv)
|
|
|
|
{
|
|
|
|
int silent = 0;
|
|
|
|
while (1 < argc) {
|
|
|
|
const char *arg = argv[1];
|
|
|
|
if (!strcmp(arg, "--base"))
|
|
|
|
revs->max_count = 1;
|
|
|
|
else if (!strcmp(arg, "--ours"))
|
|
|
|
revs->max_count = 2;
|
|
|
|
else if (!strcmp(arg, "--theirs"))
|
|
|
|
revs->max_count = 3;
|
|
|
|
else if (!strcmp(arg, "-q"))
|
|
|
|
silent = 1;
|
|
|
|
else if (!strcmp(arg, "--raw"))
|
|
|
|
revs->diffopt.output_format = DIFF_FORMAT_RAW;
|
|
|
|
else
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
argv++; argc--;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Make sure there are NO revision (i.e. pending object) parameter,
|
2006-04-29 10:24:49 +02:00
|
|
|
* specified rev.max_count is reasonable (0 <= n <= 3), and
|
|
|
|
* there is no other revision filtering parameter.
|
2006-04-29 08:20:52 +02:00
|
|
|
*/
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
if (revs->pending.nr ||
|
2006-04-29 08:20:52 +02:00
|
|
|
revs->min_age != -1 ||
|
2006-04-29 10:24:49 +02:00
|
|
|
revs->max_age != -1 ||
|
|
|
|
3 < revs->max_count)
|
2006-04-29 08:20:52 +02:00
|
|
|
usage(builtin_diff_usage);
|
2006-04-29 10:24:49 +02:00
|
|
|
if (revs->max_count < 0 &&
|
|
|
|
(revs->diffopt.output_format == DIFF_FORMAT_PATCH))
|
|
|
|
revs->combine_merges = revs->dense_combined_merges = 1;
|
2006-04-29 08:20:52 +02:00
|
|
|
/*
|
|
|
|
* Backward compatibility wart - "diff-files -s" used to
|
|
|
|
* defeat the common diff option "-s" which asked for
|
|
|
|
* DIFF_FORMAT_NO_OUTPUT.
|
|
|
|
*/
|
|
|
|
if (revs->diffopt.output_format == DIFF_FORMAT_NO_OUTPUT)
|
|
|
|
revs->diffopt.output_format = DIFF_FORMAT_RAW;
|
|
|
|
return run_diff_files(revs, silent);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void stuff_change(struct diff_options *opt,
|
|
|
|
unsigned old_mode, unsigned new_mode,
|
|
|
|
const unsigned char *old_sha1,
|
|
|
|
const unsigned char *new_sha1,
|
|
|
|
const char *old_name,
|
|
|
|
const char *new_name)
|
|
|
|
{
|
|
|
|
struct diff_filespec *one, *two;
|
|
|
|
|
|
|
|
if (memcmp(null_sha1, old_sha1, 20) &&
|
|
|
|
memcmp(null_sha1, new_sha1, 20) &&
|
|
|
|
!memcmp(old_sha1, new_sha1, 20))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (opt->reverse_diff) {
|
|
|
|
unsigned tmp;
|
2006-05-07 16:50:47 +02:00
|
|
|
const unsigned char *tmp_u;
|
2006-04-29 08:20:52 +02:00
|
|
|
const char *tmp_c;
|
|
|
|
tmp = old_mode; old_mode = new_mode; new_mode = tmp;
|
|
|
|
tmp_u = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_u;
|
|
|
|
tmp_c = old_name; old_name = new_name; new_name = tmp_c;
|
|
|
|
}
|
|
|
|
one = alloc_filespec(old_name);
|
|
|
|
two = alloc_filespec(new_name);
|
|
|
|
fill_filespec(one, old_sha1, old_mode);
|
|
|
|
fill_filespec(two, new_sha1, new_mode);
|
|
|
|
|
|
|
|
/* NEEDSWORK: shouldn't this part of diffopt??? */
|
|
|
|
diff_queue(&diff_queued_diff, one, two);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int builtin_diff_b_f(struct rev_info *revs,
|
|
|
|
int argc, const char **argv,
|
|
|
|
struct blobinfo *blob,
|
|
|
|
const char *path)
|
|
|
|
{
|
|
|
|
/* Blob vs file in the working tree*/
|
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
while (1 < argc) {
|
|
|
|
const char *arg = argv[1];
|
|
|
|
if (!strcmp(arg, "--raw"))
|
|
|
|
revs->diffopt.output_format = DIFF_FORMAT_RAW;
|
|
|
|
else
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
argv++; argc--;
|
|
|
|
}
|
|
|
|
if (lstat(path, &st))
|
|
|
|
die("'%s': %s", path, strerror(errno));
|
|
|
|
if (!(S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)))
|
|
|
|
die("'%s': not a regular file or symlink", path);
|
|
|
|
stuff_change(&revs->diffopt,
|
|
|
|
canon_mode(st.st_mode), canon_mode(st.st_mode),
|
|
|
|
blob[0].sha1, null_sha1,
|
2006-05-18 23:35:37 +02:00
|
|
|
path, path);
|
2006-04-29 08:20:52 +02:00
|
|
|
diffcore_std(&revs->diffopt);
|
|
|
|
diff_flush(&revs->diffopt);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int builtin_diff_blobs(struct rev_info *revs,
|
|
|
|
int argc, const char **argv,
|
|
|
|
struct blobinfo *blob)
|
|
|
|
{
|
2006-05-16 04:05:50 +02:00
|
|
|
/* Blobs: the arguments are reversed when setup_revisions()
|
|
|
|
* picked them up.
|
|
|
|
*/
|
2006-04-29 08:20:52 +02:00
|
|
|
unsigned mode = canon_mode(S_IFREG | 0644);
|
|
|
|
|
|
|
|
while (1 < argc) {
|
|
|
|
const char *arg = argv[1];
|
|
|
|
if (!strcmp(arg, "--raw"))
|
|
|
|
revs->diffopt.output_format = DIFF_FORMAT_RAW;
|
|
|
|
else
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
argv++; argc--;
|
|
|
|
}
|
|
|
|
stuff_change(&revs->diffopt,
|
|
|
|
mode, mode,
|
2006-05-16 04:05:50 +02:00
|
|
|
blob[1].sha1, blob[0].sha1,
|
|
|
|
blob[0].name, blob[0].name);
|
2006-04-29 08:20:52 +02:00
|
|
|
diffcore_std(&revs->diffopt);
|
|
|
|
diff_flush(&revs->diffopt);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int builtin_diff_index(struct rev_info *revs,
|
|
|
|
int argc, const char **argv)
|
|
|
|
{
|
|
|
|
int cached = 0;
|
|
|
|
while (1 < argc) {
|
|
|
|
const char *arg = argv[1];
|
|
|
|
if (!strcmp(arg, "--cached"))
|
|
|
|
cached = 1;
|
|
|
|
else if (!strcmp(arg, "--raw"))
|
|
|
|
revs->diffopt.output_format = DIFF_FORMAT_RAW;
|
|
|
|
else
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
argv++; argc--;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Make sure there is one revision (i.e. pending object),
|
|
|
|
* and there is no revision filtering parameters.
|
|
|
|
*/
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
if (revs->pending.nr != 1 ||
|
2006-04-29 08:20:52 +02:00
|
|
|
revs->max_count != -1 || revs->min_age != -1 ||
|
|
|
|
revs->max_age != -1)
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
return run_diff_index(revs, cached);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int builtin_diff_tree(struct rev_info *revs,
|
|
|
|
int argc, const char **argv,
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
struct object_array_entry *ent)
|
2006-04-29 08:20:52 +02:00
|
|
|
{
|
|
|
|
const unsigned char *(sha1[2]);
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
int swap = 0;
|
2006-04-29 08:20:52 +02:00
|
|
|
while (1 < argc) {
|
|
|
|
const char *arg = argv[1];
|
|
|
|
if (!strcmp(arg, "--raw"))
|
|
|
|
revs->diffopt.output_format = DIFF_FORMAT_RAW;
|
|
|
|
else
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
argv++; argc--;
|
|
|
|
}
|
2006-04-29 10:24:49 +02:00
|
|
|
|
|
|
|
/* We saw two trees, ent[0] and ent[1].
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
* if ent[1] is unintesting, they are swapped
|
2006-04-29 10:24:49 +02:00
|
|
|
*/
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
if (ent[1].item->flags & UNINTERESTING)
|
|
|
|
swap = 1;
|
2006-04-29 08:20:52 +02:00
|
|
|
sha1[swap] = ent[0].item->sha1;
|
|
|
|
sha1[1-swap] = ent[1].item->sha1;
|
|
|
|
diff_tree_sha1(sha1[0], sha1[1], "", &revs->diffopt);
|
|
|
|
log_tree_diff_flush(revs);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-04-29 10:24:49 +02:00
|
|
|
static int builtin_diff_combined(struct rev_info *revs,
|
|
|
|
int argc, const char **argv,
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
struct object_array_entry *ent,
|
2006-04-29 10:24:49 +02:00
|
|
|
int ents)
|
|
|
|
{
|
|
|
|
const unsigned char (*parent)[20];
|
|
|
|
int i;
|
|
|
|
|
|
|
|
while (1 < argc) {
|
|
|
|
const char *arg = argv[1];
|
|
|
|
if (!strcmp(arg, "--raw"))
|
|
|
|
revs->diffopt.output_format = DIFF_FORMAT_RAW;
|
|
|
|
else
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
argv++; argc--;
|
|
|
|
}
|
|
|
|
if (!revs->dense_combined_merges && !revs->combine_merges)
|
|
|
|
revs->dense_combined_merges = revs->combine_merges = 1;
|
|
|
|
parent = xmalloc(ents * sizeof(*parent));
|
|
|
|
/* Again, the revs are all reverse */
|
|
|
|
for (i = 0; i < ents; i++)
|
|
|
|
memcpy(parent + i, ent[ents - 1 - i].item->sha1, 20);
|
|
|
|
diff_tree_combined(parent[0], parent + 1, ents - 1,
|
|
|
|
revs->dense_combined_merges, revs);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-05-06 22:56:38 +02:00
|
|
|
void add_head(struct rev_info *revs)
|
2006-04-29 08:20:52 +02:00
|
|
|
{
|
|
|
|
unsigned char sha1[20];
|
|
|
|
struct object *obj;
|
|
|
|
if (get_sha1("HEAD", sha1))
|
|
|
|
return;
|
|
|
|
obj = parse_object(sha1);
|
|
|
|
if (!obj)
|
|
|
|
return;
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
add_pending_object(revs, obj, "HEAD");
|
2006-04-29 08:20:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int cmd_diff(int argc, const char **argv, char **envp)
|
|
|
|
{
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
int i;
|
2006-04-29 08:20:52 +02:00
|
|
|
struct rev_info rev;
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
struct object_array_entry ent[100];
|
2006-04-29 08:20:52 +02:00
|
|
|
int ents = 0, blobs = 0, paths = 0;
|
|
|
|
const char *path = NULL;
|
|
|
|
struct blobinfo blob[2];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We could get N tree-ish in the rev.pending_objects list.
|
|
|
|
* Also there could be M blobs there, and P pathspecs.
|
|
|
|
*
|
|
|
|
* N=0, M=0:
|
|
|
|
* cache vs files (diff-files)
|
|
|
|
* N=0, M=2:
|
|
|
|
* compare two random blobs. P must be zero.
|
|
|
|
* N=0, M=1, P=1:
|
|
|
|
* compare a blob with a working tree file.
|
|
|
|
*
|
|
|
|
* N=1, M=0:
|
|
|
|
* tree vs cache (diff-index --cached)
|
|
|
|
*
|
|
|
|
* N=2, M=0:
|
|
|
|
* tree vs tree (diff-tree)
|
|
|
|
*
|
|
|
|
* Other cases are errors.
|
|
|
|
*/
|
2006-05-04 08:54:34 +02:00
|
|
|
|
2006-04-29 08:20:52 +02:00
|
|
|
git_config(git_diff_config);
|
|
|
|
init_revisions(&rev);
|
|
|
|
rev.diffopt.output_format = DIFF_FORMAT_PATCH;
|
|
|
|
|
|
|
|
argc = setup_revisions(argc, argv, &rev, NULL);
|
|
|
|
/* Do we have --cached and not have a pending object, then
|
|
|
|
* default to HEAD by hand. Eek.
|
|
|
|
*/
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
if (!rev.pending.nr) {
|
2006-04-29 08:20:52 +02:00
|
|
|
int i;
|
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
const char *arg = argv[i];
|
|
|
|
if (!strcmp(arg, "--"))
|
|
|
|
break;
|
|
|
|
else if (!strcmp(arg, "--cached")) {
|
|
|
|
add_head(&rev);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
for (i = 0; i < rev.pending.nr; i++) {
|
|
|
|
struct object_array_entry *list = rev.pending.objects+i;
|
2006-04-29 08:20:52 +02:00
|
|
|
struct object *obj = list->item;
|
|
|
|
const char *name = list->name;
|
|
|
|
int flags = (obj->flags & UNINTERESTING);
|
|
|
|
if (!obj->parsed)
|
|
|
|
obj = parse_object(obj->sha1);
|
|
|
|
obj = deref_tag(obj, NULL, 0);
|
|
|
|
if (!obj)
|
|
|
|
die("invalid object '%s' given.", name);
|
Shrink "struct object" a bit
This shrinks "struct object" by a small amount, by getting rid of the
"struct type *" pointer and replacing it with a 3-bit bitfield instead.
In addition, we merge the bitfields and the "flags" field, which
incidentally should also remove a useless 4-byte padding from the object
when in 64-bit mode.
Now, our "struct object" is still too damn large, but it's now less
obviously bloated, and of the remaining fields, only the "util" (which is
not used by most things) is clearly something that should be eventually
discarded.
This shrinks the "git-rev-list --all" memory use by about 2.5% on the
kernel archive (and, perhaps more importantly, on the larger mozilla
archive). That may not sound like much, but I suspect it's more on a
64-bit platform.
There are other remaining inefficiencies (the parent lists, for example,
probably have horrible malloc overhead), but this was pretty obvious.
Most of the patch is just changing the comparison of the "type" pointer
from one of the constant string pointers to the appropriate new TYPE_xxx
small integer constant.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-15 01:45:13 +02:00
|
|
|
if (obj->type == TYPE_COMMIT)
|
2006-04-29 08:20:52 +02:00
|
|
|
obj = &((struct commit *)obj)->tree->object;
|
Shrink "struct object" a bit
This shrinks "struct object" by a small amount, by getting rid of the
"struct type *" pointer and replacing it with a 3-bit bitfield instead.
In addition, we merge the bitfields and the "flags" field, which
incidentally should also remove a useless 4-byte padding from the object
when in 64-bit mode.
Now, our "struct object" is still too damn large, but it's now less
obviously bloated, and of the remaining fields, only the "util" (which is
not used by most things) is clearly something that should be eventually
discarded.
This shrinks the "git-rev-list --all" memory use by about 2.5% on the
kernel archive (and, perhaps more importantly, on the larger mozilla
archive). That may not sound like much, but I suspect it's more on a
64-bit platform.
There are other remaining inefficiencies (the parent lists, for example,
probably have horrible malloc overhead), but this was pretty obvious.
Most of the patch is just changing the comparison of the "type" pointer
from one of the constant string pointers to the appropriate new TYPE_xxx
small integer constant.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-15 01:45:13 +02:00
|
|
|
if (obj->type == TYPE_TREE) {
|
2006-04-29 10:24:49 +02:00
|
|
|
if (ARRAY_SIZE(ent) <= ents)
|
|
|
|
die("more than %d trees given: '%s'",
|
2006-04-30 09:26:41 +02:00
|
|
|
(int) ARRAY_SIZE(ent), name);
|
2006-04-29 08:20:52 +02:00
|
|
|
obj->flags |= flags;
|
|
|
|
ent[ents].item = obj;
|
|
|
|
ent[ents].name = name;
|
|
|
|
ents++;
|
|
|
|
continue;
|
|
|
|
}
|
Shrink "struct object" a bit
This shrinks "struct object" by a small amount, by getting rid of the
"struct type *" pointer and replacing it with a 3-bit bitfield instead.
In addition, we merge the bitfields and the "flags" field, which
incidentally should also remove a useless 4-byte padding from the object
when in 64-bit mode.
Now, our "struct object" is still too damn large, but it's now less
obviously bloated, and of the remaining fields, only the "util" (which is
not used by most things) is clearly something that should be eventually
discarded.
This shrinks the "git-rev-list --all" memory use by about 2.5% on the
kernel archive (and, perhaps more importantly, on the larger mozilla
archive). That may not sound like much, but I suspect it's more on a
64-bit platform.
There are other remaining inefficiencies (the parent lists, for example,
probably have horrible malloc overhead), but this was pretty obvious.
Most of the patch is just changing the comparison of the "type" pointer
from one of the constant string pointers to the appropriate new TYPE_xxx
small integer constant.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-15 01:45:13 +02:00
|
|
|
if (obj->type == TYPE_BLOB) {
|
2006-04-29 08:20:52 +02:00
|
|
|
if (2 <= blobs)
|
|
|
|
die("more than two blobs given: '%s'", name);
|
|
|
|
memcpy(blob[blobs].sha1, obj->sha1, 20);
|
|
|
|
blob[blobs].name = name;
|
|
|
|
blobs++;
|
|
|
|
continue;
|
2006-05-04 08:54:34 +02:00
|
|
|
|
2006-04-29 08:20:52 +02:00
|
|
|
}
|
|
|
|
die("unhandled object '%s' given.", name);
|
|
|
|
}
|
|
|
|
if (rev.prune_data) {
|
|
|
|
const char **pathspec = rev.prune_data;
|
|
|
|
while (*pathspec) {
|
|
|
|
if (!path)
|
|
|
|
path = *pathspec;
|
|
|
|
paths++;
|
|
|
|
pathspec++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now, do the arguments look reasonable?
|
|
|
|
*/
|
|
|
|
if (!ents) {
|
|
|
|
switch (blobs) {
|
|
|
|
case 0:
|
|
|
|
return builtin_diff_files(&rev, argc, argv);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
if (paths != 1)
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
return builtin_diff_b_f(&rev, argc, argv, blob, path);
|
|
|
|
break;
|
|
|
|
case 2:
|
2006-04-29 10:24:49 +02:00
|
|
|
if (paths)
|
|
|
|
usage(builtin_diff_usage);
|
2006-04-29 08:20:52 +02:00
|
|
|
return builtin_diff_blobs(&rev, argc, argv, blob);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (blobs)
|
|
|
|
usage(builtin_diff_usage);
|
|
|
|
else if (ents == 1)
|
|
|
|
return builtin_diff_index(&rev, argc, argv);
|
|
|
|
else if (ents == 2)
|
|
|
|
return builtin_diff_tree(&rev, argc, argv, ent);
|
2006-04-29 10:24:49 +02:00
|
|
|
else
|
|
|
|
return builtin_diff_combined(&rev, argc, argv, ent, ents);
|
2006-04-29 08:20:52 +02:00
|
|
|
usage(builtin_diff_usage);
|
|
|
|
}
|