git-commit-vandalism/builtin-add.c

268 lines
6.3 KiB
C
Raw Normal View History

/*
* "git add" builtin command
*
* Copyright (C) 2006 Linus Torvalds
*/
#include "cache.h"
#include "builtin.h"
#include "dir.h"
/*
 * git-add --interactive A script to be driven when the user says "git add --interactive" is introduced. When it is run, first it runs its internal 'status' command to show the current status, and then goes into its interactive command loop. The command loop shows the list of subcommands available, and gives a prompt "What now> ". In general, when the prompt ends with a single '>', you can pick only one of the choices given and type return, like this: *** Commands *** 1: status 2: update 3: revert 4: add untracked 5: patch 6: diff 7: quit 8: help What now> 1 You also could say "s" or "sta" or "status" above as long as the choice is unique. The main command loop has 6 subcommands (plus help and quit). * 'status' shows the change between HEAD and index (i.e. what will be committed if you say "git commit"), and between index and working tree files (i.e. what you could stage further before "git commit" using "git-add") for each path. A sample output looks like this: staged unstaged path 1: binary nothing foo.png 2: +403/-35 +1/-1 git-add--interactive.perl It shows that foo.png has differences from HEAD (but that is binary so line count cannot be shown) and there is no difference between indexed copy and the working tree version (if the working tree version were also different, 'binary' would have been shown in place of 'nothing'). The other file, git-add--interactive.perl, has 403 lines added and 35 lines deleted if you commit what is in the index, but working tree file has further modifications (one addition and one deletion). * 'update' shows the status information and gives prompt "Update>>". When the prompt ends with double '>>', you can make more than one selection, concatenated with whitespace or comma. Also you can say ranges. E.g. "2-5 7,9" to choose 2,3,4,5,7,9 from the list. You can say '*' to choose everything.
 * What you chose are then highlighted with '*', like this: staged unstaged path 1: binary nothing foo.png * 2: +403/-35 +1/-1 git-add--interactive.perl To remove selection, prefix the input with - like this: Update>> -2 After making the selection, answer with an empty line to stage the contents of working tree files for selected paths in the index. * 'revert' has a very similar UI to 'update', and the staged information for selected paths is reverted to that of the HEAD version. Reverting new paths makes them untracked. * 'add untracked' has a very similar UI to 'update' and 'revert', and lets you add untracked paths to the index. * 'patch' lets you choose one path out of 'status' like selection. After choosing the path, it presents diff between the index and the working tree file and asks you if you want to stage the change of each hunk. You can say: y - add the change from that hunk to index n - do not add the change from that hunk to index a - add the change from that hunk and all the rest to index d - do not add the change from that hunk nor any of the rest to index j - do not decide on this hunk now, and view the next undecided hunk J - do not decide on this hunk now, and view the next hunk k - do not decide on this hunk now, and view the previous undecided hunk K - do not decide on this hunk now, and view the previous hunk After deciding the fate for all hunks, if there is any hunk that was chosen, the index is updated with the selected hunks. * 'diff' lets you review what will be committed (i.e. between HEAD and index). This is still rough, but does everything except a few things I think are needed. * 'patch' should be able to allow splitting a hunk into multiple hunks. * 'patch' does not adjust the line offsets @@ -k,l +m,n @@ in the hunk header. This does not have major problem in practice, but it _should_ do the adjustment. * It does not have any explicit support for a merge in progress; it may not work at all. Signed-off-by: Junio C Hamano <junkio@cox.net>
 * 2006-12-11 05:55:50 +01:00
 */
#include "exec_cmd.h"
#include "cache-tree.h"
#include "diff.h"
#include "diffcore.h"
#include "commit.h"
#include "revision.h"
#include "run-command.h"
#include "parse-options.h"
/* Usage text handed to the option parser; the list is NULL-terminated. */
static const char * const builtin_add_usage[] = {
	"git-add [options] [--] <filepattern>...",
	NULL
};

/*
 * Mode flags for this command.  File-scope statics start out as zero,
 * so no explicit initializer is needed.
 */
static int patch_interactive;
static int add_interactive;
static int take_worktree_changes;
/*
 * Keep only the entries of "dir" that match at least one pathspec.
 *
 * Surviving entries are compacted to the front of dir->entries (their
 * relative order is preserved) and dir->nr is updated to the new count.
 * Afterwards, every pathspec that matched no entry and does not name an
 * existing file makes the command die().
 *
 * "pathspec" must be a NULL-terminated array; "prefix" is passed through
 * to match_pathspec() unchanged.
 */
static void prune_directory(struct dir_struct *dir, const char **pathspec, int prefix)
{
	struct dir_entry **in = dir->entries;
	struct dir_entry **out = dir->entries;
	int count = 0, remaining, n;
	char *matched;

	/* Count the pathspecs so we can track which ones matched. */
	while (pathspec[count])
		count++;
	matched = xcalloc(count, 1);

	for (remaining = dir->nr; remaining > 0; remaining--) {
		struct dir_entry *ent = *in++;

		if (!match_pathspec(pathspec, ent->name, ent->len,
				    prefix, matched))
			continue;
		*out++ = ent;
	}
	dir->nr = out - dir->entries;

	for (n = 0; n < count; n++) {
		/* file_exists() is consulted only for unmatched pathspecs */
		if (matched[n] || file_exists(pathspec[n]))
			continue;
		die("pathspec '%s' did not match any files",
		    pathspec[n]);
	}
	free(matched);
}
/*
 * Initialise "dir" and fill it with the directory entries selected by
 * "pathspec".
 *
 * "dir" is zeroed first.  Unless "ignored_too" is set, ignored paths are
 * collected (dir->collect_ignored) and the standard exclude rules are
 * loaded.  The directory is then read starting from the common leading
 * prefix of the pathspecs and, when a pathspec was given, pruned down to
 * the entries that match it.
 */
static void fill_directory(struct dir_struct *dir, const char **pathspec,
		int ignored_too)
{
	const char *path, *base;
	int baselen;

	/* Set up the default git porcelain excludes */
	memset(dir, 0, sizeof(*dir));
	if (!ignored_too) {
		dir->collect_ignored = 1;
		/*
		 * Use the shared definition of the standard ignore files
		 * (.git/info/exclude, .gitignore, core.excludesfile)
		 * instead of duplicating that list in this command.
		 */
		setup_standard_excludes(dir);
	}

	/*
	 * Calculate common prefix for the pathspec, and
	 * use that to optimize the directory walk
	 */
	baselen = common_prefix(pathspec);
	path = ".";
	base = "";
	if (baselen) {
		/*
		 * NOTE(review): this copy is never freed here; confirm
		 * whether read_directory() keeps a reference to it.
		 */
		path = base = xmemdupz(*pathspec, baselen);
	}

	/* Read the directory and prune it */
	read_directory(dir, path, base, baselen, pathspec);
	if (pathspec)
		prune_directory(dir, pathspec, baselen);
}
/*
 * Diff callback used by add_files_to_cache().
 *
 * For every queued file pair, bring the index in line with the work
 * tree: unmerged, modified and type-changed paths are (re-)added, and
 * deleted paths are removed from the index.  Any other status is fatal.
 *
 * "cbdata" points at an int holding the verbose flag.
 */
static void update_callback(struct diff_queue_struct *q,
			    struct diff_options *opt, void *cbdata)
{
	const int be_verbose = *(int *)cbdata;
	int idx;

	for (idx = 0; idx < q->nr; idx++) {
		struct diff_filepair *pair = q->queue[idx];
		const char *name = pair->one->path;

		switch (pair->status) {
		case DIFF_STATUS_UNMERGED:
		case DIFF_STATUS_MODIFIED:
		case DIFF_STATUS_TYPE_CHANGED:
			add_file_to_cache(name, be_verbose);
			break;
		case DIFF_STATUS_DELETED:
			remove_file_from_cache(name);
			if (be_verbose)
				printf("remove '%s'\n", name);
			break;
		default:
			die("unexpected diff status %c", pair->status);
		}
	}
}
void add_files_to_cache(int verbose, const char *prefix, const char **pathspec)
{
struct rev_info rev;
init_revisions(&rev, prefix);
setup_revisions(0, NULL, &rev, NULL);
rev.prune_data = pathspec;
rev.diffopt.output_format = DIFF_FORMAT_CALLBACK;
rev.diffopt.format_callback = update_callback;
rev.diffopt.format_callback_data = &verbose;
git-add: make the entry stat-clean after re-adding the same contents Earlier in commit 0781b8a9b2fe760fc4ed519a3a26e4b9bd6ccffe (add_file_to_index: skip rehashing if the cached stat already matches), add_file_to_index() were taught not to re-add the path if it already matches the index. The change meant well, but was not executed quite right. It used ie_modified() to see if the file on the work tree is really different from the index, and skipped adding the contents if the function says "not modified". This was wrong. There are three possible comparison results between the index and the file in the work tree: - with lstat(2) we _know_ they are different. E.g. if the length or the owner in the cached stat information is different from the length we just obtained from lstat(2), we can tell the file is modified without looking at the actual contents. - with lstat(2) we _know_ they are the same. The same length, the same owner, the same everything (but this has a twist, as described below). - we cannot tell from lstat(2) information alone and need to go to the filesystem to actually compare. The last case arises from what we call 'racy git' situation, that can be caused with this sequence: $ echo hello >file $ git add file $ echo aeiou >file ;# the same length If the second "echo" is done within the same filesystem timestamp granularity as the first "echo", then the timestamp recorded by "git add" and the timestamp we get from lstat(2) will be the same, and we can mistakenly say the file is not modified. The path is called 'racily clean'. We need to reliably detect racily clean paths are in fact modified. 
To solve this problem, when we write out the index, we mark the index entry that has the same timestamp as the index file itself (that is the time from the point of view of the filesystem) to tell any later code that does the lstat(2) comparison not to trust the cached stat info, and ie_modified() then actually goes to the filesystem to compare the contents for such a path. That's all good, but it should not be used for this "git add" optimization, as the goal of "git add" is to actually update the path in the index and make it stat-clean. With the false optimization, we did _not_ cause any data loss (after all, what we failed to do was only to update the cached stat information), but it made the following sequence leave the file stat dirty: $ echo hello >file $ git add file $ echo hello >file ;# the same contents $ git add file The solution is not to use ie_modified() which goes to the filesystem to see if it is really clean, but instead use ie_match_stat() with "assume racily clean paths are dirty" option, to force re-adding of such a path. There was another problem with "git add -u". The codepath shares the same issue when adding the paths that are found to be modified, but in addition, it asked "git diff-files" machinery run_diff_files() function (which is "git diff-files") to list the paths that are modified. But "git diff-files" machinery uses the same ie_modified() call so that it does not report racily clean _and_ actually clean paths as modified, which is not what we want. The patch allows the callers of run_diff_files() to pass the same "assume racily clean paths are dirty" option, and makes "git-add -u" codepath to use that option, to discover and re-add racily clean _and_ actually clean paths. We could further optimize on top of this patch to differentiate the case where the path really needs re-adding (i.e. the content of the racily clean entry was indeed different) and the case where only the cached stat information needs to be refreshed (i.e. 
the racily clean entry was actually clean), but I do not think it is worth it. This patch applies to maint and all the way up. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-10 03:22:52 +01:00
run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
}
/*
 * Refresh the index entries selected by "pathspec" without adding
 * anything.  Dies if the index cannot be read, or if some pathspec did
 * not match any entry.  Unless "verbose" is set the refresh is quiet.
 *
 * "pathspec" must be a NULL-terminated array.
 */
static void refresh(int verbose, const char **pathspec)
{
	int count = 0, n;
	char *matched;

	while (pathspec[count])
		count++;
	matched = xcalloc(count, 1);

	if (read_cache() < 0)
		die("index file corrupt");

	refresh_index(&the_index, verbose ? 0 : REFRESH_QUIET, pathspec, matched);

	for (n = 0; n < count; n++) {
		if (matched[n])
			continue;
		die("pathspec '%s' did not match any files", pathspec[n]);
	}
	free(matched);
}
/*
 * Turn the command-line arguments into a pathspec array by delegating
 * to get_pathspec().  "argc" is accepted but not used.
 */
static const char **validate_pathspec(int argc, const char **argv, const char *prefix)
{
	return get_pathspec(prefix, argv);
}
/*
 * Run the "add--interactive" helper as a git command.
 *
 * The helper's argument list is built as
 *     add--interactive [--patch] -- <pathspec>...
 * where "--patch" is included when patch_interactive is set and the
 * pathspecs are derived from argv when argc is non-zero.
 *
 * Returns the status reported by run_command_v_opt(), or -1 when the
 * pathspec could not be obtained.
 */
int interactive_add(int argc, const char **argv, const char *prefix)
{
	const char **pathspec = NULL;
	const char **cmd;
	int n = 0, k, rc;

	if (argc) {
		pathspec = validate_pathspec(argc, argv, prefix);
		if (!pathspec)
			return -1;
	}

	/* room for: helper name, "--patch", "--", argc paths, NULL */
	cmd = xcalloc(sizeof(const char *), (argc + 4));
	cmd[n++] = "add--interactive";
	if (patch_interactive)
		cmd[n++] = "--patch";
	cmd[n++] = "--";
	for (k = 0; k < argc; k++)
		cmd[n++] = pathspec[k];
	cmd[n] = NULL;

	rc = run_command_v_opt(cmd, RUN_GIT_CMD);
	free(cmd);
	return rc;
}
/* Lock object for the index file; taken in cmd_add(). */
static struct lock_file lock_file;

/* Header printed before the list of ignored paths that were requested. */
static const char ignore_error[] =
	"The following paths are ignored by one of your .gitignore files:\n";

/* Option flags; file-scope statics start out as zero. */
static int verbose;
static int show_only;
static int ignored_too;
static int refresh_only;

/* Command-line options understood by "git add". */
static struct option builtin_add_options[] = {
	OPT__DRY_RUN(&show_only),
	OPT__VERBOSE(&verbose),
	OPT_GROUP(""),
	OPT_BOOLEAN('i', "interactive", &add_interactive, "interactive picking"),
	OPT_BOOLEAN('p', "patch", &patch_interactive, "interactive patching"),
	OPT_BOOLEAN('f', NULL, &ignored_too, "allow adding otherwise ignored files"),
	OPT_BOOLEAN('u', NULL, &take_worktree_changes, "update tracked files"),
	OPT_BOOLEAN( 0 , "refresh", &refresh_only, "don't add, only refresh the index"),
	OPT_END(),
};
int cmd_add(int argc, const char **argv, const char *prefix)
{
int i, newfd;
const char **pathspec;
struct dir_struct dir;
git-add --interactive A script to be driven when the user says "git add --interactive" is introduced. When it is run, first it runs its internal 'status' command to show the current status, and then goes into its internactive command loop. The command loop shows the list of subcommands available, and gives a prompt "What now> ". In general, when the prompt ends with a single '>', you can pick only one of the choices given and type return, like this: *** Commands *** 1: status 2: update 3: revert 4: add untracked 5: patch 6: diff 7: quit 8: help What now> 1 You also could say "s" or "sta" or "status" above as long as the choice is unique. The main command loop has 6 subcommands (plus help and quit). * 'status' shows the change between HEAD and index (i.e. what will be committed if you say "git commit"), and between index and working tree files (i.e. what you could stage further before "git commit" using "git-add") for each path. A sample output looks like this: staged unstaged path 1: binary nothing foo.png 2: +403/-35 +1/-1 git-add--interactive.perl It shows that foo.png has differences from HEAD (but that is binary so line count cannot be shown) and there is no difference between indexed copy and the working tree version (if the working tree version were also different, 'binary' would have been shown in place of 'nothing'). The other file, git-add--interactive.perl, has 403 lines added and 35 lines deleted if you commit what is in the index, but working tree file has further modifications (one addition and one deletion). * 'update' shows the status information and gives prompt "Update>>". When the prompt ends with double '>>', you can make more than one selection, concatenated with whitespace or comma. Also you can say ranges. E.g. "2-5 7,9" to choose 2,3,4,5,7,9 from the list. You can say '*' to choose everything. 
What you chose are then highlighted with '*', like this: staged unstaged path 1: binary nothing foo.png * 2: +403/-35 +1/-1 git-add--interactive.perl To remove selection, prefix the input with - like this: Update>> -2 After making the selection, answer with an empty line to stage the contents of working tree files for selected paths in the index. * 'revert' has a very similar UI to 'update', and the staged information for selected paths are reverted to that of the HEAD version. Reverting new paths makes them untracked. * 'add untracked' has a very similar UI to 'update' and 'revert', and lets you add untracked paths to the index. * 'patch' lets you choose one path out of 'status' like selection. After choosing the path, it presents diff between the index and the working tree file and asks you if you want to stage the change of each hunk. You can say: y - add the change from that hunk to index n - do not add the change from that hunk to index a - add the change from that hunk and all the rest to index d - do not the change from that hunk nor any of the rest to index j - do not decide on this hunk now, and view the next undecided hunk J - do not decide on this hunk now, and view the next hunk k - do not decide on this hunk now, and view the previous undecided hunk K - do not decide on this hunk now, and view the previous hunk After deciding the fate for all hunks, if there is any hunk that was chosen, the index is updated with the selected hunks. * 'diff' lets you review what will be committed (i.e. between HEAD and index). This is still rough, but does everything except a few things I think are needed. * 'patch' should be able to allow splitting a hunk into multiple hunks. * 'patch' does not adjust the line offsets @@ -k,l +m,n @@ in the hunk header. This does not have major problem in practice, but it _should_ do the adjustment. * It does not have any explicit support for a merge in progress; it may not work at all. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-11 05:55:50 +01:00
argc = parse_options(argc, argv, builtin_add_options,
builtin_add_usage, 0);
if (patch_interactive)
add_interactive = 1;
if (add_interactive)
exit(interactive_add(argc, argv, prefix));
core.excludesfile clean-up There are inconsistencies in the way commands currently handle the core.excludesfile configuration variable. The problem is the variable is too new to be noticed by anything other than git-add and git-status. * git-ls-files does not notice any of the "ignore" files by default, as it predates the standardized set of ignore files. The calling scripts established the convention to use .git/info/exclude, .gitignore, and later core.excludesfile. * git-add and git-status know about it because they call add_excludes_from_file() directly with their own notion of which standard set of ignore files to use. This is just a stupid duplication of code that need to be updated every time the definition of the standard set of ignore files is changed. * git-read-tree takes --exclude-per-directory=<gitignore>, not because the flexibility was needed. Again, this was because the option predates the standardization of the ignore files. * git-merge-recursive uses hardcoded per-directory .gitignore and nothing else. git-clean (scripted version) does not honor core.* because its call to underlying ls-files does not know about it. git-clean in C (parked in 'pu') doesn't either. We probably could change git-ls-files to use the standard set when no excludes are specified on the command line and ignore processing was asked, or something like that, but that will be a change in semantics and might break people's scripts in a subtle way. I am somewhat reluctant to make such a change. On the other hand, I think it makes perfect sense to fix git-read-tree, git-merge-recursive and git-clean to follow the same rule as other commands. I do not think of a valid use case to give an exclude-per-directory that is nonstandard to read-tree command, outside a "negative" test in the t1004 test script. This patch is the first step to untangle this mess. The next step would be to teach read-tree, merge-recursive and clean (in C) to use setup_standard_excludes(). 
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-14 09:05:00 +01:00
git_config(git_default_config);
newfd = hold_locked_index(&lock_file, 1);
if (take_worktree_changes) {
const char **pathspec;
if (read_cache() < 0)
die("index file corrupt");
pathspec = get_pathspec(prefix, argv);
add_files_to_cache(verbose, prefix, pathspec);
goto finish;
}
if (argc == 0) {
fprintf(stderr, "Nothing specified, nothing added.\n");
fprintf(stderr, "Maybe you wanted to say 'git add .'?\n");
return 0;
}
pathspec = get_pathspec(prefix, argv);
if (refresh_only) {
refresh(verbose, pathspec);
goto finish;
}
fill_directory(&dir, pathspec, ignored_too);
if (show_only) {
const char *sep = "", *eof = "";
for (i = 0; i < dir.nr; i++) {
printf("%s%s", sep, dir.entries[i]->name);
sep = " ";
eof = "\n";
}
fputs(eof, stdout);
return 0;
}
if (read_cache() < 0)
die("index file corrupt");
if (dir.ignored_nr) {
fprintf(stderr, ignore_error);
for (i = 0; i < dir.ignored_nr; i++) {
fprintf(stderr, "%s\n", dir.ignored[i]->name);
}
fprintf(stderr, "Use -f if you really want to add them.\n");
die("no files added");
}
for (i = 0; i < dir.nr; i++)
add_file_to_cache(dir.entries[i]->name, verbose);
finish:
if (active_cache_changed) {
if (write_cache(newfd, active_cache, active_nr) ||
commit_locked_index(&lock_file))
die("Unable to write new index file");
}
return 0;
}