From 453ec4bdf403c2e89892266a0a660c21680d3f9d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 16 May 2006 19:02:14 -0700 Subject: [PATCH 1/9] libify git-ls-files directory traversal This moves the core directory traversal and filename exclusion logic into the general git library, making it available for other users directly. If we ever want to do "git commit" or "git add" as a built-in (and we do), we want to be able to handle most of git-ls-files as a library. NOTE! Not all of git-ls-files is libified by this. The index matching and pathspec prefix calculation is still in ls-files.c, but this is a big part of it. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- Makefile | 4 +- dir.c | 295 +++++++++++++++++++++++++++++++++++++++++++ dir.h | 50 ++++++++ ls-files.c | 363 +++++------------------------------------------------ 4 files changed, 376 insertions(+), 336 deletions(-) create mode 100644 dir.c create mode 100644 dir.h diff --git a/Makefile b/Makefile index 9ba608c805..f43ac63c93 100644 --- a/Makefile +++ b/Makefile @@ -199,7 +199,7 @@ LIB_H = \ blob.h cache.h commit.h csum-file.h delta.h \ diff.h object.h pack.h pkt-line.h quote.h refs.h \ run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \ - tree-walk.h log-tree.h + tree-walk.h log-tree.h dir.h DIFF_OBJS = \ diff.o diff-lib.o diffcore-break.o diffcore-order.o \ @@ -210,7 +210,7 @@ LIB_OBJS = \ blob.o commit.o connect.o csum-file.o base85.o \ date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \ object.o pack-check.o patch-delta.o path.o pkt-line.o \ - quote.o read-cache.o refs.o run-command.o \ + quote.o read-cache.o refs.o run-command.o dir.o \ server-info.o setup.o sha1_file.o sha1_name.o strbuf.o \ tag.o tree.o usage.o config.o environment.o ctype.o copy.o \ fetch-clone.o revision.o pager.o tree-walk.o xdiff-interface.o \ diff --git a/dir.c b/dir.c new file mode 100644 index 0000000000..3f41a5dfea --- /dev/null +++ b/dir.c @@ -0,0 +1,295 @@ +/* + * This handles recursive filename detection with exclude + * files, index knowledge etc.. + * + * Copyright (C) Linus Torvalds, 2005-2006 + * Junio Hamano, 2005-2006 + */ +#include +#include + +#include "cache.h" +#include "dir.h" + +void add_exclude(const char *string, const char *base, + int baselen, struct exclude_list *which) +{ + struct exclude *x = xmalloc(sizeof (*x)); + + x->pattern = string; + x->base = base; + x->baselen = baselen; + if (which->nr == which->alloc) { + which->alloc = alloc_nr(which->alloc); + which->excludes = realloc(which->excludes, + which->alloc * sizeof(x)); + } + which->excludes[which->nr++] = x; +} + +static int add_excludes_from_file_1(const char *fname, + const char *base, + int baselen, + struct exclude_list *which) +{ + int fd, i; + long size; + char *buf, *entry; + + fd = open(fname, O_RDONLY); + if (fd < 0) + goto err; + size = lseek(fd, 0, SEEK_END); + if (size < 0) + goto err; + lseek(fd, 0, SEEK_SET); + if (size == 0) { + close(fd); + return 0; + } + buf = xmalloc(size+1); + if (read(fd, buf, size) != size) + goto err; + close(fd); + + buf[size++] = '\n'; + entry = buf; + for (i = 0; i < size; i++) { + if (buf[i] == '\n') { + if (entry != buf + i && entry[0] != '#') { + buf[i - (i && buf[i-1] == '\r')] = 0; + add_exclude(entry, base, baselen, which); + } + entry = buf + i + 1; + } + } + return 0; + + err: + if (0 <= fd) + close(fd); + return -1; +} + +void add_excludes_from_file(struct dir_struct *dir, const char *fname) +{ + if (add_excludes_from_file_1(fname, "", 0, + &dir->exclude_list[EXC_FILE]) < 0) + die("cannot use %s as an exclude file", fname); +} + +int push_exclude_per_directory(struct dir_struct *dir, const char *base, int baselen) +{ + char exclude_file[PATH_MAX]; + struct exclude_list *el = &dir->exclude_list[EXC_DIRS]; + int current_nr = el->nr; + + if (dir->exclude_per_dir) { + memcpy(exclude_file, base, baselen); + strcpy(exclude_file + baselen, dir->exclude_per_dir); + add_excludes_from_file_1(exclude_file, base, baselen, el); + } + return current_nr; +} + +static void pop_exclude_per_directory(struct dir_struct *dir, int stk) +{ + struct exclude_list *el = &dir->exclude_list[EXC_DIRS]; + + while (stk < el->nr) + free(el->excludes[--el->nr]); +} + +/* Scan the list and let the last match determines the fate. + * Return 1 for exclude, 0 for include and -1 for undecided. + */ +static int excluded_1(const char *pathname, + int pathlen, + struct exclude_list *el) +{ + int i; + + if (el->nr) { + for (i = el->nr - 1; 0 <= i; i--) { + struct exclude *x = el->excludes[i]; + const char *exclude = x->pattern; + int to_exclude = 1; + + if (*exclude == '!') { + to_exclude = 0; + exclude++; + } + + if (!strchr(exclude, '/')) { + /* match basename */ + const char *basename = strrchr(pathname, '/'); + basename = (basename) ? basename+1 : pathname; + if (fnmatch(exclude, basename, 0) == 0) + return to_exclude; + } + else { + /* match with FNM_PATHNAME: + * exclude has base (baselen long) implicitly + * in front of it. + */ + int baselen = x->baselen; + if (*exclude == '/') + exclude++; + + if (pathlen < baselen || + (baselen && pathname[baselen-1] != '/') || + strncmp(pathname, x->base, baselen)) + continue; + + if (fnmatch(exclude, pathname+baselen, + FNM_PATHNAME) == 0) + return to_exclude; + } + } + } + return -1; /* undecided */ +} + +int excluded(struct dir_struct *dir, const char *pathname) +{ + int pathlen = strlen(pathname); + int st; + + for (st = EXC_CMDL; st <= EXC_FILE; st++) { + switch (excluded_1(pathname, pathlen, &dir->exclude_list[st])) { + case 0: + return 0; + case 1: + return 1; + } + } + return 0; +} + +static void add_name(struct dir_struct *dir, const char *pathname, int len) +{ + struct dir_entry *ent; + + if (cache_name_pos(pathname, len) >= 0) + return; + + if (dir->nr == dir->alloc) { + int alloc = alloc_nr(dir->alloc); + dir->alloc = alloc; + dir->entries = xrealloc(dir->entries, alloc*sizeof(ent)); + } + ent = xmalloc(sizeof(*ent) + len + 1); + ent->len = len; + memcpy(ent->name, pathname, len); + ent->name[len] = 0; + dir->entries[dir->nr++] = ent; +} + +static int dir_exists(const char *dirname, int len) +{ + int pos = cache_name_pos(dirname, len); + if (pos >= 0) + return 1; + pos = -pos-1; + if (pos >= active_nr) /* can't */ + return 0; + return !strncmp(active_cache[pos]->name, dirname, len); +} + +/* + * Read a directory tree. We currently ignore anything but + * directories, regular files and symlinks. That's because git + * doesn't handle them at all yet. Maybe that will change some + * day. + * + * Also, we ignore the name ".git" (even if it is not a directory). + * That likely will not change. + */ +static int read_directory_recursive(struct dir_struct *dir, const char *path, const char *base, int baselen) +{ + DIR *fdir = opendir(path); + int contents = 0; + + if (fdir) { + int exclude_stk; + struct dirent *de; + char fullname[MAXPATHLEN + 1]; + memcpy(fullname, base, baselen); + + exclude_stk = push_exclude_per_directory(dir, base, baselen); + + while ((de = readdir(fdir)) != NULL) { + int len; + + if ((de->d_name[0] == '.') && + (de->d_name[1] == 0 || + !strcmp(de->d_name + 1, ".") || + !strcmp(de->d_name + 1, "git"))) + continue; + len = strlen(de->d_name); + memcpy(fullname + baselen, de->d_name, len+1); + if (excluded(dir, fullname) != dir->show_ignored) { + if (!dir->show_ignored || DTYPE(de) != DT_DIR) { + continue; + } + } + + switch (DTYPE(de)) { + struct stat st; + int subdir, rewind_base; + default: + continue; + case DT_UNKNOWN: + if (lstat(fullname, &st)) + continue; + if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) + break; + if (!S_ISDIR(st.st_mode)) + continue; + /* fallthrough */ + case DT_DIR: + memcpy(fullname + baselen + len, "/", 2); + len++; + rewind_base = dir->nr; + subdir = read_directory_recursive(dir, fullname, fullname, + baselen + len); + if (dir->show_other_directories && + (subdir || !dir->hide_empty_directories) && + !dir_exists(fullname, baselen + len)) { + // Rewind the read subdirectory + while (dir->nr > rewind_base) + free(dir->entries[--dir->nr]); + break; + } + contents += subdir; + continue; + case DT_REG: + case DT_LNK: + break; + } + add_name(dir, fullname, baselen + len); + contents++; + } + closedir(fdir); + + pop_exclude_per_directory(dir, exclude_stk); + } + + return contents; +} + +static int cmp_name(const void *p1, const void *p2) +{ + const struct dir_entry *e1 = *(const struct dir_entry **)p1; + const struct dir_entry *e2 = *(const struct dir_entry **)p2; + + return cache_name_compare(e1->name, e1->len, + e2->name, e2->len); +} + +int read_directory(struct dir_struct *dir, const char *path, const char *base, int baselen) +{ + read_directory_recursive(dir, path, base, baselen); + qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name); + return dir->nr; +} diff --git a/dir.h b/dir.h new file mode 100644 index 0000000000..e8fc441e7f --- /dev/null +++ b/dir.h @@ -0,0 +1,50 @@ +#ifndef DIR_H +#define DIR_H + +/* + * We maintain three exclude pattern lists: + * EXC_CMDL lists patterns explicitly given on the command line. + * EXC_DIRS lists patterns obtained from per-directory ignore files. + * EXC_FILE lists patterns from fallback ignore files. + */ +#define EXC_CMDL 0 +#define EXC_DIRS 1 +#define EXC_FILE 2 + + +struct dir_entry { + int len; + char name[FLEX_ARRAY]; /* more */ +}; + +struct exclude_list { + int nr; + int alloc; + struct exclude { + const char *pattern; + const char *base; + int baselen; + } **excludes; +}; + +struct dir_struct { + int nr, alloc; + unsigned int show_ignored:1, + show_other_directories:1, + hide_empty_directories:1; + struct dir_entry **entries; + + /* Exclude info */ + const char *exclude_per_dir; + struct exclude_list exclude_list[3]; +}; + +extern int read_directory(struct dir_struct *, const char *path, const char *base, int baselen); +extern int excluded(struct dir_struct *, const char *); +extern void add_excludes_from_file(struct dir_struct *, const char *fname); +extern void add_exclude(const char *string, const char *base, + int baselen, struct exclude_list *which); +extern int push_exclude_per_directory(struct dir_struct *, + const char *base, int baselen); + +#endif diff --git a/ls-files.c b/ls-files.c index 4a4af1ca3b..89941a3ff8 100644 --- a/ls-files.c +++ b/ls-files.c @@ -5,23 +5,20 @@ * * Copyright (C) Linus Torvalds, 2005 */ -#include #include #include "cache.h" #include "quote.h" +#include "dir.h" static int abbrev = 0; static int show_deleted = 0; static int show_cached = 0; static int show_others = 0; -static int show_ignored = 0; static int show_stage = 0; static int show_unmerged = 0; static int show_modified = 0; static int show_killed = 0; -static int show_other_directories = 0; -static int hide_empty_directories = 0; static int show_valid_bit = 0; static int line_terminator = '\n'; @@ -38,309 +35,6 @@ static const char *tag_other = ""; static const char *tag_killed = ""; static const char *tag_modified = ""; -static const char *exclude_per_dir = NULL; - -/* We maintain three exclude pattern lists: - * EXC_CMDL lists patterns explicitly given on the command line. - * EXC_DIRS lists patterns obtained from per-directory ignore files. - * EXC_FILE lists patterns from fallback ignore files. - */ -#define EXC_CMDL 0 -#define EXC_DIRS 1 -#define EXC_FILE 2 -static struct exclude_list { - int nr; - int alloc; - struct exclude { - const char *pattern; - const char *base; - int baselen; - } **excludes; -} exclude_list[3]; - -static void add_exclude(const char *string, const char *base, - int baselen, struct exclude_list *which) -{ - struct exclude *x = xmalloc(sizeof (*x)); - - x->pattern = string; - x->base = base; - x->baselen = baselen; - if (which->nr == which->alloc) { - which->alloc = alloc_nr(which->alloc); - which->excludes = realloc(which->excludes, - which->alloc * sizeof(x)); - } - which->excludes[which->nr++] = x; -} - -static int add_excludes_from_file_1(const char *fname, - const char *base, - int baselen, - struct exclude_list *which) -{ - int fd, i; - long size; - char *buf, *entry; - - fd = open(fname, O_RDONLY); - if (fd < 0) - goto err; - size = lseek(fd, 0, SEEK_END); - if (size < 0) - goto err; - lseek(fd, 0, SEEK_SET); - if (size == 0) { - close(fd); - return 0; - } - buf = xmalloc(size+1); - if (read(fd, buf, size) != size) - goto err; - close(fd); - - buf[size++] = '\n'; - entry = buf; - for (i = 0; i < size; i++) { - if (buf[i] == '\n') { - if (entry != buf + i && entry[0] != '#') { - buf[i - (i && buf[i-1] == '\r')] = 0; - add_exclude(entry, base, baselen, which); - } - entry = buf + i + 1; - } - } - return 0; - - err: - if (0 <= fd) - close(fd); - return -1; -} - -static void add_excludes_from_file(const char *fname) -{ - if (add_excludes_from_file_1(fname, "", 0, - &exclude_list[EXC_FILE]) < 0) - die("cannot use %s as an exclude file", fname); -} - -static int push_exclude_per_directory(const char *base, int baselen) -{ - char exclude_file[PATH_MAX]; - struct exclude_list *el = &exclude_list[EXC_DIRS]; - int current_nr = el->nr; - - if (exclude_per_dir) { - memcpy(exclude_file, base, baselen); - strcpy(exclude_file + baselen, exclude_per_dir); - add_excludes_from_file_1(exclude_file, base, baselen, el); - } - return current_nr; -} - -static void pop_exclude_per_directory(int stk) -{ - struct exclude_list *el = &exclude_list[EXC_DIRS]; - - while (stk < el->nr) - free(el->excludes[--el->nr]); -} - -/* Scan the list and let the last match determines the fate. - * Return 1 for exclude, 0 for include and -1 for undecided. - */ -static int excluded_1(const char *pathname, - int pathlen, - struct exclude_list *el) -{ - int i; - - if (el->nr) { - for (i = el->nr - 1; 0 <= i; i--) { - struct exclude *x = el->excludes[i]; - const char *exclude = x->pattern; - int to_exclude = 1; - - if (*exclude == '!') { - to_exclude = 0; - exclude++; - } - - if (!strchr(exclude, '/')) { - /* match basename */ - const char *basename = strrchr(pathname, '/'); - basename = (basename) ? basename+1 : pathname; - if (fnmatch(exclude, basename, 0) == 0) - return to_exclude; - } - else { - /* match with FNM_PATHNAME: - * exclude has base (baselen long) implicitly - * in front of it. - */ - int baselen = x->baselen; - if (*exclude == '/') - exclude++; - - if (pathlen < baselen || - (baselen && pathname[baselen-1] != '/') || - strncmp(pathname, x->base, baselen)) - continue; - - if (fnmatch(exclude, pathname+baselen, - FNM_PATHNAME) == 0) - return to_exclude; - } - } - } - return -1; /* undecided */ -} - -static int excluded(const char *pathname) -{ - int pathlen = strlen(pathname); - int st; - - for (st = EXC_CMDL; st <= EXC_FILE; st++) { - switch (excluded_1(pathname, pathlen, &exclude_list[st])) { - case 0: - return 0; - case 1: - return 1; - } - } - return 0; -} - -struct nond_on_fs { - int len; - char name[FLEX_ARRAY]; /* more */ -}; - -static struct nond_on_fs **dir; -static int nr_dir; -static int dir_alloc; - -static void add_name(const char *pathname, int len) -{ - struct nond_on_fs *ent; - - if (cache_name_pos(pathname, len) >= 0) - return; - - if (nr_dir == dir_alloc) { - dir_alloc = alloc_nr(dir_alloc); - dir = xrealloc(dir, dir_alloc*sizeof(ent)); - } - ent = xmalloc(sizeof(*ent) + len + 1); - ent->len = len; - memcpy(ent->name, pathname, len); - ent->name[len] = 0; - dir[nr_dir++] = ent; -} - -static int dir_exists(const char *dirname, int len) -{ - int pos = cache_name_pos(dirname, len); - if (pos >= 0) - return 1; - pos = -pos-1; - if (pos >= active_nr) /* can't */ - return 0; - return !strncmp(active_cache[pos]->name, dirname, len); -} - -/* - * Read a directory tree. We currently ignore anything but - * directories, regular files and symlinks. That's because git - * doesn't handle them at all yet. Maybe that will change some - * day. - * - * Also, we ignore the name ".git" (even if it is not a directory). - * That likely will not change. - */ -static int read_directory(const char *path, const char *base, int baselen) -{ - DIR *fdir = opendir(path); - int contents = 0; - - if (fdir) { - int exclude_stk; - struct dirent *de; - char fullname[MAXPATHLEN + 1]; - memcpy(fullname, base, baselen); - - exclude_stk = push_exclude_per_directory(base, baselen); - - while ((de = readdir(fdir)) != NULL) { - int len; - - if ((de->d_name[0] == '.') && - (de->d_name[1] == 0 || - !strcmp(de->d_name + 1, ".") || - !strcmp(de->d_name + 1, "git"))) - continue; - len = strlen(de->d_name); - memcpy(fullname + baselen, de->d_name, len+1); - if (excluded(fullname) != show_ignored) { - if (!show_ignored || DTYPE(de) != DT_DIR) { - continue; - } - } - - switch (DTYPE(de)) { - struct stat st; - int subdir, rewind_base; - default: - continue; - case DT_UNKNOWN: - if (lstat(fullname, &st)) - continue; - if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) - break; - if (!S_ISDIR(st.st_mode)) - continue; - /* fallthrough */ - case DT_DIR: - memcpy(fullname + baselen + len, "/", 2); - len++; - rewind_base = nr_dir; - subdir = read_directory(fullname, fullname, - baselen + len); - if (show_other_directories && - (subdir || !hide_empty_directories) && - !dir_exists(fullname, baselen + len)) { - // Rewind the read subdirectory - while (nr_dir > rewind_base) - free(dir[--nr_dir]); - break; - } - contents += subdir; - continue; - case DT_REG: - case DT_LNK: - break; - } - add_name(fullname, baselen + len); - contents++; - } - closedir(fdir); - - pop_exclude_per_directory(exclude_stk); - } - - return contents; -} - -static int cmp_name(const void *p1, const void *p2) -{ - const struct nond_on_fs *e1 = *(const struct nond_on_fs **)p1; - const struct nond_on_fs *e2 = *(const struct nond_on_fs **)p2; - - return cache_name_compare(e1->name, e1->len, - e2->name, e2->len); -} /* * Match a pathspec against a filename. The first "len" characters @@ -377,7 +71,7 @@ static int match(const char **spec, char *ps_matched, return 0; } -static void show_dir_entry(const char *tag, struct nond_on_fs *ent) +static void show_dir_entry(const char *tag, struct dir_entry *ent) { int len = prefix_len; int offset = prefix_offset; @@ -393,14 +87,14 @@ static void show_dir_entry(const char *tag, struct nond_on_fs *ent) putchar(line_terminator); } -static void show_other_files(void) +static void show_other_files(struct dir_struct *dir) { int i; - for (i = 0; i < nr_dir; i++) { + for (i = 0; i < dir->nr; i++) { /* We should not have a matching entry, but we * may have an unmerged entry for this path. */ - struct nond_on_fs *ent = dir[i]; + struct dir_entry *ent = dir->entries[i]; int pos = cache_name_pos(ent->name, ent->len); struct cache_entry *ce; if (0 <= pos) @@ -416,11 +110,11 @@ static void show_other_files(void) } } -static void show_killed_files(void) +static void show_killed_files(struct dir_struct *dir) { int i; - for (i = 0; i < nr_dir; i++) { - struct nond_on_fs *ent = dir[i]; + for (i = 0; i < dir->nr; i++) { + struct dir_entry *ent = dir->entries[i]; char *cp, *sp; int pos, len, killed = 0; @@ -461,7 +155,7 @@ static void show_killed_files(void) } } if (killed) - show_dir_entry(tag_killed, dir[i]); + show_dir_entry(tag_killed, dir->entries[i]); } } @@ -512,7 +206,7 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce) } } -static void show_files(void) +static void show_files(struct dir_struct *dir) { int i; @@ -523,14 +217,14 @@ static void show_files(void) if (baselen) { path = base = prefix; - if (exclude_per_dir) { + if (dir->exclude_per_dir) { char *p, *pp = xmalloc(baselen+1); memcpy(pp, prefix, baselen+1); p = pp; while (1) { char save = *p; *p = 0; - push_exclude_per_directory(pp, p-pp); + push_exclude_per_directory(dir, pp, p-pp); *p++ = save; if (!save) break; @@ -543,17 +237,16 @@ static void show_files(void) free(pp); } } - read_directory(path, base, baselen); - qsort(dir, nr_dir, sizeof(struct nond_on_fs *), cmp_name); + read_directory(dir, path, base, baselen); if (show_others) - show_other_files(); + show_other_files(dir); if (show_killed) - show_killed_files(); + show_killed_files(dir); } if (show_cached | show_stage) { for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i]; - if (excluded(ce->name) != show_ignored) + if (excluded(dir, ce->name) != dir->show_ignored) continue; if (show_unmerged && !ce_stage(ce)) continue; @@ -565,7 +258,7 @@ static void show_files(void) struct cache_entry *ce = active_cache[i]; struct stat st; int err; - if (excluded(ce->name) != show_ignored) + if (excluded(dir, ce->name) != dir->show_ignored) continue; err = lstat(ce->name, &st); if (show_deleted && err) @@ -652,7 +345,9 @@ int main(int argc, const char **argv) { int i; int exc_given = 0; + struct dir_struct dir; + memset(&dir, 0, sizeof(dir)); prefix = setup_git_directory(); if (prefix) prefix_offset = strlen(prefix); @@ -697,7 +392,7 @@ int main(int argc, const char **argv) continue; } if (!strcmp(arg, "-i") || !strcmp(arg, "--ignored")) { - show_ignored = 1; + dir.show_ignored = 1; continue; } if (!strcmp(arg, "-s") || !strcmp(arg, "--stage")) { @@ -709,11 +404,11 @@ int main(int argc, const char **argv) continue; } if (!strcmp(arg, "--directory")) { - show_other_directories = 1; + dir.show_other_directories = 1; continue; } if (!strcmp(arg, "--no-empty-directory")) { - hide_empty_directories = 1; + dir.hide_empty_directories = 1; continue; } if (!strcmp(arg, "-u") || !strcmp(arg, "--unmerged")) { @@ -726,27 +421,27 @@ int main(int argc, const char **argv) } if (!strcmp(arg, "-x") && i+1 < argc) { exc_given = 1; - add_exclude(argv[++i], "", 0, &exclude_list[EXC_CMDL]); + add_exclude(argv[++i], "", 0, &dir.exclude_list[EXC_CMDL]); continue; } if (!strncmp(arg, "--exclude=", 10)) { exc_given = 1; - add_exclude(arg+10, "", 0, &exclude_list[EXC_CMDL]); + add_exclude(arg+10, "", 0, &dir.exclude_list[EXC_CMDL]); continue; } if (!strcmp(arg, "-X") && i+1 < argc) { exc_given = 1; - add_excludes_from_file(argv[++i]); + add_excludes_from_file(&dir, argv[++i]); continue; } if (!strncmp(arg, "--exclude-from=", 15)) { exc_given = 1; - add_excludes_from_file(arg+15); + add_excludes_from_file(&dir, arg+15); continue; } if (!strncmp(arg, "--exclude-per-directory=", 24)) { exc_given = 1; - exclude_per_dir = arg + 24; + dir.exclude_per_dir = arg + 24; continue; } if (!strcmp(arg, "--full-name")) { @@ -788,7 +483,7 @@ int main(int argc, const char **argv) ps_matched = xcalloc(1, num); } - if (show_ignored && !exc_given) { + if (dir.show_ignored && !exc_given) { fprintf(stderr, "%s: --ignored needs some exclude pattern\n", argv[0]); exit(1); @@ -802,7 +497,7 @@ int main(int argc, const char **argv) read_cache(); if (prefix) prune_cache(); - show_files(); + show_files(&dir); if (ps_matched) { /* We need to make sure all pathspec matched otherwise From b4189aa84873718f80c62846cd53e803b5f72362 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 16 May 2006 19:46:16 -0700 Subject: [PATCH 2/9] Clean up git-ls-file directory walking library interface This moves the code to add the per-directory ignore files for the base directory into the library routine. That not only allows us to turn the function push_exclude_per_directory() static again, it also simplifies the library interface a lot (the caller no longer needs to worry about any of the per-directory exclude files at all). Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- dir.c | 28 +++++++++++++++++++++++++++- dir.h | 2 -- ls-files.c | 22 +--------------------- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/dir.c b/dir.c index 3f41a5dfea..d40b62e1c1 100644 --- a/dir.c +++ b/dir.c @@ -78,7 +78,7 @@ void add_excludes_from_file(struct dir_struct *dir, const char *fname) die("cannot use %s as an exclude file", fname); } -int push_exclude_per_directory(struct dir_struct *dir, const char *base, int baselen) +static int push_exclude_per_directory(struct dir_struct *dir, const char *base, int baselen) { char exclude_file[PATH_MAX]; struct exclude_list *el = &dir->exclude_list[EXC_DIRS]; @@ -289,6 +289,32 @@ static int cmp_name(const void *p1, const void *p2) int read_directory(struct dir_struct *dir, const char *path, const char *base, int baselen) { + /* + * Make sure to do the per-directory exclude for all the + * directories leading up to our base. + */ + if (baselen) { + if (dir->exclude_per_dir) { + char *p, *pp = xmalloc(baselen+1); + memcpy(pp, base, baselen+1); + p = pp; + while (1) { + char save = *p; + *p = 0; + push_exclude_per_directory(dir, pp, p-pp); + *p++ = save; + if (!save) + break; + p = strchr(p, '/'); + if (p) + p++; + else + p = pp + baselen; + } + free(pp); + } + } + read_directory_recursive(dir, path, base, baselen); qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name); return dir->nr; diff --git a/dir.h b/dir.h index e8fc441e7f..4f65f57085 100644 --- a/dir.h +++ b/dir.h @@ -44,7 +44,5 @@ extern int excluded(struct dir_struct *, const char *); extern void add_excludes_from_file(struct dir_struct *, const char *fname); extern void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *which); -extern int push_exclude_per_directory(struct dir_struct *, - const char *base, int baselen); #endif diff --git a/ls-files.c b/ls-files.c index 89941a3ff8..dfe1481a35 100644 --- a/ls-files.c +++ b/ls-files.c @@ -215,28 +215,8 @@ static void show_files(struct dir_struct *dir) const char *path = ".", *base = ""; int baselen = prefix_len; - if (baselen) { + if (baselen) path = base = prefix; - if (dir->exclude_per_dir) { - char *p, *pp = xmalloc(baselen+1); - memcpy(pp, prefix, baselen+1); - p = pp; - while (1) { - char save = *p; - *p = 0; - push_exclude_per_directory(dir, pp, p-pp); - *p++ = save; - if (!save) - break; - p = strchr(p, '/'); - if (p) - p++; - else - p = pp + baselen; - } - free(pp); - } - } read_directory(dir, path, base, baselen); if (show_others) show_other_files(dir); From 0d78153952e70c21e94dc6b7eefcb2ac5337a902 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 17 May 2006 09:33:32 -0700 Subject: [PATCH 3/9] Do "git add" as a builtin First try. Let's see how well this works. In many ways, the hard parts of "git commit" are not so different from this, and a builtin commit would share a lot of the code, I think. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- Makefile | 2 +- builtin-add.c | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++ builtin.h | 1 + git.c | 1 + 4 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 builtin-add.c diff --git a/Makefile b/Makefile index f43ac63c93..e6f7794ad0 100644 --- a/Makefile +++ b/Makefile @@ -218,7 +218,7 @@ LIB_OBJS = \ BUILTIN_OBJS = \ builtin-log.o builtin-help.o builtin-count.o builtin-diff.o builtin-push.o \ - builtin-grep.o + builtin-grep.o builtin-add.o GITLIBS = $(LIB_FILE) $(XDIFF_LIB) LIBS = $(GITLIBS) -lz diff --git a/builtin-add.c b/builtin-add.c new file mode 100644 index 0000000000..d225988e53 --- /dev/null +++ b/builtin-add.c @@ -0,0 +1,228 @@ +/* + * "git add" builtin command + * + * Copyright (C) 2006 Linus Torvalds + */ +#include + +#include "cache.h" +#include "builtin.h" +#include "dir.h" + +static const char builtin_add_usage[] = +"git-add [-n] [-v] ..."; + +static int common_prefix(const char **pathspec) +{ + const char *path, *slash, *next; + int prefix; + + if (!pathspec) + return 0; + + path = *pathspec; + slash = strrchr(path, '/'); + if (!slash) + return 0; + + prefix = slash - path + 1; + while ((next = *++pathspec) != NULL) { + int len = strlen(next); + if (len >= prefix && !memcmp(path, next, len)) + continue; + for (;;) { + if (!len) + return 0; + if (next[--len] != '/') + continue; + if (memcmp(path, next, len+1)) + continue; + prefix = len + 1; + break; + } + } + return prefix; +} + +static int match(const char **pathspec, const char *name, int namelen, int prefix) +{ + const char *match; + + name += prefix; + namelen -= prefix; + + while ((match = *pathspec++) != NULL) { + int matchlen; + + match += prefix; + matchlen = strlen(match); + if (!matchlen) + return 1; + if (!strncmp(match, name, matchlen)) { + if (match[matchlen-1] == '/') + return 1; + switch (name[matchlen]) { + case '/': case '\0': + return 1; + } + } + if (!fnmatch(match, name, 0)) + return 1; + } + return 0; +} + +static void prune_directory(struct dir_struct *dir, const char **pathspec, int prefix) +{ + int i; + struct dir_entry **src, **dst; + + src = dst = dir->entries; + i = dir->nr; + while (--i >= 0) { + struct dir_entry *entry = *src++; + if (!match(pathspec, entry->name, entry->len, prefix)) { + free(entry); + continue; + } + *dst++ = entry; + } + dir->nr = dst - dir->entries; +} + +static void fill_directory(struct dir_struct *dir, const char **pathspec) +{ + const char *path, *base; + int baselen; + + /* Set up the default git porcelain excludes */ + memset(dir, 0, sizeof(*dir)); + dir->exclude_per_dir = ".gitignore"; + path = git_path("info/exclude"); + if (!access(path, R_OK)) + add_excludes_from_file(dir, path); + + /* + * Calculate common prefix for the pathspec, and + * use that to optimize the directory walk + */ + baselen = common_prefix(pathspec); + path = "."; + base = ""; + if (baselen) { + char *common = xmalloc(baselen + 1); + common = xmalloc(baselen + 1); + memcpy(common, *pathspec, baselen); + common[baselen] = 0; + path = base = common; + } + + /* Read the directory and prune it */ + read_directory(dir, path, base, baselen); + if (pathspec) + prune_directory(dir, pathspec, baselen); +} + +static int add_file_to_index(const char *path, int verbose) +{ + int size, namelen; + struct stat st; + struct cache_entry *ce; + + if (lstat(path, &st)) + die("%s: unable to stat (%s)", path, strerror(errno)); + + if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) + die("%s: can only add regular files or symbolic links", path); + + namelen = strlen(path); + size = cache_entry_size(namelen); + ce = xcalloc(1, size); + memcpy(ce->name, path, namelen); + ce->ce_flags = htons(namelen); + fill_stat_cache_info(ce, &st); + + ce->ce_mode = create_ce_mode(st.st_mode); + if (!trust_executable_bit) { + /* If there is an existing entry, pick the mode bits + * from it. + */ + int pos = cache_name_pos(path, namelen); + if (pos >= 0) + ce->ce_mode = active_cache[pos]->ce_mode; + } + + if (index_path(ce->sha1, path, &st, 1)) + die("unable to index file %s", path); + if (add_cache_entry(ce, ADD_CACHE_OK_TO_ADD)) + die("unable to add %s to index",path); + if (verbose) + printf("add '%s'\n", path); + return 0; +} + +static struct cache_file cache_file; + +int cmd_add(int argc, const char **argv, char **envp) +{ + int i, newfd; + int verbose = 0, show_only = 0; + const char *prefix = setup_git_directory(); + const char **pathspec; + struct dir_struct dir; + + git_config(git_default_config); + + newfd = hold_index_file_for_update(&cache_file, get_index_file()); + if (newfd < 0) + die("unable to create new cachefile"); + + if (read_cache() < 0) + die("index file corrupt"); + + for (i = 1; i < argc; i++) { + const char *arg = argv[i]; + + if (arg[0] != '-') + break; + if (!strcmp(arg, "--")) { + i++; + break; + } + if (!strcmp(arg, "-n")) { + show_only = 1; + continue; + } + if (!strcmp(arg, "-v")) { + verbose = 1; + continue; + } + die(builtin_add_usage); + } + git_config(git_default_config); + pathspec = get_pathspec(prefix, argv + i); + + fill_directory(&dir, pathspec); + + if (show_only) { + const char *sep = "", *eof = ""; + for (i = 0; i < dir.nr; i++) { + printf("%s%s", sep, dir.entries[i]->name); + sep = " "; + eof = "\n"; + } + fputs(eof, stdout); + return 0; + } + + for (i = 0; i < dir.nr; i++) + add_file_to_index(dir.entries[i]->name, verbose); + + if (active_cache_changed) { + if (write_cache(newfd, active_cache, active_nr) || + commit_index_file(&cache_file)) + die("Unable to write new index file"); + } + + return 0; +} diff --git a/builtin.h b/builtin.h index 7744f7d2f6..1b77f4b0ca 100644 --- a/builtin.h +++ b/builtin.h @@ -24,5 +24,6 @@ extern int cmd_count_objects(int argc, const char **argv, char **envp); extern int cmd_push(int argc, const char **argv, char **envp); extern int cmd_grep(int argc, const char **argv, char **envp); +extern int cmd_add(int argc, const char **argv, char **envp); #endif diff --git a/git.c b/git.c index a94d9ee5a7..fac46af057 100644 --- a/git.c +++ b/git.c @@ -50,6 +50,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp) { "count-objects", cmd_count_objects }, { "diff", cmd_diff }, { "grep", cmd_grep }, + { "add", cmd_add }, }; int i; From f25933987f29070e9cd79dfddf03018010e82e80 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 17 May 2006 13:23:19 -0700 Subject: [PATCH 4/9] builtin-add: warn on unmatched pathspecs This is in the same spirit as what bba319b5 and 45e48120 tried to do to help users. A command such as "git add Documentaiton" with misspelled pathspecs would give a friendly reminder with this. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- builtin-add.c | 77 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 19 deletions(-) diff --git a/builtin-add.c b/builtin-add.c index d225988e53..089c7a89b3 100644 --- a/builtin-add.c +++ b/builtin-add.c @@ -44,50 +44,89 @@ static int common_prefix(const char **pathspec) return prefix; } -static int match(const char **pathspec, const char *name, int namelen, int prefix) +static int match_one(const char *match, const char *name, int namelen) { + int matchlen; + + /* If the match was just the prefix, we matched */ + matchlen = strlen(match); + if (!matchlen) + return 1; + + /* + * If we don't match the matchstring exactly, + * we need to match by fnmatch + */ + if (strncmp(match, name, matchlen)) + return !fnmatch(match, name, 0); + + /* + * If we did match the string exactly, we still + * need to make sure that it happened on a path + * component boundary (ie either the last character + * of the match was '/', or the next character of + * the name was '/' or the terminating NUL. + */ + return match[matchlen-1] == '/' || + name[matchlen] == '/' || + !name[matchlen]; +} + +static int match(const char **pathspec, const char *name, int namelen, int prefix, char *seen) +{ + int retval; const char *match; name += prefix; namelen -= prefix; - while ((match = *pathspec++) != NULL) { - int matchlen; - + for (retval = 0; (match = *pathspec++) != NULL; seen++) { + if (retval & *seen) + continue; match += prefix; - matchlen = strlen(match); - if (!matchlen) - return 1; - if (!strncmp(match, name, matchlen)) { - if (match[matchlen-1] == '/') - return 1; - switch (name[matchlen]) { - case '/': case '\0': - return 1; - } + if (match_one(match, name, namelen)) { + retval = 1; + *seen = 1; } - if (!fnmatch(match, name, 0)) - return 1; } - return 0; + return retval; } static void prune_directory(struct dir_struct *dir, const char **pathspec, int prefix) { - int i; + char *seen; + int i, specs; struct dir_entry **src, **dst; + for (specs = 0; pathspec[specs]; specs++) + /* nothing */; + seen = xmalloc(specs); + memset(seen, 0, specs); + src = dst = dir->entries; i = dir->nr; while (--i >= 0) { struct dir_entry *entry = *src++; - if (!match(pathspec, entry->name, entry->len, prefix)) { + if (!match(pathspec, entry->name, entry->len, prefix, seen)) { free(entry); continue; } *dst++ = entry; } dir->nr = dst - dir->entries; + + for (i = 0; i < specs; i++) { + struct stat st; + const char *match; + if (seen[i]) + continue; + + /* Existing file? We must have ignored it */ + match = pathspec[i]; + if (!lstat(match, &st)) + continue; + die("pathspec '%s' did not match any files", match); + } } static void fill_directory(struct dir_struct *dir, const char **pathspec) From c699f9b924d763b762df932769d91e3d053634a8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 17 May 2006 21:21:04 -0700 Subject: [PATCH 5/9] Remove old "git-add.sh" remnants Repeat after me: "It's now a built-in" Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- Makefile | 4 ++-- git-add.sh | 56 ------------------------------------------------------ 2 files changed, 2 insertions(+), 58 deletions(-) delete mode 100755 git-add.sh diff --git a/Makefile b/Makefile index e6f7794ad0..48e2a9cb22 100644 --- a/Makefile +++ b/Makefile @@ -113,7 +113,7 @@ SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ ### --- END CONFIGURATION SECTION --- SCRIPT_SH = \ - git-add.sh git-bisect.sh git-branch.sh git-checkout.sh \ + git-bisect.sh git-branch.sh git-checkout.sh \ git-cherry.sh git-clean.sh git-clone.sh git-commit.sh \ git-fetch.sh \ git-format-patch.sh git-ls-remote.sh \ @@ -170,7 +170,7 @@ PROGRAMS = \ BUILT_INS = git-log$X git-whatchanged$X git-show$X \ git-count-objects$X git-diff$X git-push$X \ - git-grep$X + git-grep$X git-add$X # what 'all' will build and 'install' will install, in gitexecdir ALL_PROGRAMS = $(PROGRAMS) $(SIMPLE_PROGRAMS) $(SCRIPTS) diff --git a/git-add.sh b/git-add.sh deleted file mode 100755 index d6a4bc7d09..0000000000 --- a/git-add.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/sh - -USAGE='[-n] [-v] ...' -SUBDIRECTORY_OK='Yes' -. git-sh-setup - -show_only= -verbose= -while : ; do - case "$1" in - -n) - show_only=true - ;; - -v) - verbose=--verbose - ;; - --) - shift - break - ;; - -*) - usage - ;; - *) - break - ;; - esac - shift -done - -# Check misspelled pathspec -case "$#" in -0) ;; -*) - git-ls-files --error-unmatch --others --cached -- "$@" >/dev/null || { - echo >&2 "Maybe you misspelled it?" - exit 1 - } - ;; -esac - -if test -f "$GIT_DIR/info/exclude" -then - git-ls-files -z \ - --exclude-from="$GIT_DIR/info/exclude" \ - --others --exclude-per-directory=.gitignore -- "$@" -else - git-ls-files -z \ - --others --exclude-per-directory=.gitignore -- "$@" -fi | -case "$show_only" in -true) - xargs -0 echo ;; -*) - git-update-index --add $verbose -z --stdin ;; -esac From e8f990b4e4b56f214138cc475c19e5a253e9148e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 18 May 2006 01:29:36 -0700 Subject: [PATCH 6/9] builtin-add: fix unmatched pathspec warnings. "git add Documentation/" when Documentation directory exists does not barf (as it should not), but "git add ." barfed when it did not add anything. This was because we checked for the path prefix ("Documentation/" in the former case, and an empty string in the latter case) for existence, and lstat("", &st) would say "Huh?". Signed-off-by: Junio C Hamano --- builtin-add.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin-add.c b/builtin-add.c index 089c7a89b3..37243f8c36 100644 --- a/builtin-add.c +++ b/builtin-add.c @@ -123,7 +123,7 @@ static void prune_directory(struct dir_struct *dir, const char **pathspec, int p /* Existing file? We must have ignored it */ match = pathspec[i]; - if (!lstat(match, &st)) + if (!match[0] || !lstat(match, &st)) continue; die("pathspec '%s' did not match any files", match); } From 8dcf39c46e2931ca02b18b1ea3a6b21f446d8de8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 18 May 2006 12:07:31 -0700 Subject: [PATCH 7/9] Prevent bogus paths from being added to the index. With this one, it's now a fatal error to try to add a pathname that cannot be added with "git add", i.e. [torvalds@g5 git]$ git add .git/config fatal: unable to add .git/config to index and [torvalds@g5 git]$ git add foo/../bar fatal: unable to add foo/../bar to index instead of the old "Ignoring path xyz" warning that would end up silently succeeding on any other paths. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- cache.h | 1 + read-cache.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++ update-index.c | 64 ------------------------------------------------ 3 files changed, 67 insertions(+), 64 deletions(-) diff --git a/cache.h b/cache.h index 4b7a439253..89aa4f6c7e 100644 --- a/cache.h +++ b/cache.h @@ -142,6 +142,7 @@ extern void verify_non_filename(const char *prefix, const char *name); /* Initialize and use the cache information */ extern int read_cache(void); extern int write_cache(int newfd, struct cache_entry **cache, int entries); +extern int verify_path(const char *path); extern int cache_name_pos(const char *name, int namelen); #define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */ #define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */ diff --git a/read-cache.c b/read-cache.c index a917ab0cfe..e8fa6d0812 100644 --- a/read-cache.c +++ b/read-cache.c @@ -331,6 +331,70 @@ int ce_path_match(const struct cache_entry *ce, const char **pathspec) return 0; } +/* + * We fundamentally don't like some paths: we don't want + * dot or dot-dot anywhere, and for obvious reasons don't + * want to recurse into ".git" either. + * + * Also, we don't want double slashes or slashes at the + * end that can make pathnames ambiguous. + */ +static int verify_dotfile(const char *rest) +{ + /* + * The first character was '.', but that + * has already been discarded, we now test + * the rest. + */ + switch (*rest) { + /* "." is not allowed */ + case '\0': case '/': + return 0; + + /* + * ".git" followed by NUL or slash is bad. This + * shares the path end test with the ".." case. + */ + case 'g': + if (rest[1] != 'i') + break; + if (rest[2] != 't') + break; + rest += 2; + /* fallthrough */ + case '.': + if (rest[1] == '\0' || rest[1] == '/') + return 0; + } + return 1; +} + +int verify_path(const char *path) +{ + char c; + + goto inside; + for (;;) { + if (!c) + return 1; + if (c == '/') { +inside: + c = *path++; + switch (c) { + default: + continue; + case '/': case '\0': + break; + case '.': + if (verify_dotfile(path)) + continue; + } + return 0; + } + c = *path++; + } +} + /* * Do we have another file that has the beginning components being a * proper superset of the name we're trying to add? @@ -472,6 +536,8 @@ int add_cache_entry(struct cache_entry *ce, int option) if (!ok_to_add) return -1; + if (!verify_path(ce->name)) + return -1; if (!skip_df_check && check_file_directory_conflict(ce, pos, ok_to_replace)) { diff --git a/update-index.c b/update-index.c index 3d7e02db2c..859efc7916 100644 --- a/update-index.c +++ b/update-index.c @@ -237,70 +237,6 @@ static int refresh_cache(int really) return has_errors; } -/* - * We fundamentally don't like some paths: we don't want - * dot or dot-dot anywhere, and for obvious reasons don't - * want to recurse into ".git" either. - * - * Also, we don't want double slashes or slashes at the - * end that can make pathnames ambiguous. - */ -static int verify_dotfile(const char *rest) -{ - /* - * The first character was '.', but that - * has already been discarded, we now test - * the rest. - */ - switch (*rest) { - /* "." is not allowed */ - case '\0': case '/': - return 0; - - /* - * ".git" followed by NUL or slash is bad. This - * shares the path end test with the ".." case. - */ - case 'g': - if (rest[1] != 'i') - break; - if (rest[2] != 't') - break; - rest += 2; - /* fallthrough */ - case '.': - if (rest[1] == '\0' || rest[1] == '/') - return 0; - } - return 1; -} - -static int verify_path(const char *path) -{ - char c; - - goto inside; - for (;;) { - if (!c) - return 1; - if (c == '/') { -inside: - c = *path++; - switch (c) { - default: - continue; - case '/': case '\0': - break; - case '.': - if (verify_dotfile(path)) - continue; - } - return 0; - } - c = *path++; - } -} - static int add_cacheinfo(unsigned int mode, const unsigned char *sha1, const char *path, int stage) { From 3c6a370b0ee78114a656acba04fddf306252b9d5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 May 2006 16:07:51 -0700 Subject: [PATCH 8/9] Move pathspec matching from builtin-add.c into dir.c I'll use it for builtin-rm.c too. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- builtin-add.c | 82 +-------------------------------------------------- dir.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++ dir.h | 3 ++ 3 files changed, 84 insertions(+), 81 deletions(-) diff --git a/builtin-add.c b/builtin-add.c index 37243f8c36..6166f66bce 100644 --- a/builtin-add.c +++ b/builtin-add.c @@ -12,86 +12,6 @@ static const char builtin_add_usage[] = "git-add [-n] [-v] ..."; -static int common_prefix(const char **pathspec) -{ - const char *path, *slash, *next; - int prefix; - - if (!pathspec) - return 0; - - path = *pathspec; - slash = strrchr(path, '/'); - if (!slash) - return 0; - - prefix = slash - path + 1; - while ((next = *++pathspec) != NULL) { - int len = strlen(next); - if (len >= prefix && !memcmp(path, next, len)) - continue; - for (;;) { - if (!len) - return 0; - if (next[--len] != '/') - continue; - if (memcmp(path, next, len+1)) - continue; - prefix = len + 1; - break; - } - } - return prefix; -} - -static int match_one(const char *match, const char *name, int namelen) -{ - int matchlen; - - /* If the match was just the prefix, we matched */ - matchlen = strlen(match); - if (!matchlen) - return 1; - - /* - * If we don't match the matchstring exactly, - * we need to match by fnmatch - */ - if (strncmp(match, name, matchlen)) - return !fnmatch(match, name, 0); - - /* - * If we did match the string exactly, we still - * need to make sure that it happened on a path - * component boundary (ie either the last character - * of the match was '/', or the next character of - * the name was '/' or the terminating NUL. - */ - return match[matchlen-1] == '/' || - name[matchlen] == '/' || - !name[matchlen]; -} - -static int match(const char **pathspec, const char *name, int namelen, int prefix, char *seen) -{ - int retval; - const char *match; - - name += prefix; - namelen -= prefix; - - for (retval = 0; (match = *pathspec++) != NULL; seen++) { - if (retval & *seen) - continue; - match += prefix; - if (match_one(match, name, namelen)) { - retval = 1; - *seen = 1; - } - } - return retval; -} - static void prune_directory(struct dir_struct *dir, const char **pathspec, int prefix) { char *seen; @@ -107,7 +27,7 @@ static void prune_directory(struct dir_struct *dir, const char **pathspec, int p i = dir->nr; while (--i >= 0) { struct dir_entry *entry = *src++; - if (!match(pathspec, entry->name, entry->len, prefix, seen)) { + if (!match_pathspec(pathspec, entry->name, entry->len, prefix, seen)) { free(entry); continue; } diff --git a/dir.c b/dir.c index d40b62e1c1..d778ecd890 100644 --- a/dir.c +++ b/dir.c @@ -11,6 +11,86 @@ #include "cache.h" #include "dir.h" +int common_prefix(const char **pathspec) +{ + const char *path, *slash, *next; + int prefix; + + if (!pathspec) + return 0; + + path = *pathspec; + slash = strrchr(path, '/'); + if (!slash) + return 0; + + prefix = slash - path + 1; + while ((next = *++pathspec) != NULL) { + int len = strlen(next); + if (len >= prefix && !memcmp(path, next, len)) + continue; + for (;;) { + if (!len) + return 0; + if (next[--len] != '/') + continue; + if (memcmp(path, next, len+1)) + continue; + prefix = len + 1; + break; + } + } + return prefix; +} + +static int match_one(const char *match, const char *name, int namelen) +{ + int matchlen; + + /* If the match was just the prefix, we matched */ + matchlen = strlen(match); + if (!matchlen) + return 1; + + /* + * If we don't match the matchstring exactly, + * we need to match by fnmatch + */ + if (strncmp(match, name, matchlen)) + return !fnmatch(match, name, 0); + + /* + * If we did match the string exactly, we still + * need to make sure that it happened on a path + * component boundary (ie either the last character + * of the match was '/', or the next character of + * the name was '/' or the terminating NUL. + */ + return match[matchlen-1] == '/' || + name[matchlen] == '/' || + !name[matchlen]; +} + +int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen) +{ + int retval; + const char *match; + + name += prefix; + namelen -= prefix; + + for (retval = 0; (match = *pathspec++) != NULL; seen++) { + if (retval & *seen) + continue; + match += prefix; + if (match_one(match, name, namelen)) { + retval = 1; + *seen = 1; + } + } + return retval; +} + void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *which) { diff --git a/dir.h b/dir.h index 4f65f57085..56a1b7fce2 100644 --- a/dir.h +++ b/dir.h @@ -39,6 +39,9 @@ struct dir_struct { struct exclude_list exclude_list[3]; }; +extern int common_prefix(const char **pathspec); +extern int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen); + extern int read_directory(struct dir_struct *, const char *path, const char *base, int baselen); extern int excluded(struct dir_struct *, const char *); extern void add_excludes_from_file(struct dir_struct *, const char *fname); From d9b814cc97f16daac06566a5340121c446136d22 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 May 2006 16:19:34 -0700 Subject: [PATCH 9/9] Add builtin "git rm" command This changes semantics very subtly, because it adds a new atomicity guarantee. In particular, if you "git rm" several files, it will now do all or nothing. The old shell-script really looped over the removed files one by one, and would basically randomly fail in the middle if "-f" was used and one of the files didn't exist in the working directory. This C builtin one will not re-write the index after each remove, but instead remove all files at once. However, that means that if "-f" is used (to also force removal of the file from the working directory), and some files have already been removed from the workspace, it won't stop in the middle in some half-way state like the old one did. So what happens is that if the _first_ file fails to be removed with "-f", we abort the whole "git rm". But once we've started removing, we don't leave anything half done. If some of the other files don't exist, we'll just ignore errors of removal from the working tree. This is only an issue with "-f", of course. I think the new behaviour is strictly an improvement, but perhaps more importantly, it is _different_. As a special case, the semantics are identical for the single-file case (which is the only one our test-suite seems to test). The other question is what to do with leading directories. The old "git rm" script didn't do anything, which is somewhat inconsistent. This one will actually clean up directories that have become empty as a result of removing the last file, but maybe we want to have a flag to decide the behaviour? Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- Makefile | 8 +-- builtin-rm.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++ builtin.h | 1 + git-rm.sh | 70 ------------------------ git.c | 1 + 5 files changed, 157 insertions(+), 73 deletions(-) create mode 100644 builtin-rm.c delete mode 100755 git-rm.sh diff --git a/Makefile b/Makefile index 48e2a9cb22..d4a91135c3 100644 --- a/Makefile +++ b/Makefile @@ -120,7 +120,7 @@ SCRIPT_SH = \ git-merge-one-file.sh git-parse-remote.sh \ git-prune.sh git-pull.sh git-rebase.sh \ git-repack.sh git-request-pull.sh git-reset.sh \ - git-resolve.sh git-revert.sh git-rm.sh git-sh-setup.sh \ + git-resolve.sh git-revert.sh git-sh-setup.sh \ git-tag.sh git-verify-tag.sh \ git-applymbox.sh git-applypatch.sh git-am.sh \ git-merge.sh git-merge-stupid.sh git-merge-octopus.sh \ @@ -170,7 +170,8 @@ PROGRAMS = \ BUILT_INS = git-log$X git-whatchanged$X git-show$X \ git-count-objects$X git-diff$X git-push$X \ - git-grep$X git-add$X + git-grep$X git-add$X git-rm$X git-rev-list$X \ + git-check-ref-format$X # what 'all' will build and 'install' will install, in gitexecdir ALL_PROGRAMS = $(PROGRAMS) $(SIMPLE_PROGRAMS) $(SCRIPTS) @@ -218,7 +219,8 @@ LIB_OBJS = \ BUILTIN_OBJS = \ builtin-log.o builtin-help.o builtin-count.o builtin-diff.o builtin-push.o \ - builtin-grep.o builtin-add.o + builtin-grep.o builtin-add.o builtin-rev-list.o builtin-check-ref-format.o \ + builtin-rm.o GITLIBS = $(LIB_FILE) $(XDIFF_LIB) LIBS = $(GITLIBS) -lz diff --git a/builtin-rm.c b/builtin-rm.c new file mode 100644 index 0000000000..9014c61556 --- /dev/null +++ b/builtin-rm.c @@ -0,0 +1,150 @@ +/* + * "git rm" builtin command + * + * Copyright (C) Linus Torvalds 2006 + */ +#include "cache.h" +#include "builtin.h" +#include "dir.h" + +static const char builtin_rm_usage[] = +"git-rm [-n] [-v] [-f] ..."; + +static struct { + int nr, alloc; + const char **name; +} list; + +static void add_list(const char *name) +{ + if (list.nr >= list.alloc) { + list.alloc = alloc_nr(list.alloc); + list.name = xrealloc(list.name, list.alloc * sizeof(const char *)); + } + list.name[list.nr++] = name; +} + +static int remove_file(const char *name) +{ + int ret; + char *slash; + + ret = unlink(name); + if (!ret && (slash = strrchr(name, '/'))) { + char *n = strdup(name); + do { + n[slash - name] = 0; + name = n; + } while (!rmdir(name) && (slash = strrchr(name, '/'))); + } + return ret; +} + +static struct cache_file cache_file; + +int cmd_rm(int argc, const char **argv, char **envp) +{ + int i, newfd; + int verbose = 0, show_only = 0, force = 0; + const char *prefix = setup_git_directory(); + const char **pathspec; + char *seen; + + git_config(git_default_config); + + newfd = hold_index_file_for_update(&cache_file, get_index_file()); + if (newfd < 0) + die("unable to create new index file"); + + if (read_cache() < 0) + die("index file corrupt"); + + for (i = 1 ; i < argc ; i++) { + const char *arg = argv[i]; + + if (*arg != '-') + break; + if (!strcmp(arg, "--")) { + i++; + break; + } + if (!strcmp(arg, "-n")) { + show_only = 1; + continue; + } + if (!strcmp(arg, "-v")) { + verbose = 1; + continue; + } + if (!strcmp(arg, "-f")) { + force = 1; + continue; + } + die(builtin_rm_usage); + } + pathspec = get_pathspec(prefix, argv + i); + + seen = NULL; + if (pathspec) { + for (i = 0; pathspec[i] ; i++) + /* nothing */; + seen = xmalloc(i); + memset(seen, 0, i); + } + + for (i = 0; i < active_nr; i++) { + struct cache_entry *ce = active_cache[i]; + if (!match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, seen)) + continue; + add_list(ce->name); + } + + if (pathspec) { + const char *match; + for (i = 0; (match = pathspec[i]) != NULL ; i++) { + if (*match && !seen[i]) + die("pathspec '%s' did not match any files", match); + } + } + + /* + * First remove the names from the index: we won't commit + * the index unless all of them succeed + */ + for (i = 0; i < list.nr; i++) { + const char *path = list.name[i]; + printf("rm '%s'\n", path); + + if (remove_file_from_cache(path)) + die("git rm: unable to remove %s", path); + } + + /* + * Then, if we used "-f", remove the filenames from the + * workspace. If we fail to remove the first one, we + * abort the "git rm" (but once we've successfully removed + * any file at all, we'll go ahead and commit to it all: + * by then we've already committed ourself and can't fail + * in the middle) + */ + if (force) { + int removed = 0; + for (i = 0; i < list.nr; i++) { + const char *path = list.name[i]; + if (!remove_file(path)) { + removed = 1; + continue; + } + if (!removed) + die("git rm: %s: %s", path, strerror(errno)); + } + } + + if (active_cache_changed) { + if (write_cache(newfd, active_cache, active_nr) || + commit_index_file(&cache_file)) + die("Unable to write new index file"); + } + + return 0; +} diff --git a/builtin.h b/builtin.h index 1b77f4b0ca..c1cb765dea 100644 --- a/builtin.h +++ b/builtin.h @@ -24,6 +24,7 @@ extern int cmd_count_objects(int argc, const char **argv, char **envp); extern int cmd_push(int argc, const char **argv, char **envp); extern int cmd_grep(int argc, const char **argv, char **envp); +extern int cmd_rm(int argc, const char **argv, char **envp); extern int cmd_add(int argc, const char **argv, char **envp); #endif diff --git a/git-rm.sh b/git-rm.sh deleted file mode 100755 index fda4541c76..0000000000 --- a/git-rm.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/sh - -USAGE='[-f] [-n] [-v] [--] ...' -SUBDIRECTORY_OK='Yes' -. git-sh-setup - -remove_files= -show_only= -verbose= -while : ; do - case "$1" in - -f) - remove_files=true - ;; - -n) - show_only=true - ;; - -v) - verbose=--verbose - ;; - --) - shift; break - ;; - -*) - usage - ;; - *) - break - ;; - esac - shift -done - -# This is typo-proofing. If some paths match and some do not, we want -# to do nothing. -case "$#" in -0) ;; -*) - git-ls-files --error-unmatch -- "$@" >/dev/null || { - echo >&2 "Maybe you misspelled it?" - exit 1 - } - ;; -esac - -if test -f "$GIT_DIR/info/exclude" -then - git-ls-files -z \ - --exclude-from="$GIT_DIR/info/exclude" \ - --exclude-per-directory=.gitignore -- "$@" -else - git-ls-files -z \ - --exclude-per-directory=.gitignore -- "$@" -fi | -case "$show_only,$remove_files" in -true,*) - xargs -0 echo - ;; -*,true) - xargs -0 sh -c " - while [ \$# -gt 0 ]; do - file=\$1; shift - rm -- \"\$file\" && git-update-index --remove $verbose \"\$file\" - done - " inline - ;; -*) - git-update-index --force-remove $verbose -z --stdin - ;; -esac diff --git a/git.c b/git.c index fac46af057..20c0f197a3 100644 --- a/git.c +++ b/git.c @@ -50,6 +50,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp) { "count-objects", cmd_count_objects }, { "diff", cmd_diff }, { "grep", cmd_grep }, + { "rm", cmd_rm }, { "add", cmd_add }, }; int i;