From 844c11ae259bd33b971b9ca389b3f9619427e9a8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Apr 2007 21:13:29 -0700 Subject: [PATCH 01/18] diff-lib: use ce_mode_from_stat() rather than messing with modes manually The diff helpers used to do the magic mode canonicalization and all the other special mode handling by hand ("trust executable bit" and "has symlink support" handling). That's bogus. Use "ce_mode_from_stat()" that does this all for us. This is also going to be required when we add support for links to other git repositories. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff-lib.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/diff-lib.c b/diff-lib.c index 5c5b05bfe3..c6d127346a 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -357,7 +357,7 @@ int run_diff_files(struct rev_info *revs, int silent_on_removed) continue; } else - dpath->mode = canon_mode(st.st_mode); + dpath->mode = ntohl(ce_mode_from_stat(ce, st.st_mode)); while (i < entries) { struct cache_entry *nce = active_cache[i]; @@ -374,8 +374,7 @@ int run_diff_files(struct rev_info *revs, int silent_on_removed) int mode = ntohl(nce->ce_mode); num_compare_stages++; hashcpy(dpath->parent[stage-2].sha1, nce->sha1); - dpath->parent[stage-2].mode = - canon_mode(mode); + dpath->parent[stage-2].mode = ntohl(ce_mode_from_stat(nce, mode)); dpath->parent[stage-2].status = DIFF_STATUS_MODIFIED; } @@ -424,15 +423,7 @@ int run_diff_files(struct rev_info *revs, int silent_on_removed) if (!changed && !revs->diffopt.find_copies_harder) continue; oldmode = ntohl(ce->ce_mode); - - newmode = canon_mode(st.st_mode); - if (!trust_executable_bit && - S_ISREG(newmode) && S_ISREG(oldmode) && - ((newmode ^ oldmode) == 0111)) - newmode = oldmode; - else if (!has_symlinks && - S_ISREG(newmode) && S_ISLNK(oldmode)) - newmode = oldmode; + newmode = ntohl(ce_mode_from_stat(ce, st.st_mode)); diff_change(&revs->diffopt, oldmode, newmode, ce->sha1, (changed ? null_sha1 : ce->sha1), ce->name, NULL); From 5d5cea67af386cfd53428f1eb404841eca8e9062 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Apr 2007 21:13:58 -0700 Subject: [PATCH 02/18] Avoid overflowing name buffer in deep directory structures This just makes sure that when we do a read_directory(), we check that the filename fits in the buffer we allocated (with a bit of slop) Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- dir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dir.c b/dir.c index 7426fde330..4f5a2241e6 100644 --- a/dir.c +++ b/dir.c @@ -353,6 +353,9 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co !strcmp(de->d_name + 1, "git"))) continue; len = strlen(de->d_name); + /* Ignore overly long pathnames! */ + if (len + baselen + 8 > sizeof(fullname)) + continue; memcpy(fullname + baselen, de->d_name, len+1); if (simplify_away(fullname, baselen + len, simplify)) continue; From 0ebde32c87da2efac5985a808e6bd4130831b7a8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Apr 2007 21:14:26 -0700 Subject: [PATCH 03/18] Add 'resolve_gitlink_ref()' helper function This new function resolves a ref in *another* git repository. It's named for its intended use: to look up the git link to a subproject. It's not actually wired up to anything yet, but we're getting closer to having fundamental plumbing support for "links" from one git directory to another, which is the basis of subproject support. [jc: amended a FILE* leak] Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- refs.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ refs.h | 3 +++ 2 files changed, 83 insertions(+) diff --git a/refs.c b/refs.c index d2b7b7fb56..11a67a8c86 100644 --- a/refs.c +++ b/refs.c @@ -215,6 +215,86 @@ static struct ref_list *get_loose_refs(void) /* We allow "recursive" symbolic refs. Only within reason, though */ #define MAXDEPTH 5 +#define MAXREFLEN (1024) + +static int resolve_gitlink_packed_ref(char *name, int pathlen, const char *refname, unsigned char *result) +{ + FILE *f; + struct cached_refs refs; + struct ref_list *ref; + int retval; + + strcpy(name + pathlen, "packed-refs"); + f = fopen(name, "r"); + if (!f) + return -1; + read_packed_refs(f, &refs); + fclose(f); + ref = refs.packed; + retval = -1; + while (ref) { + if (!strcmp(ref->name, refname)) { + retval = 0; + memcpy(result, ref->sha1, 20); + break; + } + ref = ref->next; + } + free_ref_list(refs.packed); + return retval; +} + +static int resolve_gitlink_ref_recursive(char *name, int pathlen, const char *refname, unsigned char *result, int recursion) +{ + int fd, len = strlen(refname); + char buffer[128], *p; + + if (recursion > MAXDEPTH || len > MAXREFLEN) + return -1; + memcpy(name + pathlen, refname, len+1); + fd = open(name, O_RDONLY); + if (fd < 0) + return resolve_gitlink_packed_ref(name, pathlen, refname, result); + + len = read(fd, buffer, sizeof(buffer)-1); + close(fd); + if (len < 0) + return -1; + while (len && isspace(buffer[len-1])) + len--; + buffer[len] = 0; + + /* Was it a detached head or an old-fashioned symlink? */ + if (!get_sha1_hex(buffer, result)) + return 0; + + /* Symref? */ + if (strncmp(buffer, "ref:", 4)) + return -1; + p = buffer + 4; + while (isspace(*p)) + p++; + + return resolve_gitlink_ref_recursive(name, pathlen, p, result, recursion+1); +} + +int resolve_gitlink_ref(const char *path, const char *refname, unsigned char *result) +{ + int len = strlen(path), retval; + char *gitdir; + + while (len && path[len-1] == '/') + len--; + if (!len) + return -1; + gitdir = xmalloc(len + MAXREFLEN + 8); + memcpy(gitdir, path, len); + memcpy(gitdir + len, "/.git/", 7); + + retval = resolve_gitlink_ref_recursive(gitdir, len+6, refname, result, 0); + free(gitdir); + return retval; +} const char *resolve_ref(const char *ref, unsigned char *sha1, int reading, int *flag) { diff --git a/refs.h b/refs.h index acedffc0e4..f61f6d934e 100644 --- a/refs.h +++ b/refs.h @@ -60,4 +60,7 @@ extern int check_ref_format(const char *target); /** rename ref, return 0 on success **/ extern int rename_ref(const char *oldref, const char *newref, const char *logmsg); +/** resolve ref in nested "gitlink" repository */ +extern int resolve_gitlink_ref(const char *name, const char *refname, unsigned char *result); + #endif /* REFS_H */ From 9eec4795d44439cd170fb52c73827c728252648d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Apr 2007 21:14:58 -0700 Subject: [PATCH 04/18] Add "S_IFDIRLNK" file mode infrastructure for git links This just adds the basic helper functions to recognize and work with git tree entries that are links to other git repositories ("subprojects"). They still aren't actually connected up to any of the code-paths, but now all the infrastructure is in place. The next commit will start actually adding actual subproject support. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- cache.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/cache.h b/cache.h index eb57507b80..1b3d00ee11 100644 --- a/cache.h +++ b/cache.h @@ -24,6 +24,22 @@ #define DTYPE(de) DT_UNKNOWN #endif +/* + * A "directory link" is a link to another git directory. + * + * The value 0160000 is not normally a valid mode, and + * also just happens to be S_IFDIR + S_IFLNK + * + * NOTE! We *really* shouldn't depend on the S_IFxxx macros + * always having the same values everywhere. We should use + * our internal git values for these things, and then we can + * translate that to the OS-specific value. It just so + * happens that everybody shares the same bit representation + * in the UNIX world (and apparently wider too..) + */ +#define S_IFDIRLNK 0160000 +#define S_ISDIRLNK(m) (((m) & S_IFMT) == S_IFDIRLNK) + /* * Intensive research over the course of many years has shown that * port 9418 is totally unused by anything else. Or @@ -104,6 +120,8 @@ static inline unsigned int create_ce_mode(unsigned int mode) { if (S_ISLNK(mode)) return htonl(S_IFLNK); + if (S_ISDIR(mode) || S_ISDIRLNK(mode)) + return htonl(S_IFDIRLNK); return htonl(S_IFREG | ce_permissions(mode)); } static inline unsigned int ce_mode_from_stat(struct cache_entry *ce, unsigned int mode) @@ -121,7 +139,7 @@ static inline unsigned int ce_mode_from_stat(struct cache_entry *ce, unsigned in } #define canon_mode(mode) \ (S_ISREG(mode) ? (S_IFREG | ce_permissions(mode)) : \ - S_ISLNK(mode) ? S_IFLNK : S_IFDIR) + S_ISLNK(mode) ? S_IFLNK : S_ISDIR(mode) ? S_IFDIR : S_IFDIRLNK) #define cache_entry_size(len) ((offsetof(struct cache_entry,name) + (len) + 8) & ~7) From 8d9721c86b0169c282ad1c5528317eafeb7fb0f7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Apr 2007 21:15:29 -0700 Subject: [PATCH 05/18] Teach "fsck" not to follow subproject links Since the subprojects don't necessarily even exist in the current tree, much less in the current git repository (they are totally independent repositories), we do not want to try to follow the chain from one git repository to another through a gitlink. This involves teaching fsck to ignore references to gitlink objects from a tree and from the current index. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- builtin-fsck.c | 9 ++++++++- tree.c | 15 ++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/builtin-fsck.c b/builtin-fsck.c index 4d8b66c344..f22de8deaa 100644 --- a/builtin-fsck.c +++ b/builtin-fsck.c @@ -253,6 +253,7 @@ static int fsck_tree(struct tree *item) case S_IFREG | 0644: case S_IFLNK: case S_IFDIR: + case S_IFDIRLNK: break; /* * This is nonstandard, but we had a few of these @@ -695,8 +696,14 @@ int cmd_fsck(int argc, char **argv, const char *prefix) int i; read_cache(); for (i = 0; i < active_nr; i++) { - struct blob *blob = lookup_blob(active_cache[i]->sha1); + unsigned int mode; + struct blob *blob; struct object *obj; + + mode = ntohl(active_cache[i]->ce_mode); + if (S_ISDIRLNK(mode)) + continue; + blob = lookup_blob(active_cache[i]->sha1); if (!blob) continue; obj = &blob->object; diff --git a/tree.c b/tree.c index d188c0fbae..dbb63fc525 100644 --- a/tree.c +++ b/tree.c @@ -143,6 +143,14 @@ struct tree *lookup_tree(const unsigned char *sha1) return (struct tree *) obj; } +/* + * NOTE! Tree refs to external git repositories + * (ie gitlinks) do not count as real references. + * + * You don't have to have those repositories + * available at all, much less have the objects + * accessible from the current repository. + */ static void track_tree_refs(struct tree *item) { int n_refs = 0, i; @@ -152,8 +160,11 @@ static void track_tree_refs(struct tree *item) /* Count how many entries there are.. */ init_tree_desc(&desc, item->buffer, item->size); - while (tree_entry(&desc, &entry)) + while (tree_entry(&desc, &entry)) { + if (S_ISDIRLNK(entry.mode)) + continue; n_refs++; + } /* Allocate object refs and walk it again.. */ i = 0; @@ -162,6 +173,8 @@ static void track_tree_refs(struct tree *item) while (tree_entry(&desc, &entry)) { struct object *obj; + if (S_ISDIRLNK(entry.mode)) + continue; if (S_ISDIR(entry.mode)) obj = &lookup_tree(entry.sha1)->object; else From f35a6d3bce79c2995bbf0a3bd9fcad29e54a8d3c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Apr 2007 21:20:29 -0700 Subject: [PATCH 06/18] Teach core object handling functions about gitlinks This teaches the really fundamental core SHA1 object handling routines about gitlinks. We can compare trees with gitlinks in them (although we can not actually generate patches for them yet - just raw git diffs), and they show up as commits in "git ls-tree". We also know to compare gitlinks as if they were directories (ie the normal "sort as trees" rules apply). [jc: amended a cut&paste error] Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- builtin-ls-tree.c | 20 +++++++++++++++++++- cache-tree.c | 2 +- read-cache.c | 35 +++++++++++++++++++++++++++++++---- sha1_file.c | 3 +++ 4 files changed, 54 insertions(+), 6 deletions(-) diff --git a/builtin-ls-tree.c b/builtin-ls-tree.c index 6472610ac2..1cb4dca277 100644 --- a/builtin-ls-tree.c +++ b/builtin-ls-tree.c @@ -6,6 +6,7 @@ #include "cache.h" #include "blob.h" #include "tree.h" +#include "commit.h" #include "quote.h" #include "builtin.h" @@ -59,7 +60,24 @@ static int show_tree(const unsigned char *sha1, const char *base, int baselen, int retval = 0; const char *type = blob_type; - if (S_ISDIR(mode)) { + if (S_ISDIRLNK(mode)) { + /* + * Maybe we want to have some recursive version here? + * + * Something like: + * + if (show_subprojects(base, baselen, pathname)) { + if (fork()) { + chdir(base); + exec ls-tree; + } + waitpid(); + } + * + * ..or similar.. + */ + type = commit_type; + } else if (S_ISDIR(mode)) { if (show_recursive(base, baselen, pathname)) { retval = READ_TREE_RECURSIVE; if (!(ls_options & LS_SHOW_TREES)) diff --git a/cache-tree.c b/cache-tree.c index 9b73c8669a..6369cc7c53 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -326,7 +326,7 @@ static int update_one(struct cache_tree *it, mode = ntohl(ce->ce_mode); entlen = pathlen - baselen; } - if (!missing_ok && !has_sha1_file(sha1)) + if (mode != S_IFDIRLNK && !missing_ok && !has_sha1_file(sha1)) return error("invalid object %s", sha1_to_hex(sha1)); if (!ce->ce_mode) diff --git a/read-cache.c b/read-cache.c index 54573ce2ee..f458f50458 100644 --- a/read-cache.c +++ b/read-cache.c @@ -5,6 +5,7 @@ */ #include "cache.h" #include "cache-tree.h" +#include "refs.h" /* Index extensions. * @@ -91,6 +92,23 @@ static int ce_compare_link(struct cache_entry *ce, size_t expected_size) return match; } +static int ce_compare_gitlink(struct cache_entry *ce) +{ + unsigned char sha1[20]; + + /* + * We don't actually require that the .git directory + * under DIRLNK directory be a valid git directory. It + * might even be missing (in case nobody populated that + * sub-project). + * + * If so, we consider it always to match. + */ + if (resolve_gitlink_ref(ce->name, "HEAD", sha1) < 0) + return 0; + return hashcmp(sha1, ce->sha1); +} + static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st) { switch (st->st_mode & S_IFMT) { @@ -102,6 +120,9 @@ static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st) if (ce_compare_link(ce, xsize_t(st->st_size))) return DATA_CHANGED; break; + case S_IFDIRLNK: + /* No need to do anything, we did the exact compare in "match_stat_basic" */ + break; default: return TYPE_CHANGED; } @@ -127,6 +148,12 @@ static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st) (has_symlinks || !S_ISREG(st->st_mode))) changed |= TYPE_CHANGED; break; + case S_IFDIRLNK: + if (!S_ISDIR(st->st_mode)) + changed |= TYPE_CHANGED; + else if (ce_compare_gitlink(ce)) + changed |= DATA_CHANGED; + break; default: die("internal error: ce_mode is %o", ntohl(ce->ce_mode)); } @@ -250,9 +277,9 @@ int base_name_compare(const char *name1, int len1, int mode1, return cmp; c1 = name1[len]; c2 = name2[len]; - if (!c1 && S_ISDIR(mode1)) + if (!c1 && (S_ISDIR(mode1) || S_ISDIRLNK(mode1))) c1 = '/'; - if (!c2 && S_ISDIR(mode2)) + if (!c2 && (S_ISDIR(mode2) || S_ISDIRLNK(mode2))) c2 = '/'; return (c1 < c2) ? -1 : (c1 > c2) ? 1 : 0; } @@ -334,8 +361,8 @@ int add_file_to_cache(const char *path, int verbose) if (lstat(path, &st)) die("%s: unable to stat (%s)", path, strerror(errno)); - if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) - die("%s: can only add regular files or symbolic links", path); + if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode) && !S_ISDIR(st.st_mode)) + die("%s: can only add regular files, symbolic links or git-directories", path); namelen = strlen(path); size = cache_entry_size(namelen); diff --git a/sha1_file.c b/sha1_file.c index 4304fe9bbc..ab915faa6b 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -13,6 +13,7 @@ #include "commit.h" #include "tag.h" #include "tree.h" +#include "refs.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -2332,6 +2333,8 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write path); free(target); break; + case S_IFDIR: + return resolve_gitlink_ref(path, "HEAD", sha1); default: return error("%s: unsupported file type", path); } From 1833a925484675b328d5df04ffca62efa7a0a012 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 11 Apr 2007 14:39:12 -0700 Subject: [PATCH 07/18] Fix thinko in subproject entry sorting This fixes a total thinko in my original series: subprojects do *not* sort like directories, because the index is sorted purely by full pathname, and since a subproject shows up in the index as a normal NUL-terminated string, it never has the issues with sorting with the '/' at the end. So if you have a subproject "proj" and a file "proj.c", the subproject sorts alphabetically before the file in the index (and must thus also sort that way in a tree object, since trees sort as the index). In contrast, it you have two files "proj/file" and "proj.c", the "proj.c" will sort alphabetically before "proj/file" in the index. The index itself, of course, does not actually contain an entry "proj/", but in the *tree* that gets written out, the tree entry "proj" will sort after the file entry "proj.c", which is the only real magic sorting rule. In other words: the magic sorting rule only affects tree entries, and *only* affects tree entries that point to other trees (ie are of the type S_IFDIR). Anyway, that thinko just means that we should remove the special case to make S_ISDIRLNK entries sort like S_ISDIR entries. They don't. They sort like normal files. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- read-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/read-cache.c b/read-cache.c index f458f50458..b8b6d11dba 100644 --- a/read-cache.c +++ b/read-cache.c @@ -277,9 +277,9 @@ int base_name_compare(const char *name1, int len1, int mode1, return cmp; c1 = name1[len]; c2 = name2[len]; - if (!c1 && (S_ISDIR(mode1) || S_ISDIRLNK(mode1))) + if (!c1 && S_ISDIR(mode1)) c1 = '/'; - if (!c2 && (S_ISDIR(mode2) || S_ISDIRLNK(mode2))) + if (!c2 && S_ISDIR(mode2)) c2 = '/'; return (c1 < c2) ? -1 : (c1 > c2) ? 1 : 0; } From 095952585c2a955f45deac69df17a702d7584c80 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 11 Apr 2007 14:49:44 -0700 Subject: [PATCH 08/18] Teach directory traversal about subprojects This is the promised cleaned-up version of teaching directory traversal (ie the "read_directory()" logic) about subprojects. That makes "git add" understand to add/update subprojects. It now knows to look at the index file to see if a directory is marked as a subproject, and use that as information as whether it should be recursed into or not. It also generally cleans up the handling of directory entries when traversing the working tree, by splitting up the decision-making process into small functions of their own, and adding a fair number of comments. Finally, it teaches "add_file_to_cache()" that directory names can have slashes at the end, since the directory traversal adds them to make the difference between a file and a directory clear (it always did that, but my previous too-ugly-to-apply subproject patch had a totally different path for subproject directories and avoided the slash for that case). Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- dir.c | 133 ++++++++++++++++++++++++++++++++++++++++++++------- dir.h | 3 +- read-cache.c | 4 ++ 3 files changed, 121 insertions(+), 19 deletions(-) diff --git a/dir.c b/dir.c index 4f5a2241e6..7b91501255 100644 --- a/dir.c +++ b/dir.c @@ -7,12 +7,17 @@ */ #include "cache.h" #include "dir.h" +#include "refs.h" struct path_simplify { int len; const char *path; }; +static int read_directory_recursive(struct dir_struct *dir, + const char *path, const char *base, int baselen, + int check_only, const struct path_simplify *simplify); + int common_prefix(const char **pathspec) { const char *path, *slash, *next; @@ -286,15 +291,109 @@ struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int return ent; } -static int dir_exists(const char *dirname, int len) +enum exist_status { + index_nonexistent = 0, + index_directory, + index_gitdir, +}; + +/* + * The index sorts alphabetically by entry name, which + * means that a gitlink sorts as '\0' at the end, while + * a directory (which is defined not as an entry, but as + * the files it contains) will sort with the '/' at the + * end. + */ +static enum exist_status directory_exists_in_index(const char *dirname, int len) { int pos = cache_name_pos(dirname, len); - if (pos >= 0) - return 1; - pos = -pos-1; - if (pos >= active_nr) /* can't */ - return 0; - return !strncmp(active_cache[pos]->name, dirname, len); + if (pos < 0) + pos = -pos-1; + while (pos < active_nr) { + struct cache_entry *ce = active_cache[pos++]; + unsigned char endchar; + + if (strncmp(ce->name, dirname, len)) + break; + endchar = ce->name[len]; + if (endchar > '/') + break; + if (endchar == '/') + return index_directory; + if (!endchar && S_ISDIRLNK(ntohl(ce->ce_mode))) + return index_gitdir; + } + return index_nonexistent; +} + +/* + * When we find a directory when traversing the filesystem, we + * have three distinct cases: + * + * - ignore it + * - see it as a directory + * - recurse into it + * + * and which one we choose depends on a combination of existing + * git index contents and the flags passed into the directory + * traversal routine. + * + * Case 1: If we *already* have entries in the index under that + * directory name, we always recurse into the directory to see + * all the files. + * + * Case 2: If we *already* have that directory name as a gitlink, + * we always continue to see it as a gitlink, regardless of whether + * there is an actual git directory there or not (it might not + * be checked out as a subproject!) + * + * Case 3: if we didn't have it in the index previously, we + * have a few sub-cases: + * + * (a) if "show_other_directories" is true, we show it as + * just a directory, unless "hide_empty_directories" is + * also true and the directory is empty, in which case + * we just ignore it entirely. + * (b) if it looks like a git directory, and we don't have + * 'no_dirlinks' set we treat it as a gitlink, and show it + * as a directory. + * (c) otherwise, we recurse into it. + */ +enum directory_treatment { + show_directory, + ignore_directory, + recurse_into_directory, +}; + +static enum directory_treatment treat_directory(struct dir_struct *dir, + const char *dirname, int len, + const struct path_simplify *simplify) +{ + /* The "len-1" is to strip the final '/' */ + switch (directory_exists_in_index(dirname, len-1)) { + case index_directory: + return recurse_into_directory; + + case index_gitdir: + return show_directory; + + case index_nonexistent: + if (dir->show_other_directories) + break; + if (!dir->no_dirlinks) { + unsigned char sha1[20]; + if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0) + return show_directory; + } + return recurse_into_directory; + } + + /* This is the "show_other_directories" case */ + if (!dir->hide_empty_directories) + return show_directory; + if (!read_directory_recursive(dir, dirname, dirname, len, 1, simplify)) + return ignore_directory; + return show_directory; } /* @@ -380,19 +479,17 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co case DT_DIR: memcpy(fullname + baselen + len, "/", 2); len++; - if (dir->show_other_directories && - !dir_exists(fullname, baselen + len)) { - if (dir->hide_empty_directories && - !read_directory_recursive(dir, - fullname, fullname, - baselen + len, 1, simplify)) - continue; + switch (treat_directory(dir, fullname, baselen + len, simplify)) { + case show_directory: break; + case recurse_into_directory: + contents += read_directory_recursive(dir, + fullname, fullname, baselen + len, 0, simplify); + continue; + case ignore_directory: + continue; } - - contents += read_directory_recursive(dir, - fullname, fullname, baselen + len, 0, simplify); - continue; + break; case DT_REG: case DT_LNK: break; diff --git a/dir.h b/dir.h index 33c31f25fb..817c674da1 100644 --- a/dir.h +++ b/dir.h @@ -33,7 +33,8 @@ struct dir_struct { int nr, alloc; unsigned int show_ignored:1, show_other_directories:1, - hide_empty_directories:1; + hide_empty_directories:1, + no_dirlinks:1; struct dir_entry **entries; /* Exclude info */ diff --git a/read-cache.c b/read-cache.c index b8b6d11dba..e4c628f927 100644 --- a/read-cache.c +++ b/read-cache.c @@ -365,6 +365,10 @@ int add_file_to_cache(const char *path, int verbose) die("%s: can only add regular files, symbolic links or git-directories", path); namelen = strlen(path); + if (S_ISDIR(st.st_mode)) { + while (namelen && path[namelen-1] == '/') + namelen--; + } size = cache_entry_size(namelen); ce = xcalloc(1, size); memcpy(ce->name, path, namelen); From e011054b0fdcd1e29d85cdde7ffa5d5c125bd753 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 12 Apr 2007 12:29:40 -0700 Subject: [PATCH 09/18] Teach git-update-index about gitlinks I finally got around to looking at Alex' patch to teach update-index about gitlinks too, so that "git commit -a" along with any other explicit update-index scripts can work. I don't think there was anything wrong with Alex' patch, but the code he patched I felt was just so ugly that the added cases just pushed it over the edge. Especially as I don't think that patch necessarily did the right thing for a gitlink entry that already existed in the index, but that wasn't actually a real git repository in the working tree (just an empty subdirectory or a non-git snapshot because it hadn't wanted to track that particular subproject). So I ended up deciding to clean up the git-update-index handling the same way I tackled the directory traversal used by git-add earlier: by splitting the different cases up into multiple smaller functions, and just making the code easier to read (and adding more comments about the different cases). So this replaces the old "process_file()" with a new "process_path()" function that then just calls out to different helper functions depending on what kind of path it is. Processing a nondirectory ends up being just one of the simpler cases. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- builtin-update-index.c | 200 ++++++++++++++++++++++++++++------------- 1 file changed, 138 insertions(+), 62 deletions(-) diff --git a/builtin-update-index.c b/builtin-update-index.c index 47d42ed645..c4eaa94998 100644 --- a/builtin-update-index.c +++ b/builtin-update-index.c @@ -9,6 +9,7 @@ #include "cache-tree.h" #include "tree-walk.h" #include "builtin.h" +#include "refs.h" /* * Default to not allowing changes to the list of files. The @@ -60,76 +61,151 @@ static int mark_valid(const char *path) return -1; } -static int process_file(const char *path) +static int remove_one_path(const char *path) { - int size, namelen, option, status; - struct cache_entry *ce; - struct stat st; + if (!allow_remove) + return error("%s: does not exist and --remove not passed", path); + if (remove_file_from_cache(path)) + return error("%s: cannot remove from the index", path); + return 0; +} - status = lstat(path, &st); +/* + * Handle a path that couldn't be lstat'ed. It's either: + * - missing file (ENOENT or ENOTDIR). That's ok if we're + * supposed to be removing it and the removal actually + * succeeds. + * - permission error. That's never ok. + */ +static int process_lstat_error(const char *path, int err) +{ + if (err == ENOENT || err == ENOTDIR) + return remove_one_path(path); + return error("lstat(\"%s\"): %s", path, strerror(errno)); +} + +static int add_one_path(struct cache_entry *old, const char *path, int len, struct stat *st) +{ + int option, size = cache_entry_size(len); + struct cache_entry *ce = xcalloc(1, size); + + memcpy(ce->name, path, len); + ce->ce_flags = htons(len); + fill_stat_cache_info(ce, st); + ce->ce_mode = ce_mode_from_stat(old, st->st_mode); + + if (index_path(ce->sha1, path, st, !info_only)) + return -1; + option = allow_add ? ADD_CACHE_OK_TO_ADD : 0; + option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0; + if (add_cache_entry(ce, option)) + return error("%s: cannot add to the index - missing --add option?", path); + return 0; +} + +/* + * Handle a path that was a directory. Four cases: + * + * - it's already a gitlink in the index, and we keep it that + * way, and update it if we can (if we cannot find the HEAD, + * we're going to keep it unchanged in the index!) + * + * - it's a *file* in the index, in which case it should be + * removed as a file if removal is allowed, since it doesn't + * exist as such any more. If removal isn't allowed, it's + * an error. + * + * (NOTE! This is old and arguably fairly strange behaviour. + * We might want to make this an error unconditionally, and + * use "--force-remove" if you actually want to force removal). + * + * - it used to exist as a subdirectory (ie multiple files with + * this particular prefix) in the index, in which case it's wrong + * to try to update it as a directory. + * + * - it doesn't exist at all in the index, but it is a valid + * git directory, and it should be *added* as a gitlink. + */ +static int process_directory(const char *path, int len, struct stat *st) +{ + unsigned char sha1[20]; + int pos = cache_name_pos(path, len); + + /* Exact match: file or existing gitlink */ + if (pos >= 0) { + struct cache_entry *ce = active_cache[pos]; + if (S_ISDIRLNK(ntohl(ce->ce_mode))) { + + /* Do nothing to the index if there is no HEAD! */ + if (resolve_gitlink_ref(path, "HEAD", sha1) < 0) + return 0; + + return add_one_path(ce, path, len, st); + } + /* Should this be an unconditional error? */ + return remove_one_path(path); + } + + /* Inexact match: is there perhaps a subdirectory match? */ + pos = -pos-1; + while (pos < active_nr) { + struct cache_entry *ce = active_cache[pos++]; + + if (strncmp(ce->name, path, len)) + break; + if (ce->name[len] > '/') + break; + if (ce->name[len] < '/') + continue; + + /* Subdirectory match - error out */ + return error("%s: is a directory - add individual files instead", path); + } + + /* No match - should we add it as a gitlink? */ + if (!resolve_gitlink_ref(path, "HEAD", sha1)) + return add_one_path(NULL, path, len, st); + + /* Error out. */ + return error("%s: is a directory - add files inside instead", path); +} + +/* + * Process a regular file + */ +static int process_file(const char *path, int len, struct stat *st) +{ + int pos = cache_name_pos(path, len); + struct cache_entry *ce = pos < 0 ? NULL : active_cache[pos]; + + if (ce && S_ISDIRLNK(ntohl(ce->ce_mode))) + return error("%s is already a gitlink, not replacing", path); + + return add_one_path(ce, path, len, st); +} + +static int process_path(const char *path) +{ + int len; + struct stat st; /* We probably want to do this in remove_file_from_cache() and * add_cache_entry() instead... */ cache_tree_invalidate_path(active_cache_tree, path); - if (status < 0 || S_ISDIR(st.st_mode)) { - /* When we used to have "path" and now we want to add - * "path/file", we need a way to remove "path" before - * being able to add "path/file". However, - * "git-update-index --remove path" would not work. - * --force-remove can be used but this is more user - * friendly, especially since we can do the opposite - * case just fine without --force-remove. - */ - if (status == 0 || (errno == ENOENT || errno == ENOTDIR)) { - if (allow_remove) { - if (remove_file_from_cache(path)) - return error("%s: cannot remove from the index", - path); - else - return 0; - } else if (status < 0) { - return error("%s: does not exist and --remove not passed", - path); - } - } - if (0 == status) - return error("%s: is a directory - add files inside instead", - path); - else - return error("lstat(\"%s\"): %s", path, - strerror(errno)); - } + /* + * First things first: get the stat information, to decide + * what to do about the pathname! + */ + if (lstat(path, &st) < 0) + return process_lstat_error(path, errno); - namelen = strlen(path); - size = cache_entry_size(namelen); - ce = xcalloc(1, size); - memcpy(ce->name, path, namelen); - ce->ce_flags = htons(namelen); - fill_stat_cache_info(ce, &st); + len = strlen(path); + if (S_ISDIR(st.st_mode)) + return process_directory(path, len, &st); - if (trust_executable_bit && has_symlinks) - ce->ce_mode = create_ce_mode(st.st_mode); - else { - /* If there is an existing entry, pick the mode bits and type - * from it, otherwise assume unexecutable regular file. - */ - struct cache_entry *ent; - int pos = cache_name_pos(path, namelen); - - ent = (0 <= pos) ? active_cache[pos] : NULL; - ce->ce_mode = ce_mode_from_stat(ent, st.st_mode); - } - - if (index_path(ce->sha1, path, &st, !info_only)) - return -1; - option = allow_add ? ADD_CACHE_OK_TO_ADD : 0; - option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0; - if (add_cache_entry(ce, option)) - return error("%s: cannot add to the index - missing --add option?", - path); - return 0; + return process_file(path, len, &st); } static int add_cacheinfo(unsigned int mode, const unsigned char *sha1, @@ -210,8 +286,8 @@ static void update_one(const char *path, const char *prefix, int prefix_length) report("remove '%s'", path); goto free_return; } - if (process_file(p)) - die("Unable to process file %s", path); + if (process_path(p)) + die("Unable to process path %s", path); report("add '%s'", path); free_return: if (p < path || p > path + strlen(path)) From ab22aed3b7517c6390cb622b368bfcf503b7a37a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 12 Apr 2007 14:32:21 -0700 Subject: [PATCH 10/18] Don't show gitlink directories when we want "other" files When "show_other_directories" is set, that implies that we are looking for untracked files, which obviously means that we should ignore directories that are marked as gitlinks in the index. This fixes "git status" in a superproject, that would otherwise always report that subprojects were "Untracked files:" Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dir.c b/dir.c index 7b91501255..6564a929ff 100644 --- a/dir.c +++ b/dir.c @@ -375,6 +375,8 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, return recurse_into_directory; case index_gitdir: + if (dir->show_other_directories) + return ignore_directory; return show_directory; case index_nonexistent: From ea376fa7f2c209593bcc63008a2831fe013d4e39 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 12 Apr 2007 21:03:39 -0700 Subject: [PATCH 11/18] Teach git list-objects logic not to follow gitlinks This allows us to pack superprojects and thus clone them (but not yet check them out on the receiving side - that's the next patch) Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- list-objects.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/list-objects.c b/list-objects.c index 2ba2c958e0..310f8d3908 100644 --- a/list-objects.c +++ b/list-objects.c @@ -25,6 +25,37 @@ static void process_blob(struct rev_info *revs, add_object(obj, p, path, name); } +/* + * Processing a gitlink entry currently does nothing, since + * we do not recurse into the subproject. + * + * We *could* eventually add a flag that actually does that, + * which would involve: + * - is the subproject actually checked out? + * - if so, see if the subproject has already been added + * to the alternates list, and add it if not. + * - process the commit (or tag) the gitlink points to + * recursively. + * + * However, it's unclear whether there is really ever any + * reason to see superprojects and subprojects as such a + * "unified" object pool (potentially resulting in a totally + * humongous pack - avoiding which was the whole point of + * having gitlinks in the first place!). + * + * So for now, there is just a note that we *could* follow + * the link, and how to do it. Whether it necessarily makes + * any sense what-so-ever to ever do that is another issue. + */ +static void process_gitlink(struct rev_info *revs, + const unsigned char *sha1, + struct object_array *p, + struct name_path *path, + const char *name) +{ + /* Nothing to do */ +} + static void process_tree(struct rev_info *revs, struct tree *tree, struct object_array *p, @@ -56,6 +87,9 @@ static void process_tree(struct rev_info *revs, process_tree(revs, lookup_tree(entry.sha1), p, &me, entry.path); + else if (S_ISDIRLNK(entry.mode)) + process_gitlink(revs, entry.sha1, + p, &me, entry.path); else process_blob(revs, lookup_blob(entry.sha1), From 9129e056fb021df45d98c9472b6029456941a508 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 12 Apr 2007 21:08:52 -0700 Subject: [PATCH 12/18] Teach "git-read-tree -u" to check out submodules as a directory This actually allows us to check out a supermodule after cloning, although the submodules will obviously not be checked out, and will just be an empty subdirectory. [ Side note: this also shows that we currently don't correctly handle such subprojects that aren't checked out correctly yet. They should always show up as not being modified, but failing to resolve the gitlink HEAD does not properly trigger the "not modified" logic in all places it needs to.. So more work to be done, but that's a separate issue, unrelated to the action of checking out the superproject. ] The bulk of this patch is simply because we need to check the type of the index entry *before* we try to read the object it points to, and that meant that the code needed some re-organization. So I moved some of the code in common to both symlinks and files to be a trivial helper function. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- entry.c | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/entry.c b/entry.c index d72f811580..9545e895d0 100644 --- a/entry.c +++ b/entry.c @@ -62,26 +62,33 @@ static int create_file(const char *path, unsigned int mode) return open(path, O_WRONLY | O_CREAT | O_EXCL, mode); } +static void *read_blob_entry(struct cache_entry *ce, const char *path, unsigned long *size) +{ + enum object_type type; + void *new = read_sha1_file(ce->sha1, &type, size); + + if (new) { + if (type == OBJ_BLOB) + return new; + free(new); + } + return NULL; +} + static int write_entry(struct cache_entry *ce, char *path, struct checkout *state, int to_tempfile) { int fd; - void *new; - unsigned long size; long wrote; - enum object_type type; - new = read_sha1_file(ce->sha1, &type, &size); - if (!new || type != OBJ_BLOB) { - if (new) - free(new); - return error("git-checkout-index: unable to read sha1 file of %s (%s)", - path, sha1_to_hex(ce->sha1)); - } switch (ntohl(ce->ce_mode) & S_IFMT) { - char *buf; - unsigned long nsize; + char *buf, *new; + unsigned long size, nsize; case S_IFREG: + new = read_blob_entry(ce, path, &size); + if (!new) + return error("git-checkout-index: unable to read sha1 file of %s (%s)", + path, sha1_to_hex(ce->sha1)); if (to_tempfile) { strcpy(path, ".merge_file_XXXXXX"); fd = mkstemp(path); @@ -111,6 +118,10 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat return error("git-checkout-index: unable to write file %s", path); break; case S_IFLNK: + new = read_blob_entry(ce, path, &size); + if (!new) + return error("git-checkout-index: unable to read sha1 file of %s (%s)", + path, sha1_to_hex(ce->sha1)); if (to_tempfile || !has_symlinks) { if (to_tempfile) { strcpy(path, ".merge_link_XXXXXX"); @@ -136,8 +147,13 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat "symlink %s (%s)", path, strerror(errno)); } break; + case S_IFDIRLNK: + if (to_tempfile) + return error("git-checkout-index: cannot create temporary subproject %s", path); + if (mkdir(path, 0777) < 0) + return error("git-checkout-index: cannot create subproject directory %s", path); + break; default: - free(new); return error("git-checkout-index: unknown file mode for %s", path); } From a8ee75bc7a2ddbb10e0a4303b21bb5c300f98cc2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 13 Apr 2007 09:24:13 -0700 Subject: [PATCH 13/18] Fix gitlink index entry filesystem matching The code to match up index entries with the filesystem was stupidly broken. We shouldn't compare the filesystem stat() information with S_IFDIRLNK, since that's purely a git-internal value, and not what the filesystem uses (on the filesystem, it's just a regular directory). Also, don't bother to make the stat() time comparisons etc for DIRLNK entries in ce_match_stat_basic(), since we do an exact match for these things, and the hints in the stat data simply doesn't matter. This fixes "git status" with submodules that haven't been checked out in the supermodule. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- read-cache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/read-cache.c b/read-cache.c index e4c628f927..d2f332a622 100644 --- a/read-cache.c +++ b/read-cache.c @@ -120,9 +120,9 @@ static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st) if (ce_compare_link(ce, xsize_t(st->st_size))) return DATA_CHANGED; break; - case S_IFDIRLNK: - /* No need to do anything, we did the exact compare in "match_stat_basic" */ - break; + case S_IFDIR: + if (S_ISDIRLNK(ntohl(ce->ce_mode))) + return 0; default: return TYPE_CHANGED; } @@ -153,7 +153,7 @@ static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st) changed |= TYPE_CHANGED; else if (ce_compare_gitlink(ce)) changed |= DATA_CHANGED; - break; + return changed; default: die("internal error: ce_mode is %o", ntohl(ce->ce_mode)); } From 6e2f441bd42c55c73b5e7ac1fdc2aded07901cb3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 13 Apr 2007 09:25:01 -0700 Subject: [PATCH 14/18] Teach git list-objects logic to not follow gitlinks This allows us to pack superprojects and thus clone them (but not yet check them out on the receiving side.. That's the next patch) Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- list-objects.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/list-objects.c b/list-objects.c index 2ba2c958e0..310f8d3908 100644 --- a/list-objects.c +++ b/list-objects.c @@ -25,6 +25,37 @@ static void process_blob(struct rev_info *revs, add_object(obj, p, path, name); } +/* + * Processing a gitlink entry currently does nothing, since + * we do not recurse into the subproject. + * + * We *could* eventually add a flag that actually does that, + * which would involve: + * - is the subproject actually checked out? + * - if so, see if the subproject has already been added + * to the alternates list, and add it if not. + * - process the commit (or tag) the gitlink points to + * recursively. + * + * However, it's unclear whether there is really ever any + * reason to see superprojects and subprojects as such a + * "unified" object pool (potentially resulting in a totally + * humongous pack - avoiding which was the whole point of + * having gitlinks in the first place!). + * + * So for now, there is just a note that we *could* follow + * the link, and how to do it. Whether it necessarily makes + * any sense what-so-ever to ever do that is another issue. + */ +static void process_gitlink(struct rev_info *revs, + const unsigned char *sha1, + struct object_array *p, + struct name_path *path, + const char *name) +{ + /* Nothing to do */ +} + static void process_tree(struct rev_info *revs, struct tree *tree, struct object_array *p, @@ -56,6 +87,9 @@ static void process_tree(struct rev_info *revs, process_tree(revs, lookup_tree(entry.sha1), p, &me, entry.path); + else if (S_ISDIRLNK(entry.mode)) + process_gitlink(revs, entry.sha1, + p, &me, entry.path); else process_blob(revs, lookup_blob(entry.sha1), From f0807e62b42df51a079c730dcf4868de9ad44ea5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 13 Apr 2007 09:26:04 -0700 Subject: [PATCH 15/18] Teach "git-read-tree -u" to check out submodules as a directory This actually allows us to check out a supermodule after cloning, although the submodules themselves will obviously not be checked out, and will just be empty directories. Checking out the submodules will be up to higher levels - we may not even want to! Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- entry.c | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/entry.c b/entry.c index d72f811580..50ffae40c7 100644 --- a/entry.c +++ b/entry.c @@ -62,26 +62,33 @@ static int create_file(const char *path, unsigned int mode) return open(path, O_WRONLY | O_CREAT | O_EXCL, mode); } +static void *read_blob_entry(struct cache_entry *ce, const char *path, unsigned long *size) +{ + enum object_type type; + void *new = read_sha1_file(ce->sha1, &type, size); + + if (new) { + if (type == OBJ_BLOB) + return new; + free(new); + } + return NULL; +} + static int write_entry(struct cache_entry *ce, char *path, struct checkout *state, int to_tempfile) { int fd; - void *new; - unsigned long size; long wrote; - enum object_type type; - new = read_sha1_file(ce->sha1, &type, &size); - if (!new || type != OBJ_BLOB) { - if (new) - free(new); - return error("git-checkout-index: unable to read sha1 file of %s (%s)", - path, sha1_to_hex(ce->sha1)); - } switch (ntohl(ce->ce_mode) & S_IFMT) { - char *buf; - unsigned long nsize; + char *buf, *new; + unsigned long size, nsize; case S_IFREG: + new = read_blob_entry(ce, path, &size); + if (!new) + return error("git-checkout-index: unable to read sha1 file of %s (%s)", + path, sha1_to_hex(ce->sha1)); if (to_tempfile) { strcpy(path, ".merge_file_XXXXXX"); fd = mkstemp(path); @@ -111,6 +118,10 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat return error("git-checkout-index: unable to write file %s", path); break; case S_IFLNK: + new = read_blob_entry(ce, path, &size); + if (!new) + return error("git-checkout-index: unable to read sha1 file of %s (%s)", + path, sha1_to_hex(ce->sha1)); if (to_tempfile || !has_symlinks) { if (to_tempfile) { strcpy(path, ".merge_link_XXXXXX"); @@ -136,8 +147,13 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat "symlink %s (%s)", path, strerror(errno)); } break; + case S_IFDIRLNK: + if (to_tempfile) + return error("git-checkout-index: cannot create temporary subproject %s", path); + if (mkdir(path, 0777) < 0) + return error("git-checkout-index: cannot create subproject directory %s", path); + break; default: - free(new); return error("git-checkout-index: unknown file mode for %s", path); } @@ -179,6 +195,9 @@ int checkout_entry(struct cache_entry *ce, struct checkout *state, char *topath) */ unlink(path); if (S_ISDIR(st.st_mode)) { + /* If it is a gitlink, leave it alone! */ + if (S_ISDIRLNK(ntohl(ce->ce_mode))) + return 0; if (!state->force) return error("%s is a directory", path); remove_subtree(path); From 5698454ea052369dc98d38b45c21307b07a6c4a3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 14 Apr 2007 16:22:08 -0700 Subject: [PATCH 16/18] Fix some "git ls-files -o" fallout from gitlinks Since "git ls-files" doesn't really pass down any details on what it really wants done to the directory walking code, the directory walking code doesn't really know whether the caller wants to know about gitlink directories, or whether it wants to just know about ignored files. So the directory walking code will return those gitlink directories unless the caller has explicitly told it not to ("dir->show_other_directories" tells the directory walker to only show "other" directories). This kind of confuses "git ls-files -o", because - it didn't really expect to see entries listed that were already in the index, unless they were unmerged, and would die on that unexpected setup, rather than just "continue". - it didn't know how to match directory entries with the final "/" This trivial change updates the "show_other_files()" function to handle both of these issues gracefully. There really was no reason to die, when the obviously correct thing for the function was to just ignore files it already knew about (that's what "other" means here!). Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- builtin-ls-files.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/builtin-ls-files.c b/builtin-ls-files.c index 74a6acacc1..f7c066b24b 100644 --- a/builtin-ls-files.c +++ b/builtin-ls-files.c @@ -89,20 +89,38 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent) static void show_other_files(struct dir_struct *dir) { int i; + + + /* + * Skip matching and unmerged entries for the paths, + * since we want just "others". + * + * (Matching entries are normally pruned during + * the directory tree walk, but will show up for + * gitlinks because we don't necessarily have + * dir->show_other_directories set to suppress + * them). + */ for (i = 0; i < dir->nr; i++) { - /* We should not have a matching entry, but we - * may have an unmerged entry for this path. - */ struct dir_entry *ent = dir->entries[i]; - int pos = cache_name_pos(ent->name, ent->len); + int len, pos; struct cache_entry *ce; + + /* + * Remove the '/' at the end that directory + * walking adds for directory entries. + */ + len = ent->len; + if (len && ent->name[len-1] == '/') + len--; + pos = cache_name_pos(ent->name, len); if (0 <= pos) - die("bug in show-other-files"); + continue; /* exact match */ pos = -pos - 1; if (pos < active_nr) { ce = active_cache[pos]; - if (ce_namelen(ce) == ent->len && - !memcmp(ce->name, ent->name, ent->len)) + if (ce_namelen(ce) == len && + !memcmp(ce->name, ent->name, len)) continue; /* Yup, this one exists unmerged */ } show_dir_entry(tag_other, ent); From 04786756f90a734445cc300a5acf9fcfc9fd4c04 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Apr 2007 11:14:28 -0700 Subject: [PATCH 17/18] Expose subprojects as special files to "git diff" machinery The same way we generate diffs on symlinks as the the diff of text of the symlink, we can generate subproject diffs (when not recursing into them!) as the diff of the text that describes the subproject. Of course, since what descibes a subproject is just the SHA1, that's what we'll use. Add some pretty-printing to make it a bit more obvious what is going on, and we're done. So with this, we can get both raw diffs and "textual" diffs of subproject changes: - git diff --raw: :160000 160000 2de597b5ad348b7db04bd10cdd38cd81cbc93ab5 0000000... M sub-A - git diff: diff --git a/sub-A b/sub-A index 2de597b..e8f11a4 160000 --- a/sub-A +++ b/sub-A @@ -1 +1 @@ -Subproject commit 2de597b5ad348b7db04bd10cdd38cd81cbc93ab5 +Subproject commit e8f11a45c5c6b9e2fec6d136d3fb5aff75393d42 NOTE! We'll also want to have the ability to recurse into the subproject and actually diff it recursively, but that will involve a new command line option (I'd suggest "--subproject" and "-S", but the latter is in use by pickaxe), and some very different code. But regardless of ay future recursive behaviour, we need the non-recursive version too (and it should be the default, at least in the absense of config options, so that large superprojects don't default to something extremely expensive). Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/diff.c b/diff.c index fbb79d70a9..1e8e689be2 100644 --- a/diff.c +++ b/diff.c @@ -1397,6 +1397,22 @@ static int populate_from_stdin(struct diff_filespec *s) return 0; } +static int diff_populate_gitlink(struct diff_filespec *s, int size_only) +{ + int len; + char *data = xmalloc(100); + len = snprintf(data, 100, + "Subproject commit %s\n", sha1_to_hex(s->sha1)); + s->data = data; + s->size = len; + s->should_free = 1; + if (size_only) { + s->data = NULL; + free(data); + } + return 0; +} + /* * While doing rename detection and pickaxe operation, we may need to * grab the data for the blob (or file) for our own in-core comparison. @@ -1415,6 +1431,10 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only) if (s->data) return err; + + if (S_ISDIRLNK(s->mode)) + return diff_populate_gitlink(s, size_only); + if (!s->sha1_valid || reuse_worktree_file(s->path, s->sha1, 0)) { struct stat st; From 1c3e5c4ebc326c5c70350d3f4dc7f2b29e813480 Mon Sep 17 00:00:00 2001 From: Alex Riesen Date: Thu, 19 Apr 2007 01:55:45 +0200 Subject: [PATCH 18/18] Tests for core subproject support The following tests available: - create subprojects: create a directory in the superproject, initialize a git repo in it, and try adding it in super project. Make a commit in superproject - check if fsck ignores the subprojects: it just should give no errors - check if commit in a subproject detected: make a commit in subproject, git-diff-files in superproject should detect it - check if a changed subproject HEAD can be committed: try "git-commit -a" in superproject. It should commit changed HEAD of a subproject - check if diff-index works for subproject elements: compare the index (changed by previuos tests) with the initial commit (which created two subprojects). Should show a change for the recently changed subproject - check if diff-tree works for subproject elements: do the same, just use git-diff-tree. This test is somewhat redundant, I just added it for completeness (diff, diff-files, and diff-index are already used) - check if git diff works for subproject elements: try to limit the diff for the name of a subproject in superproject: git diff HEAD^ HEAD -- subproject - check if clone works: try a clone of superproject and compare "git ls-files -s" output in superproject and cloned repo - removing and adding subproject: rename test. Currently implemented as "git-update-index --force-remove", "mv" and "git-add". - checkout in superproject: try to checkout the initial commit Signed-off-by: Alex Riesen Signed-off-by: Junio C Hamano --- t/t3040-subprojects-basic.sh | 85 ++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100755 t/t3040-subprojects-basic.sh diff --git a/t/t3040-subprojects-basic.sh b/t/t3040-subprojects-basic.sh new file mode 100755 index 0000000000..79b9f23654 --- /dev/null +++ b/t/t3040-subprojects-basic.sh @@ -0,0 +1,85 @@ +#!/bin/sh + +test_description='Basic subproject functionality' +. ./test-lib.sh + +test_expect_success 'Super project creation' \ + ': >Makefile && + git add Makefile && + git commit -m "Superproject created"' + + +cat >expected <Makefile && git add * && + git commit -q -m "subproject 1" ) && + mkdir sub2 && + ( cd sub2 && git init && : >Makefile && git add * && + git commit -q -m "subproject 2" ) && + git update-index --add sub1 && + git add sub2 && + git commit -q -m "subprojects added" && + git diff-tree --abbrev=5 HEAD^ HEAD |cut -d" " -f-3,5- >current && + git diff expected current' + +git branch save HEAD + +test_expect_success 'check if fsck ignores the subprojects' \ + 'git fsck --full' + +test_expect_success 'check if commit in a subproject detected' \ + '( cd sub1 && + echo "all:" >>Makefile && + echo " true" >>Makefile && + git commit -q -a -m "make all" ) && { + git diff-files --exit-code + test $? = 1 + }' + +test_expect_success 'check if a changed subproject HEAD can be committed' \ + 'git commit -q -a -m "sub1 changed" && { + git diff-tree --exit-code HEAD^ HEAD + test $? = 1 + }' + +test_expect_success 'check if diff-index works for subproject elements' \ + 'git diff-index --exit-code --cached save -- sub1 + test $? = 1' + +test_expect_success 'check if diff-tree works for subproject elements' \ + 'git diff-tree --exit-code HEAD^ HEAD -- sub1 + test $? = 1' + +test_expect_success 'check if git diff works for subproject elements' \ + 'git diff --exit-code HEAD^ HEAD + test $? = 1' + +test_expect_success 'check if clone works' \ + 'git ls-files -s >expected && + git clone -l -s . cloned && + ( cd cloned && git ls-files -s ) >current && + git diff expected current' + +test_expect_success 'removing and adding subproject' \ + 'git update-index --force-remove -- sub2 && + mv sub2 sub3 && + git add sub3 && + git commit -q -m "renaming a subproject" && { + git diff -M --name-status --exit-code HEAD^ HEAD + test $? = 1 + }' + +# the index must contain the object name the HEAD of the +# subproject sub1 was at the point "save" +test_expect_success 'checkout in superproject' \ + 'git checkout save && + git diff-index --exit-code --raw --cached save -- sub1' + +# just interesting what happened... +# git diff --name-status -M save master + +test_done