git-commit-vandalism/entry.c

235 lines
5.8 KiB
C
Raw Normal View History

#include "cache.h"
#include "blob.h"
static void create_directories(const char *path, const struct checkout *state)
{
int len = strlen(path);
char *buf = xmalloc(len + 1);
const char *slash = path;
while ((slash = strchr(slash+1, '/')) != NULL) {
struct stat st;
int stat_status;
len = slash - path;
memcpy(buf, path, len);
buf[len] = 0;
if (len <= state->base_dir_len)
/*
* checkout-index --prefix=<dir>; <dir> is
* allowed to be a symlink to an existing
* directory.
*/
stat_status = stat(buf, &st);
else
/*
* if there currently is a symlink, we would
* want to replace it with a real directory.
*/
stat_status = lstat(buf, &st);
if (!stat_status && S_ISDIR(st.st_mode))
continue; /* ok, it is already a directory. */
/*
* We know stat_status == 0 means something exists
* there and this mkdir would fail, but that is an
* error codepath; we do not care, as we unlink and
* mkdir again in such a case.
*/
if (mkdir(buf, 0777)) {
if (errno == EEXIST && state->force &&
!unlink(buf) && !mkdir(buf, 0777))
continue;
die("cannot create directory at %s", buf);
}
}
free(buf);
}
static void remove_subtree(const char *path)
{
DIR *dir = opendir(path);
struct dirent *de;
char pathbuf[PATH_MAX];
char *name;
if (!dir)
die("cannot opendir %s (%s)", path, strerror(errno));
strcpy(pathbuf, path);
name = pathbuf + strlen(path);
*name++ = '/';
while ((de = readdir(dir)) != NULL) {
struct stat st;
if ((de->d_name[0] == '.') &&
((de->d_name[1] == 0) ||
((de->d_name[1] == '.') && de->d_name[2] == 0)))
continue;
strcpy(name, de->d_name);
if (lstat(pathbuf, &st))
die("cannot lstat %s (%s)", pathbuf, strerror(errno));
if (S_ISDIR(st.st_mode))
remove_subtree(pathbuf);
else if (unlink(pathbuf))
die("cannot unlink %s (%s)", pathbuf, strerror(errno));
}
closedir(dir);
if (rmdir(path))
die("cannot rmdir %s (%s)", path, strerror(errno));
}
static int create_file(const char *path, unsigned int mode)
{
mode = (mode & 0100) ? 0777 : 0666;
return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
}
static void *read_blob_entry(struct cache_entry *ce, const char *path, unsigned long *size)
{
enum object_type type;
void *new = read_sha1_file(ce->sha1, &type, size);
if (new) {
if (type == OBJ_BLOB)
return new;
free(new);
}
return NULL;
}
static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
{
int fd;
long wrote;
switch (ntohl(ce->ce_mode) & S_IFMT) {
char *new;
struct strbuf buf;
unsigned long size;
Lazy man's auto-CRLF It currently does NOT know about file attributes, so it does its conversion purely based on content. Maybe that is more in the "git philosophy" anyway, since content is king, but I think we should try to do the file attributes to turn it off on demand. Anyway, BY DEFAULT it is off regardless, because it requires a [core] AutoCRLF = true in your config file to be enabled. We could make that the default for Windows, of course, the same way we do some other things (filemode etc). But you can actually enable it on UNIX, and it will cause: - "git update-index" will write blobs without CRLF - "git diff" will diff working tree files without CRLF - "git checkout" will write files to the working tree _with_ CRLF and things work fine. Funnily, it actually shows an odd file in git itself: git clone -n git test-crlf cd test-crlf git config core.autocrlf true git checkout git diff shows a diff for "Documentation/docbook-xsl.css". Why? Because we have actually checked in that file *with* CRLF! So when "core.autocrlf" is true, we'll always generate a *different* hash for it in the index, because the index hash will be for the content _without_ CRLF. Is this complete? I dunno. It seems to work for me. It doesn't use the filename at all right now, and that's probably a deficiency (we could certainly make the "is_binary()" heuristics also take standard filename heuristics into account). I don't pass in the filename at all for the "index_fd()" case (git-update-index), so that would need to be passed around, but this actually works fine. NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours truly. I will not guarantee that they work at all reasonable. Caveat emptor. But it _is_ simple, and it _is_ safe, since it's all off by default. The patch is pretty simple - the biggest part is the new "convert.c" file, but even that is really just basic stuff that anybody can write in "Teaching C 101" as a final project for their first class in programming. Not to say that it's bug-free, of course - but at least we're not talking about rocket surgery here. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
case S_IFREG:
new = read_blob_entry(ce, path, &size);
if (!new)
return error("git-checkout-index: unable to read sha1 file of %s (%s)",
path, sha1_to_hex(ce->sha1));
/*
* Convert from git internal format to working tree format
*/
strbuf_init(&buf, 0);
if (convert_to_working_tree(ce->name, new, size, &buf)) {
size_t newsize = 0;
free(new);
new = strbuf_detach(&buf, &newsize);
size = newsize;
}
if (to_tempfile) {
strcpy(path, ".merge_file_XXXXXX");
fd = mkstemp(path);
} else
fd = create_file(path, ntohl(ce->ce_mode));
if (fd < 0) {
free(new);
return error("git-checkout-index: unable to create file %s (%s)",
path, strerror(errno));
}
Lazy man's auto-CRLF It currently does NOT know about file attributes, so it does its conversion purely based on content. Maybe that is more in the "git philosophy" anyway, since content is king, but I think we should try to do the file attributes to turn it off on demand. Anyway, BY DEFAULT it is off regardless, because it requires a [core] AutoCRLF = true in your config file to be enabled. We could make that the default for Windows, of course, the same way we do some other things (filemode etc). But you can actually enable it on UNIX, and it will cause: - "git update-index" will write blobs without CRLF - "git diff" will diff working tree files without CRLF - "git checkout" will write files to the working tree _with_ CRLF and things work fine. Funnily, it actually shows an odd file in git itself: git clone -n git test-crlf cd test-crlf git config core.autocrlf true git checkout git diff shows a diff for "Documentation/docbook-xsl.css". Why? Because we have actually checked in that file *with* CRLF! So when "core.autocrlf" is true, we'll always generate a *different* hash for it in the index, because the index hash will be for the content _without_ CRLF. Is this complete? I dunno. It seems to work for me. It doesn't use the filename at all right now, and that's probably a deficiency (we could certainly make the "is_binary()" heuristics also take standard filename heuristics into account). I don't pass in the filename at all for the "index_fd()" case (git-update-index), so that would need to be passed around, but this actually works fine. NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours truly. I will not guarantee that they work at all reasonable. Caveat emptor. But it _is_ simple, and it _is_ safe, since it's all off by default. The patch is pretty simple - the biggest part is the new "convert.c" file, but even that is really just basic stuff that anybody can write in "Teaching C 101" as a final project for their first class in programming. Not to say that it's bug-free, of course - but at least we're not talking about rocket surgery here. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
wrote = write_in_full(fd, new, size);
close(fd);
free(new);
if (wrote != size)
return error("git-checkout-index: unable to write file %s", path);
break;
case S_IFLNK:
new = read_blob_entry(ce, path, &size);
if (!new)
return error("git-checkout-index: unable to read sha1 file of %s (%s)",
path, sha1_to_hex(ce->sha1));
if (to_tempfile || !has_symlinks) {
if (to_tempfile) {
strcpy(path, ".merge_link_XXXXXX");
fd = mkstemp(path);
} else
fd = create_file(path, 0666);
if (fd < 0) {
free(new);
return error("git-checkout-index: unable to create "
"file %s (%s)", path, strerror(errno));
}
wrote = write_in_full(fd, new, size);
close(fd);
free(new);
if (wrote != size)
return error("git-checkout-index: unable to write file %s",
path);
} else {
wrote = symlink(new, path);
free(new);
if (wrote)
return error("git-checkout-index: unable to create "
"symlink %s (%s)", path, strerror(errno));
}
break;
case S_IFGITLINK:
if (to_tempfile)
return error("git-checkout-index: cannot create temporary subproject %s", path);
if (mkdir(path, 0777) < 0)
return error("git-checkout-index: cannot create subproject directory %s", path);
break;
default:
return error("git-checkout-index: unknown file mode for %s", path);
}
if (state->refresh_cache) {
struct stat st;
lstat(ce->name, &st);
fill_stat_cache_info(ce, &st);
}
return 0;
}
int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath)
{
static char path[PATH_MAX + 1];
struct stat st;
int len = state->base_dir_len;
if (topath)
return write_entry(ce, topath, state, 1);
memcpy(path, state->base_dir, len);
strcpy(path + len, ce->name);
if (!lstat(path, &st)) {
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
unsigned changed = ce_match_stat(ce, &st, 1);
if (!changed)
return 0;
if (!state->force) {
if (!state->quiet)
fprintf(stderr, "git-checkout-index: %s already exists\n", path);
return -1;
}
/*
* We unlink the old file, to get the new one with the
* right permissions (including umask, which is nasty
* to emulate by hand - much easier to let the system
* just do the right thing)
*/
unlink(path);
if (S_ISDIR(st.st_mode)) {
/* If it is a gitlink, leave it alone! */
if (S_ISGITLINK(ntohl(ce->ce_mode)))
return 0;
if (!state->force)
return error("%s is a directory", path);
remove_subtree(path);
}
} else if (state->not_new)
return 0;
create_directories(path, state);
return write_entry(ce, path, state, 0);
}