5f73076c1a
This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; they do not change the object name recorded in the index file. Nor do they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with the CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with the CE_VALID bit are assumed to be unchanged most of the time. However, there are cases where the CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop the CE_VALID bit for paths that are modified. 
Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modify a file in the working tree, update-index --refresh will not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there are probably other cases where we should automatically unmark paths that are marked as CE_VALID. But the basics seem to work, and it is ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
158 lines
3.8 KiB
C
158 lines
3.8 KiB
C
/*
|
|
* GIT - The information manager from hell
|
|
*
|
|
* Copyright (C) Linus Torvalds, 2005
|
|
*/
|
|
#include "cache.h"
|
|
|
|
/*
 * Non-zero once "--missing-ok" has been given on the command line; in that
 * case write_tree() does not call check_valid_sha1() on the entries it
 * records.  Starts out as zero (static storage).
 */
static int missing_ok;
|
|
|
|
/*
 * Ask has_sha1_file() whether the object named by sha1 is available.
 *
 * Only presence is tested; nothing here verifies that the object's
 * contents actually hash to sha1.  When the object is not found, the
 * name produced by sha1_file_name() is reported through perror().
 *
 * Returns 0 if the object is present, -1 if it is not.
 */
static int check_valid_sha1(unsigned char *sha1)
{
	if (has_sha1_file(sha1))
		return 0;

	perror(sha1_file_name(sha1));
	return -1;
}
|
|
|
|
static int write_tree(struct cache_entry **cachep, int maxentries, const char *base, int baselen, unsigned char *returnsha1)
|
|
{
|
|
unsigned char subdir_sha1[20];
|
|
unsigned long size, offset;
|
|
char *buffer;
|
|
int nr;
|
|
|
|
/* Guess at some random initial size */
|
|
size = 8192;
|
|
buffer = xmalloc(size);
|
|
offset = 0;
|
|
|
|
nr = 0;
|
|
while (nr < maxentries) {
|
|
struct cache_entry *ce = cachep[nr];
|
|
const char *pathname = ce->name, *filename, *dirname;
|
|
int pathlen = ce_namelen(ce), entrylen;
|
|
unsigned char *sha1;
|
|
unsigned int mode;
|
|
|
|
/* Did we hit the end of the directory? Return how many we wrote */
|
|
if (baselen >= pathlen || memcmp(base, pathname, baselen))
|
|
break;
|
|
|
|
sha1 = ce->sha1;
|
|
mode = ntohl(ce->ce_mode);
|
|
|
|
/* Do we have _further_ subdirectories? */
|
|
filename = pathname + baselen;
|
|
dirname = strchr(filename, '/');
|
|
if (dirname) {
|
|
int subdir_written;
|
|
|
|
subdir_written = write_tree(cachep + nr, maxentries - nr, pathname, dirname-pathname+1, subdir_sha1);
|
|
nr += subdir_written;
|
|
|
|
/* Now we need to write out the directory entry into this tree.. */
|
|
mode = S_IFDIR;
|
|
pathlen = dirname - pathname;
|
|
|
|
/* ..but the directory entry doesn't count towards the total count */
|
|
nr--;
|
|
sha1 = subdir_sha1;
|
|
}
|
|
|
|
if (!missing_ok && check_valid_sha1(sha1) < 0)
|
|
exit(1);
|
|
|
|
entrylen = pathlen - baselen;
|
|
if (offset + entrylen + 100 > size) {
|
|
size = alloc_nr(offset + entrylen + 100);
|
|
buffer = xrealloc(buffer, size);
|
|
}
|
|
offset += sprintf(buffer + offset, "%o %.*s", mode, entrylen, filename);
|
|
buffer[offset++] = 0;
|
|
memcpy(buffer + offset, sha1, 20);
|
|
offset += 20;
|
|
nr++;
|
|
}
|
|
|
|
write_sha1_file(buffer, offset, "tree", returnsha1);
|
|
free(buffer);
|
|
return nr;
|
|
}
|
|
|
|
/* Message passed to die() when the single argument is not "--missing-ok". */
static const char write_tree_usage[] =
	"git-write-tree [--missing-ok]";
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
int i, funny;
|
|
int entries;
|
|
unsigned char sha1[20];
|
|
|
|
setup_git_directory();
|
|
|
|
entries = read_cache();
|
|
if (argc == 2) {
|
|
if (!strcmp(argv[1], "--missing-ok"))
|
|
missing_ok = 1;
|
|
else
|
|
die(write_tree_usage);
|
|
}
|
|
|
|
if (argc > 2)
|
|
die("too many options");
|
|
|
|
if (entries < 0)
|
|
die("git-write-tree: error reading cache");
|
|
|
|
/* Verify that the tree is merged */
|
|
funny = 0;
|
|
for (i = 0; i < entries; i++) {
|
|
struct cache_entry *ce = active_cache[i];
|
|
if (ce_stage(ce)) {
|
|
if (10 < ++funny) {
|
|
fprintf(stderr, "...\n");
|
|
break;
|
|
}
|
|
fprintf(stderr, "%s: unmerged (%s)\n", ce->name, sha1_to_hex(ce->sha1));
|
|
}
|
|
}
|
|
if (funny)
|
|
die("git-write-tree: not able to write tree");
|
|
|
|
/* Also verify that the cache does not have path and path/file
|
|
* at the same time. At this point we know the cache has only
|
|
* stage 0 entries.
|
|
*/
|
|
funny = 0;
|
|
for (i = 0; i < entries - 1; i++) {
|
|
/* path/file always comes after path because of the way
|
|
* the cache is sorted. Also path can appear only once,
|
|
* which means conflicting one would immediately follow.
|
|
*/
|
|
const char *this_name = active_cache[i]->name;
|
|
const char *next_name = active_cache[i+1]->name;
|
|
int this_len = strlen(this_name);
|
|
if (this_len < strlen(next_name) &&
|
|
strncmp(this_name, next_name, this_len) == 0 &&
|
|
next_name[this_len] == '/') {
|
|
if (10 < ++funny) {
|
|
fprintf(stderr, "...\n");
|
|
break;
|
|
}
|
|
fprintf(stderr, "You have both %s and %s\n",
|
|
this_name, next_name);
|
|
}
|
|
}
|
|
if (funny)
|
|
die("git-write-tree: not able to write tree");
|
|
|
|
/* Ok, write it out */
|
|
if (write_tree(active_cache, entries, "", 0, sha1) != entries)
|
|
die("git-write-tree: internal error");
|
|
printf("%s\n", sha1_to_hex(sha1));
|
|
return 0;
|
|
}
|