git-commit-vandalism/builtin/update-index.c

1208 lines
32 KiB
C
Raw Normal View History

/*
* GIT - The information manager from hell
*
* Copyright (C) Linus Torvalds, 2005
*/
#include "cache.h"
#include "config.h"
#include "lockfile.h"
#include "quote.h"
#include "cache-tree.h"
#include "tree-walk.h"
#include "builtin.h"
#include "refs.h"
#include "resolve-undo.h"
#include "parse-options.h"
#include "pathspec.h"
#include "dir.h"
#include "split-index.h"
#include "fsmonitor.h"
/*
* Default to not allowing changes to the list of files. The
* tool doesn't actually care, but this makes it harder to add
* files to the revision control by mistake by doing something
* like "git update-index *" and suddenly having all the object
* files be revision controlled.
*/
static int allow_add;
static int allow_remove;
static int allow_replace;
static int info_only;
static int force_remove;
static int verbose;
static int mark_valid_only;
static int mark_skip_worktree_only;
static int mark_fsmonitor_only;
#define MARK_FLAG 1
#define UNMARK_FLAG 2
static struct strbuf mtime_dir = STRBUF_INIT;
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
/* Untracked cache mode */
enum uc_mode {
UC_UNSPECIFIED = -1,
UC_DISABLE = 0,
UC_ENABLE,
UC_TEST,
UC_FORCE
};
__attribute__((format (printf, 1, 2)))
static void report(const char *fmt, ...)
{
va_list vp;
if (!verbose)
return;
va_start(vp, fmt);
vprintf(fmt, vp);
putchar('\n');
va_end(vp);
}
static void remove_test_directory(void)
{
if (mtime_dir.len)
remove_dir_recursively(&mtime_dir, 0);
}
static const char *get_mtime_path(const char *path)
{
static struct strbuf sb = STRBUF_INIT;
strbuf_reset(&sb);
strbuf_addf(&sb, "%s/%s", mtime_dir.buf, path);
return sb.buf;
}
static void xmkdir(const char *path)
{
path = get_mtime_path(path);
if (mkdir(path, 0700))
die_errno(_("failed to create directory %s"), path);
}
static int xstat_mtime_dir(struct stat *st)
{
if (stat(mtime_dir.buf, st))
die_errno(_("failed to stat %s"), mtime_dir.buf);
return 0;
}
static int create_file(const char *path)
{
int fd;
path = get_mtime_path(path);
fd = open(path, O_CREAT | O_RDWR, 0644);
if (fd < 0)
die_errno(_("failed to create file %s"), path);
return fd;
}
static void xunlink(const char *path)
{
path = get_mtime_path(path);
if (unlink(path))
die_errno(_("failed to delete file %s"), path);
}
static void xrmdir(const char *path)
{
path = get_mtime_path(path);
if (rmdir(path))
die_errno(_("failed to delete directory %s"), path);
}
static void avoid_racy(void)
{
/*
* not use if we could usleep(10) if USE_NSEC is defined. The
* field nsec could be there, but the OS could choose to
* ignore it?
*/
sleep(1);
}
static int test_if_untracked_cache_is_supported(void)
{
struct stat st;
struct stat_data base;
int fd, ret = 0;
char *cwd;
strbuf_addstr(&mtime_dir, "mtime-test-XXXXXX");
if (!mkdtemp(mtime_dir.buf))
die_errno("Could not make temporary directory");
cwd = xgetcwd();
fprintf(stderr, _("Testing mtime in '%s' "), cwd);
free(cwd);
atexit(remove_test_directory);
xstat_mtime_dir(&st);
fill_stat_data(&base, &st);
fputc('.', stderr);
avoid_racy();
fd = create_file("newfile");
xstat_mtime_dir(&st);
if (!match_stat_data(&base, &st)) {
close(fd);
fputc('\n', stderr);
fprintf_ln(stderr,_("directory stat info does not "
"change after adding a new file"));
goto done;
}
fill_stat_data(&base, &st);
fputc('.', stderr);
avoid_racy();
xmkdir("new-dir");
xstat_mtime_dir(&st);
if (!match_stat_data(&base, &st)) {
close(fd);
fputc('\n', stderr);
fprintf_ln(stderr, _("directory stat info does not change "
"after adding a new directory"));
goto done;
}
fill_stat_data(&base, &st);
fputc('.', stderr);
avoid_racy();
write_or_die(fd, "data", 4);
close(fd);
xstat_mtime_dir(&st);
if (match_stat_data(&base, &st)) {
fputc('\n', stderr);
fprintf_ln(stderr, _("directory stat info changes "
"after updating a file"));
goto done;
}
fputc('.', stderr);
avoid_racy();
close(create_file("new-dir/new"));
xstat_mtime_dir(&st);
if (match_stat_data(&base, &st)) {
fputc('\n', stderr);
fprintf_ln(stderr, _("directory stat info changes after "
"adding a file inside subdirectory"));
goto done;
}
fputc('.', stderr);
avoid_racy();
xunlink("newfile");
xstat_mtime_dir(&st);
if (!match_stat_data(&base, &st)) {
fputc('\n', stderr);
fprintf_ln(stderr, _("directory stat info does not "
"change after deleting a file"));
goto done;
}
fill_stat_data(&base, &st);
fputc('.', stderr);
avoid_racy();
xunlink("new-dir/new");
xrmdir("new-dir");
xstat_mtime_dir(&st);
if (!match_stat_data(&base, &st)) {
fputc('\n', stderr);
fprintf_ln(stderr, _("directory stat info does not "
"change after deleting a directory"));
goto done;
}
if (rmdir(mtime_dir.buf))
die_errno(_("failed to delete directory %s"), mtime_dir.buf);
fprintf_ln(stderr, _(" OK"));
ret = 1;
done:
strbuf_release(&mtime_dir);
return ret;
}
static int mark_ce_flags(const char *path, int flag, int mark)
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
{
int namelen = strlen(path);
int pos = cache_name_pos(path, namelen);
if (0 <= pos) {
mark_fsmonitor_invalid(&the_index, active_cache[pos]);
if (mark)
active_cache[pos]->ce_flags |= flag;
else
active_cache[pos]->ce_flags &= ~flag;
active_cache[pos]->ce_flags |= CE_UPDATE_IN_BASE;
cache_tree_invalidate_path(&the_index, path);
active_cache_changed |= CE_ENTRY_CHANGED;
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
return 0;
}
return -1;
}
static int remove_one_path(const char *path)
{
if (!allow_remove)
return error("%s: does not exist and --remove not passed", path);
if (remove_file_from_cache(path))
return error("%s: cannot remove from the index", path);
return 0;
}
/*
* Handle a path that couldn't be lstat'ed. It's either:
* - missing file (ENOENT or ENOTDIR). That's ok if we're
* supposed to be removing it and the removal actually
* succeeds.
* - permission error. That's never ok.
*/
static int process_lstat_error(const char *path, int err)
{
if (is_missing_file_error(err))
return remove_one_path(path);
return error("lstat(\"%s\"): %s", path, strerror(err));
}
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 17:29:00 +02:00
static int add_one_path(const struct cache_entry *old, const char *path, int len, struct stat *st)
{
block alloc: add lifecycle APIs for cache_entry structs It has been observed that the time spent loading an index with a large number of entries is partly dominated by malloc() calls. This change is in preparation for using memory pools to reduce the number of malloc() calls made to allocate cahce entries when loading an index. Add an API to allocate and discard cache entries, abstracting the details of managing the memory backing the cache entries. This commit does actually change how memory is managed - this will be done in a later commit in the series. This change makes the distinction between cache entries that are associated with an index and cache entries that are not associated with an index. A main use of cache entries is with an index, and we can optimize the memory management around this. We still have other cases where a cache entry is not persisted with an index, and so we need to handle the "transient" use case as well. To keep the congnitive overhead of managing the cache entries, there will only be a single discard function. This means there must be enough information kept with the cache entry so that we know how to discard them. A summary of the main functions in the API is: make_cache_entry: create cache entry for use in an index. Uses specified parameters to populate cache_entry fields. make_empty_cache_entry: Create an empty cache entry for use in an index. Returns cache entry with empty fields. make_transient_cache_entry: create cache entry that is not used in an index. Uses specified parameters to populate cache_entry fields. make_empty_transient_cache_entry: create cache entry that is not used in an index. Returns cache entry with empty fields. discard_cache_entry: A single function that knows how to discard a cache entry regardless of how it was allocated. Signed-off-by: Jameson Miller <jamill@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 21:49:31 +02:00
int option;
struct cache_entry *ce;
/* Was the old index entry already up-to-date? */
if (old && !ce_stage(old) && !ce_match_stat(old, st, 0))
return 0;
block alloc: add lifecycle APIs for cache_entry structs It has been observed that the time spent loading an index with a large number of entries is partly dominated by malloc() calls. This change is in preparation for using memory pools to reduce the number of malloc() calls made to allocate cahce entries when loading an index. Add an API to allocate and discard cache entries, abstracting the details of managing the memory backing the cache entries. This commit does actually change how memory is managed - this will be done in a later commit in the series. This change makes the distinction between cache entries that are associated with an index and cache entries that are not associated with an index. A main use of cache entries is with an index, and we can optimize the memory management around this. We still have other cases where a cache entry is not persisted with an index, and so we need to handle the "transient" use case as well. To keep the congnitive overhead of managing the cache entries, there will only be a single discard function. This means there must be enough information kept with the cache entry so that we know how to discard them. A summary of the main functions in the API is: make_cache_entry: create cache entry for use in an index. Uses specified parameters to populate cache_entry fields. make_empty_cache_entry: Create an empty cache entry for use in an index. Returns cache entry with empty fields. make_transient_cache_entry: create cache entry that is not used in an index. Uses specified parameters to populate cache_entry fields. make_empty_transient_cache_entry: create cache entry that is not used in an index. Returns cache entry with empty fields. discard_cache_entry: A single function that knows how to discard a cache entry regardless of how it was allocated. Signed-off-by: Jameson Miller <jamill@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 21:49:31 +02:00
ce = make_empty_cache_entry(&the_index, len);
memcpy(ce->name, path, len);
ce->ce_flags = create_ce_flags(0);
ce->ce_namelen = len;
fill_stat_cache_info(ce, st);
ce->ce_mode = ce_mode_from_stat(old, st->st_mode);
if (index_path(&ce->oid, path, st,
info_only ? 0 : HASH_WRITE_OBJECT)) {
block alloc: add lifecycle APIs for cache_entry structs It has been observed that the time spent loading an index with a large number of entries is partly dominated by malloc() calls. This change is in preparation for using memory pools to reduce the number of malloc() calls made to allocate cahce entries when loading an index. Add an API to allocate and discard cache entries, abstracting the details of managing the memory backing the cache entries. This commit does actually change how memory is managed - this will be done in a later commit in the series. This change makes the distinction between cache entries that are associated with an index and cache entries that are not associated with an index. A main use of cache entries is with an index, and we can optimize the memory management around this. We still have other cases where a cache entry is not persisted with an index, and so we need to handle the "transient" use case as well. To keep the congnitive overhead of managing the cache entries, there will only be a single discard function. This means there must be enough information kept with the cache entry so that we know how to discard them. A summary of the main functions in the API is: make_cache_entry: create cache entry for use in an index. Uses specified parameters to populate cache_entry fields. make_empty_cache_entry: Create an empty cache entry for use in an index. Returns cache entry with empty fields. make_transient_cache_entry: create cache entry that is not used in an index. Uses specified parameters to populate cache_entry fields. make_empty_transient_cache_entry: create cache entry that is not used in an index. Returns cache entry with empty fields. discard_cache_entry: A single function that knows how to discard a cache entry regardless of how it was allocated. Signed-off-by: Jameson Miller <jamill@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 21:49:31 +02:00
discard_cache_entry(ce);
return -1;
}
option = allow_add ? ADD_CACHE_OK_TO_ADD : 0;
option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0;
if (add_cache_entry(ce, option)) {
block alloc: add lifecycle APIs for cache_entry structs It has been observed that the time spent loading an index with a large number of entries is partly dominated by malloc() calls. This change is in preparation for using memory pools to reduce the number of malloc() calls made to allocate cahce entries when loading an index. Add an API to allocate and discard cache entries, abstracting the details of managing the memory backing the cache entries. This commit does actually change how memory is managed - this will be done in a later commit in the series. This change makes the distinction between cache entries that are associated with an index and cache entries that are not associated with an index. A main use of cache entries is with an index, and we can optimize the memory management around this. We still have other cases where a cache entry is not persisted with an index, and so we need to handle the "transient" use case as well. To keep the congnitive overhead of managing the cache entries, there will only be a single discard function. This means there must be enough information kept with the cache entry so that we know how to discard them. A summary of the main functions in the API is: make_cache_entry: create cache entry for use in an index. Uses specified parameters to populate cache_entry fields. make_empty_cache_entry: Create an empty cache entry for use in an index. Returns cache entry with empty fields. make_transient_cache_entry: create cache entry that is not used in an index. Uses specified parameters to populate cache_entry fields. make_empty_transient_cache_entry: create cache entry that is not used in an index. Returns cache entry with empty fields. discard_cache_entry: A single function that knows how to discard a cache entry regardless of how it was allocated. Signed-off-by: Jameson Miller <jamill@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 21:49:31 +02:00
discard_cache_entry(ce);
return error("%s: cannot add to the index - missing --add option?", path);
}
return 0;
}
/*
* Handle a path that was a directory. Four cases:
*
* - it's already a gitlink in the index, and we keep it that
* way, and update it if we can (if we cannot find the HEAD,
* we're going to keep it unchanged in the index!)
*
* - it's a *file* in the index, in which case it should be
* removed as a file if removal is allowed, since it doesn't
* exist as such any more. If removal isn't allowed, it's
* an error.
*
* (NOTE! This is old and arguably fairly strange behaviour.
* We might want to make this an error unconditionally, and
* use "--force-remove" if you actually want to force removal).
*
* - it used to exist as a subdirectory (ie multiple files with
* this particular prefix) in the index, in which case it's wrong
* to try to update it as a directory.
*
* - it doesn't exist at all in the index, but it is a valid
* git directory, and it should be *added* as a gitlink.
*/
static int process_directory(const char *path, int len, struct stat *st)
{
struct object_id oid;
int pos = cache_name_pos(path, len);
/* Exact match: file or existing gitlink */
if (pos >= 0) {
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 17:29:00 +02:00
const struct cache_entry *ce = active_cache[pos];
if (S_ISGITLINK(ce->ce_mode)) {
/* Do nothing to the index if there is no HEAD! */
if (resolve_gitlink_ref(path, "HEAD", &oid) < 0)
return 0;
return add_one_path(ce, path, len, st);
}
/* Should this be an unconditional error? */
return remove_one_path(path);
}
/* Inexact match: is there perhaps a subdirectory match? */
pos = -pos-1;
while (pos < active_nr) {
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 17:29:00 +02:00
const struct cache_entry *ce = active_cache[pos++];
if (strncmp(ce->name, path, len))
break;
if (ce->name[len] > '/')
break;
if (ce->name[len] < '/')
continue;
/* Subdirectory match - error out */
return error("%s: is a directory - add individual files instead", path);
}
/* No match - should we add it as a gitlink? */
if (!resolve_gitlink_ref(path, "HEAD", &oid))
return add_one_path(NULL, path, len, st);
/* Error out. */
return error("%s: is a directory - add files inside instead", path);
}
static int process_path(const char *path, struct stat *st, int stat_errno)
{
int pos, len;
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 17:29:00 +02:00
const struct cache_entry *ce;
len = strlen(path);
if (has_symlink_leading_path(path, len))
return error("'%s' is beyond a symbolic link", path);
pos = cache_name_pos(path, len);
ce = pos < 0 ? NULL : active_cache[pos];
if (ce && ce_skip_worktree(ce)) {
/*
* working directory version is assumed "good"
* so updating it does not make sense.
* On the other hand, removing it from index should work
*/
if (allow_remove && remove_file_from_cache(path))
return error("%s: cannot remove from the index", path);
return 0;
}
/*
* First things first: get the stat information, to decide
* what to do about the pathname!
*/
if (stat_errno)
return process_lstat_error(path, stat_errno);
if (S_ISDIR(st->st_mode))
return process_directory(path, len, st);
return add_one_path(ce, path, len, st);
}
static int add_cacheinfo(unsigned int mode, const struct object_id *oid,
update-index: allow --index-info to add higher stages. The new merge world order tells the merge strategies to leave the cache unmerged and store the automerge result in the working tree if automerge is not clean. This was done for the resolve strategy and recursive strategy when no rename is involved, but recording a conflicting merge in the rename case could not easily be done by the recursive strategy. This commit adds a new input format, in addition to the exsting two, to "update-index --index-info". (1) mode SP sha1 TAB path The first format is what "git-apply --index-info" reports, and used to reconstruct a partial tree that is used for phony merge base tree when falling back on 3-way merge. (2) mode SP type SP sha1 TAB path The second format is to stuff git-ls-tree output into the index file. (3) mode SP sha1 SP stage TAB path This format is to put higher order stages into the index file and matches git-ls-files --stage output. To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and then feeding necessary input lines in the (3) format. For example, starting with this index: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 0 frotz $ git update-index --index-info ;# interactive session -- input follows... 0 0000000000000000000000000000000000000000 frotz 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz The first line of the input feeds 0 as the mode to remove the path; the SHA1 does not matter as long as it is well formatted. Then the second and third line feeds stage 1 and stage 2 entries for that path. After the above, we would end up with this: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz This completes the groundwork for the new merge world order. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-07 10:45:38 +01:00
const char *path, int stage)
{
block alloc: add lifecycle APIs for cache_entry structs It has been observed that the time spent loading an index with a large number of entries is partly dominated by malloc() calls. This change is in preparation for using memory pools to reduce the number of malloc() calls made to allocate cahce entries when loading an index. Add an API to allocate and discard cache entries, abstracting the details of managing the memory backing the cache entries. This commit does actually change how memory is managed - this will be done in a later commit in the series. This change makes the distinction between cache entries that are associated with an index and cache entries that are not associated with an index. A main use of cache entries is with an index, and we can optimize the memory management around this. We still have other cases where a cache entry is not persisted with an index, and so we need to handle the "transient" use case as well. To keep the congnitive overhead of managing the cache entries, there will only be a single discard function. This means there must be enough information kept with the cache entry so that we know how to discard them. A summary of the main functions in the API is: make_cache_entry: create cache entry for use in an index. Uses specified parameters to populate cache_entry fields. make_empty_cache_entry: Create an empty cache entry for use in an index. Returns cache entry with empty fields. make_transient_cache_entry: create cache entry that is not used in an index. Uses specified parameters to populate cache_entry fields. make_empty_transient_cache_entry: create cache entry that is not used in an index. Returns cache entry with empty fields. discard_cache_entry: A single function that knows how to discard a cache entry regardless of how it was allocated. Signed-off-by: Jameson Miller <jamill@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 21:49:31 +02:00
int len, option;
struct cache_entry *ce;
if (!verify_path(path, mode))
return error("Invalid path '%s'", path);
update-index: allow --index-info to add higher stages. The new merge world order tells the merge strategies to leave the cache unmerged and store the automerge result in the working tree if automerge is not clean. This was done for the resolve strategy and recursive strategy when no rename is involved, but recording a conflicting merge in the rename case could not easily be done by the recursive strategy. This commit adds a new input format, in addition to the exsting two, to "update-index --index-info". (1) mode SP sha1 TAB path The first format is what "git-apply --index-info" reports, and used to reconstruct a partial tree that is used for phony merge base tree when falling back on 3-way merge. (2) mode SP type SP sha1 TAB path The second format is to stuff git-ls-tree output into the index file. (3) mode SP sha1 SP stage TAB path This format is to put higher order stages into the index file and matches git-ls-files --stage output. To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and then feeding necessary input lines in the (3) format. For example, starting with this index: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 0 frotz $ git update-index --index-info ;# interactive session -- input follows... 0 0000000000000000000000000000000000000000 frotz 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz The first line of the input feeds 0 as the mode to remove the path; the SHA1 does not matter as long as it is well formatted. Then the second and third line feeds stage 1 and stage 2 entries for that path. After the above, we would end up with this: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz This completes the groundwork for the new merge world order. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-07 10:45:38 +01:00
len = strlen(path);
block alloc: add lifecycle APIs for cache_entry structs It has been observed that the time spent loading an index with a large number of entries is partly dominated by malloc() calls. This change is in preparation for using memory pools to reduce the number of malloc() calls made to allocate cahce entries when loading an index. Add an API to allocate and discard cache entries, abstracting the details of managing the memory backing the cache entries. This commit does actually change how memory is managed - this will be done in a later commit in the series. This change makes the distinction between cache entries that are associated with an index and cache entries that are not associated with an index. A main use of cache entries is with an index, and we can optimize the memory management around this. We still have other cases where a cache entry is not persisted with an index, and so we need to handle the "transient" use case as well. To keep the congnitive overhead of managing the cache entries, there will only be a single discard function. This means there must be enough information kept with the cache entry so that we know how to discard them. A summary of the main functions in the API is: make_cache_entry: create cache entry for use in an index. Uses specified parameters to populate cache_entry fields. make_empty_cache_entry: Create an empty cache entry for use in an index. Returns cache entry with empty fields. make_transient_cache_entry: create cache entry that is not used in an index. Uses specified parameters to populate cache_entry fields. make_empty_transient_cache_entry: create cache entry that is not used in an index. Returns cache entry with empty fields. discard_cache_entry: A single function that knows how to discard a cache entry regardless of how it was allocated. Signed-off-by: Jameson Miller <jamill@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 21:49:31 +02:00
ce = make_empty_cache_entry(&the_index, len);
oidcpy(&ce->oid, oid);
update-index: allow --index-info to add higher stages. The new merge world order tells the merge strategies to leave the cache unmerged and store the automerge result in the working tree if automerge is not clean. This was done for the resolve strategy and recursive strategy when no rename is involved, but recording a conflicting merge in the rename case could not easily be done by the recursive strategy. This commit adds a new input format, in addition to the exsting two, to "update-index --index-info". (1) mode SP sha1 TAB path The first format is what "git-apply --index-info" reports, and used to reconstruct a partial tree that is used for phony merge base tree when falling back on 3-way merge. (2) mode SP type SP sha1 TAB path The second format is to stuff git-ls-tree output into the index file. (3) mode SP sha1 SP stage TAB path This format is to put higher order stages into the index file and matches git-ls-files --stage output. To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and then feeding necessary input lines in the (3) format. For example, starting with this index: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 0 frotz $ git update-index --index-info ;# interactive session -- input follows... 0 0000000000000000000000000000000000000000 frotz 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz The first line of the input feeds 0 as the mode to remove the path; the SHA1 does not matter as long as it is well formatted. Then the second and third line feeds stage 1 and stage 2 entries for that path. After the above, we would end up with this: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz This completes the groundwork for the new merge world order. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-07 10:45:38 +01:00
memcpy(ce->name, path, len);
ce->ce_flags = create_ce_flags(stage);
ce->ce_namelen = len;
ce->ce_mode = create_ce_mode(mode);
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
if (assume_unchanged)
ce->ce_flags |= CE_VALID;
option = allow_add ? ADD_CACHE_OK_TO_ADD : 0;
option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0;
if (add_cache_entry(ce, option))
return error("%s: cannot add to the index - missing --add option?",
update-index: allow --index-info to add higher stages. The new merge world order tells the merge strategies to leave the cache unmerged and store the automerge result in the working tree if automerge is not clean. This was done for the resolve strategy and recursive strategy when no rename is involved, but recording a conflicting merge in the rename case could not easily be done by the recursive strategy. This commit adds a new input format, in addition to the exsting two, to "update-index --index-info". (1) mode SP sha1 TAB path The first format is what "git-apply --index-info" reports, and used to reconstruct a partial tree that is used for phony merge base tree when falling back on 3-way merge. (2) mode SP type SP sha1 TAB path The second format is to stuff git-ls-tree output into the index file. (3) mode SP sha1 SP stage TAB path This format is to put higher order stages into the index file and matches git-ls-files --stage output. To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and then feeding necessary input lines in the (3) format. For example, starting with this index: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 0 frotz $ git update-index --index-info ;# interactive session -- input follows... 0 0000000000000000000000000000000000000000 frotz 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz The first line of the input feeds 0 as the mode to remove the path; the SHA1 does not matter as long as it is well formatted. Then the second and third line feeds stage 1 and stage 2 entries for that path. After the above, we would end up with this: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz This completes the groundwork for the new merge world order. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-07 10:45:38 +01:00
path);
report("add '%s'", path);
return 0;
}
static void chmod_path(char flip, const char *path)
{
int pos;
struct cache_entry *ce;
pos = cache_name_pos(path, strlen(path));
if (pos < 0)
goto fail;
ce = active_cache[pos];
if (chmod_cache_entry(ce, flip) < 0)
goto fail;
report("chmod %cx '%s'", flip, path);
return;
fail:
die("git update-index: cannot chmod %cx '%s'", flip, path);
}
static void update_one(const char *path)
{
int stat_errno = 0;
struct stat st;
if (mark_valid_only || mark_skip_worktree_only || force_remove ||
mark_fsmonitor_only)
st.st_mode = 0;
else if (lstat(path, &st) < 0) {
st.st_mode = 0;
stat_errno = errno;
} /* else stat is valid */
if (!verify_path(path, st.st_mode)) {
fprintf(stderr, "Ignoring path %s\n", path);
return;
}
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
if (mark_valid_only) {
if (mark_ce_flags(path, CE_VALID, mark_valid_only == MARK_FLAG))
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
die("Unable to mark file %s", path);
return;
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
}
if (mark_skip_worktree_only) {
if (mark_ce_flags(path, CE_SKIP_WORKTREE, mark_skip_worktree_only == MARK_FLAG))
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
die("Unable to mark file %s", path);
return;
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
}
if (mark_fsmonitor_only) {
if (mark_ce_flags(path, CE_FSMONITOR_VALID, mark_fsmonitor_only == MARK_FLAG))
die("Unable to mark file %s", path);
return;
}
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
if (force_remove) {
if (remove_file_from_cache(path))
die("git update-index: unable to remove %s", path);
report("remove '%s'", path);
return;
}
if (process_path(path, &st, stat_errno))
die("Unable to process path %s", path);
report("add '%s'", path);
}
static void read_index_info(int nul_term_line)
{
const int hexsz = the_hash_algo->hexsz;
struct strbuf buf = STRBUF_INIT;
struct strbuf uq = STRBUF_INIT;
strbuf_getline_fn getline_fn;
getline_fn = nul_term_line ? strbuf_getline_nul : strbuf_getline_lf;
while (getline_fn(&buf, stdin) != EOF) {
char *ptr, *tab;
char *path_name;
struct object_id oid;
unsigned int mode;
unsigned long ul;
update-index: allow --index-info to add higher stages. The new merge world order tells the merge strategies to leave the cache unmerged and store the automerge result in the working tree if automerge is not clean. This was done for the resolve strategy and recursive strategy when no rename is involved, but recording a conflicting merge in the rename case could not easily be done by the recursive strategy. This commit adds a new input format, in addition to the exsting two, to "update-index --index-info". (1) mode SP sha1 TAB path The first format is what "git-apply --index-info" reports, and used to reconstruct a partial tree that is used for phony merge base tree when falling back on 3-way merge. (2) mode SP type SP sha1 TAB path The second format is to stuff git-ls-tree output into the index file. (3) mode SP sha1 SP stage TAB path This format is to put higher order stages into the index file and matches git-ls-files --stage output. To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and then feeding necessary input lines in the (3) format. For example, starting with this index: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 0 frotz $ git update-index --index-info ;# interactive session -- input follows... 0 0000000000000000000000000000000000000000 frotz 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz The first line of the input feeds 0 as the mode to remove the path; the SHA1 does not matter as long as it is well formatted. Then the second and third line feeds stage 1 and stage 2 entries for that path. After the above, we would end up with this: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz This completes the groundwork for the new merge world order. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-07 10:45:38 +01:00
int stage;
/* This reads lines formatted in one of three formats:
*
* (1) mode SP sha1 TAB path
* The first format is what "git apply --index-info"
update-index: allow --index-info to add higher stages. The new merge world order tells the merge strategies to leave the cache unmerged and store the automerge result in the working tree if automerge is not clean. This was done for the resolve strategy and recursive strategy when no rename is involved, but recording a conflicting merge in the rename case could not easily be done by the recursive strategy. This commit adds a new input format, in addition to the exsting two, to "update-index --index-info". (1) mode SP sha1 TAB path The first format is what "git-apply --index-info" reports, and used to reconstruct a partial tree that is used for phony merge base tree when falling back on 3-way merge. (2) mode SP type SP sha1 TAB path The second format is to stuff git-ls-tree output into the index file. (3) mode SP sha1 SP stage TAB path This format is to put higher order stages into the index file and matches git-ls-files --stage output. To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and then feeding necessary input lines in the (3) format. For example, starting with this index: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 0 frotz $ git update-index --index-info ;# interactive session -- input follows... 0 0000000000000000000000000000000000000000 frotz 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz The first line of the input feeds 0 as the mode to remove the path; the SHA1 does not matter as long as it is well formatted. Then the second and third line feeds stage 1 and stage 2 entries for that path. After the above, we would end up with this: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz This completes the groundwork for the new merge world order. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-07 10:45:38 +01:00
* reports, and used to reconstruct a partial tree
* that is used for phony merge base tree when falling
* back on 3-way merge.
*
* (2) mode SP type SP sha1 TAB path
* The second format is to stuff "git ls-tree" output
update-index: allow --index-info to add higher stages. The new merge world order tells the merge strategies to leave the cache unmerged and store the automerge result in the working tree if automerge is not clean. This was done for the resolve strategy and recursive strategy when no rename is involved, but recording a conflicting merge in the rename case could not easily be done by the recursive strategy. This commit adds a new input format, in addition to the exsting two, to "update-index --index-info". (1) mode SP sha1 TAB path The first format is what "git-apply --index-info" reports, and used to reconstruct a partial tree that is used for phony merge base tree when falling back on 3-way merge. (2) mode SP type SP sha1 TAB path The second format is to stuff git-ls-tree output into the index file. (3) mode SP sha1 SP stage TAB path This format is to put higher order stages into the index file and matches git-ls-files --stage output. To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and then feeding necessary input lines in the (3) format. For example, starting with this index: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 0 frotz $ git update-index --index-info ;# interactive session -- input follows... 0 0000000000000000000000000000000000000000 frotz 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz The first line of the input feeds 0 as the mode to remove the path; the SHA1 does not matter as long as it is well formatted. Then the second and third line feeds stage 1 and stage 2 entries for that path. After the above, we would end up with this: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz This completes the groundwork for the new merge world order. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-07 10:45:38 +01:00
* into the index file.
*
update-index: allow --index-info to add higher stages. The new merge world order tells the merge strategies to leave the cache unmerged and store the automerge result in the working tree if automerge is not clean. This was done for the resolve strategy and recursive strategy when no rename is involved, but recording a conflicting merge in the rename case could not easily be done by the recursive strategy. This commit adds a new input format, in addition to the exsting two, to "update-index --index-info". (1) mode SP sha1 TAB path The first format is what "git-apply --index-info" reports, and used to reconstruct a partial tree that is used for phony merge base tree when falling back on 3-way merge. (2) mode SP type SP sha1 TAB path The second format is to stuff git-ls-tree output into the index file. (3) mode SP sha1 SP stage TAB path This format is to put higher order stages into the index file and matches git-ls-files --stage output. To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and then feeding necessary input lines in the (3) format. For example, starting with this index: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 0 frotz $ git update-index --index-info ;# interactive session -- input follows... 0 0000000000000000000000000000000000000000 frotz 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz The first line of the input feeds 0 as the mode to remove the path; the SHA1 does not matter as long as it is well formatted. Then the second and third line feeds stage 1 and stage 2 entries for that path. After the above, we would end up with this: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz This completes the groundwork for the new merge world order. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-07 10:45:38 +01:00
* (3) mode SP sha1 SP stage TAB path
* This format is to put higher order stages into the
* index file and matches "git ls-files --stage" output.
update-index: allow --index-info to add higher stages. The new merge world order tells the merge strategies to leave the cache unmerged and store the automerge result in the working tree if automerge is not clean. This was done for the resolve strategy and recursive strategy when no rename is involved, but recording a conflicting merge in the rename case could not easily be done by the recursive strategy. This commit adds a new input format, in addition to the exsting two, to "update-index --index-info". (1) mode SP sha1 TAB path The first format is what "git-apply --index-info" reports, and used to reconstruct a partial tree that is used for phony merge base tree when falling back on 3-way merge. (2) mode SP type SP sha1 TAB path The second format is to stuff git-ls-tree output into the index file. (3) mode SP sha1 SP stage TAB path This format is to put higher order stages into the index file and matches git-ls-files --stage output. To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and then feeding necessary input lines in the (3) format. For example, starting with this index: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 0 frotz $ git update-index --index-info ;# interactive session -- input follows... 0 0000000000000000000000000000000000000000 frotz 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz The first line of the input feeds 0 as the mode to remove the path; the SHA1 does not matter as long as it is well formatted. Then the second and third line feeds stage 1 and stage 2 entries for that path. After the above, we would end up with this: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz This completes the groundwork for the new merge world order. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-07 10:45:38 +01:00
*/
errno = 0;
ul = strtoul(buf.buf, &ptr, 8);
if (ptr == buf.buf || *ptr != ' '
|| errno || (unsigned int) ul != ul)
goto bad_line;
mode = ul;
tab = strchr(ptr, '\t');
if (!tab || tab - ptr < hexsz + 1)
goto bad_line;
update-index: allow --index-info to add higher stages. The new merge world order tells the merge strategies to leave the cache unmerged and store the automerge result in the working tree if automerge is not clean. This was done for the resolve strategy and recursive strategy when no rename is involved, but recording a conflicting merge in the rename case could not easily be done by the recursive strategy. This commit adds a new input format, in addition to the exsting two, to "update-index --index-info". (1) mode SP sha1 TAB path The first format is what "git-apply --index-info" reports, and used to reconstruct a partial tree that is used for phony merge base tree when falling back on 3-way merge. (2) mode SP type SP sha1 TAB path The second format is to stuff git-ls-tree output into the index file. (3) mode SP sha1 SP stage TAB path This format is to put higher order stages into the index file and matches git-ls-files --stage output. To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and then feeding necessary input lines in the (3) format. For example, starting with this index: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 0 frotz $ git update-index --index-info ;# interactive session -- input follows... 0 0000000000000000000000000000000000000000 frotz 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz The first line of the input feeds 0 as the mode to remove the path; the SHA1 does not matter as long as it is well formatted. Then the second and third line feeds stage 1 and stage 2 entries for that path. After the above, we would end up with this: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz This completes the groundwork for the new merge world order. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-07 10:45:38 +01:00
if (tab[-2] == ' ' && '0' <= tab[-1] && tab[-1] <= '3') {
update-index: allow --index-info to add higher stages. The new merge world order tells the merge strategies to leave the cache unmerged and store the automerge result in the working tree if automerge is not clean. This was done for the resolve strategy and recursive strategy when no rename is involved, but recording a conflicting merge in the rename case could not easily be done by the recursive strategy. This commit adds a new input format, in addition to the exsting two, to "update-index --index-info". (1) mode SP sha1 TAB path The first format is what "git-apply --index-info" reports, and used to reconstruct a partial tree that is used for phony merge base tree when falling back on 3-way merge. (2) mode SP type SP sha1 TAB path The second format is to stuff git-ls-tree output into the index file. (3) mode SP sha1 SP stage TAB path This format is to put higher order stages into the index file and matches git-ls-files --stage output. To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and then feeding necessary input lines in the (3) format. For example, starting with this index: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 0 frotz $ git update-index --index-info ;# interactive session -- input follows... 0 0000000000000000000000000000000000000000 frotz 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz The first line of the input feeds 0 as the mode to remove the path; the SHA1 does not matter as long as it is well formatted. Then the second and third line feeds stage 1 and stage 2 entries for that path. After the above, we would end up with this: $ git ls-files -s 100644 8a1218a1024a212bb3db30becd860315f9f3ac52 1 frotz 100755 8a1218a1024a212bb3db30becd860315f9f3ac52 2 frotz This completes the groundwork for the new merge world order. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-07 10:45:38 +01:00
stage = tab[-1] - '0';
ptr = tab + 1; /* point at the head of path */
tab = tab - 2; /* point at tail of sha1 */
}
else {
stage = 0;
ptr = tab + 1; /* point at the head of path */
}
if (get_oid_hex(tab - hexsz, &oid) ||
tab[-(hexsz + 1)] != ' ')
goto bad_line;
path_name = ptr;
if (!nul_term_line && path_name[0] == '"') {
strbuf_reset(&uq);
if (unquote_c_style(&uq, path_name, NULL)) {
die("git update-index: bad quoting of path name");
}
path_name = uq.buf;
}
if (!verify_path(path_name, mode)) {
fprintf(stderr, "Ignoring path %s\n", path_name);
continue;
}
if (!mode) {
/* mode == 0 means there is no such path -- remove */
if (remove_file_from_cache(path_name))
die("git update-index: unable to remove %s",
ptr);
}
else {
/* mode ' ' sha1 '\t' name
* ptr[-1] points at tab,
* ptr[-41] is at the beginning of sha1
*/
ptr[-(hexsz + 2)] = ptr[-1] = 0;
if (add_cacheinfo(mode, &oid, path_name, stage))
die("git update-index: unable to update %s",
path_name);
}
continue;
bad_line:
die("malformed index info %s", buf.buf);
}
strbuf_release(&buf);
strbuf_release(&uq);
}
static const char * const update_index_usage[] = {
N_("git update-index [<options>] [--] [<file>...]"),
NULL
};
static struct object_id head_oid;
static struct object_id merge_head_oid;
static struct cache_entry *read_one_ent(const char *which,
struct object_id *ent, const char *path,
int namelen, int stage)
{
unsigned mode;
struct object_id oid;
struct cache_entry *ce;
if (get_tree_entry(ent, path, &oid, &mode)) {
if (which)
error("%s: not in %s branch.", path, which);
return NULL;
}
if (mode == S_IFDIR) {
if (which)
error("%s: not a blob in %s branch.", path, which);
return NULL;
}
block alloc: add lifecycle APIs for cache_entry structs It has been observed that the time spent loading an index with a large number of entries is partly dominated by malloc() calls. This change is in preparation for using memory pools to reduce the number of malloc() calls made to allocate cahce entries when loading an index. Add an API to allocate and discard cache entries, abstracting the details of managing the memory backing the cache entries. This commit does actually change how memory is managed - this will be done in a later commit in the series. This change makes the distinction between cache entries that are associated with an index and cache entries that are not associated with an index. A main use of cache entries is with an index, and we can optimize the memory management around this. We still have other cases where a cache entry is not persisted with an index, and so we need to handle the "transient" use case as well. To keep the congnitive overhead of managing the cache entries, there will only be a single discard function. This means there must be enough information kept with the cache entry so that we know how to discard them. A summary of the main functions in the API is: make_cache_entry: create cache entry for use in an index. Uses specified parameters to populate cache_entry fields. make_empty_cache_entry: Create an empty cache entry for use in an index. Returns cache entry with empty fields. make_transient_cache_entry: create cache entry that is not used in an index. Uses specified parameters to populate cache_entry fields. make_empty_transient_cache_entry: create cache entry that is not used in an index. Returns cache entry with empty fields. discard_cache_entry: A single function that knows how to discard a cache entry regardless of how it was allocated. Signed-off-by: Jameson Miller <jamill@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 21:49:31 +02:00
ce = make_empty_cache_entry(&the_index, namelen);
oidcpy(&ce->oid, &oid);
memcpy(ce->name, path, namelen);
ce->ce_flags = create_ce_flags(stage);
ce->ce_namelen = namelen;
ce->ce_mode = create_ce_mode(mode);
return ce;
}
static int unresolve_one(const char *path)
{
int namelen = strlen(path);
int pos;
int ret = 0;
struct cache_entry *ce_2 = NULL, *ce_3 = NULL;
/* See if there is such entry in the index. */
pos = cache_name_pos(path, namelen);
if (0 <= pos) {
/* already merged */
pos = unmerge_cache_entry_at(pos);
if (pos < active_nr) {
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 17:29:00 +02:00
const struct cache_entry *ce = active_cache[pos];
if (ce_stage(ce) &&
ce_namelen(ce) == namelen &&
!memcmp(ce->name, path, namelen))
return 0;
}
/* no resolve-undo information; fall back */
} else {
/* If there isn't, either it is unmerged, or
* resolved as "removed" by mistake. We do not
* want to do anything in the former case.
*/
pos = -pos-1;
if (pos < active_nr) {
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 17:29:00 +02:00
const struct cache_entry *ce = active_cache[pos];
if (ce_namelen(ce) == namelen &&
!memcmp(ce->name, path, namelen)) {
fprintf(stderr,
"%s: skipping still unmerged path.\n",
path);
goto free_return;
}
}
}
/* Grab blobs from given path from HEAD and MERGE_HEAD,
* stuff HEAD version in stage #2,
* stuff MERGE_HEAD version in stage #3.
*/
ce_2 = read_one_ent("our", &head_oid, path, namelen, 2);
ce_3 = read_one_ent("their", &merge_head_oid, path, namelen, 3);
if (!ce_2 || !ce_3) {
ret = -1;
goto free_return;
}
if (!oidcmp(&ce_2->oid, &ce_3->oid) &&
ce_2->ce_mode == ce_3->ce_mode) {
fprintf(stderr, "%s: identical in both, skipping.\n",
path);
goto free_return;
}
remove_file_from_cache(path);
if (add_cache_entry(ce_2, ADD_CACHE_OK_TO_ADD)) {
error("%s: cannot add our version to the index.", path);
ret = -1;
goto free_return;
}
if (!add_cache_entry(ce_3, ADD_CACHE_OK_TO_ADD))
return 0;
error("%s: cannot add their version to the index.", path);
ret = -1;
free_return:
block alloc: add lifecycle APIs for cache_entry structs It has been observed that the time spent loading an index with a large number of entries is partly dominated by malloc() calls. This change is in preparation for using memory pools to reduce the number of malloc() calls made to allocate cahce entries when loading an index. Add an API to allocate and discard cache entries, abstracting the details of managing the memory backing the cache entries. This commit does actually change how memory is managed - this will be done in a later commit in the series. This change makes the distinction between cache entries that are associated with an index and cache entries that are not associated with an index. A main use of cache entries is with an index, and we can optimize the memory management around this. We still have other cases where a cache entry is not persisted with an index, and so we need to handle the "transient" use case as well. To keep the congnitive overhead of managing the cache entries, there will only be a single discard function. This means there must be enough information kept with the cache entry so that we know how to discard them. A summary of the main functions in the API is: make_cache_entry: create cache entry for use in an index. Uses specified parameters to populate cache_entry fields. make_empty_cache_entry: Create an empty cache entry for use in an index. Returns cache entry with empty fields. make_transient_cache_entry: create cache entry that is not used in an index. Uses specified parameters to populate cache_entry fields. make_empty_transient_cache_entry: create cache entry that is not used in an index. Returns cache entry with empty fields. discard_cache_entry: A single function that knows how to discard a cache entry regardless of how it was allocated. Signed-off-by: Jameson Miller <jamill@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 21:49:31 +02:00
discard_cache_entry(ce_2);
discard_cache_entry(ce_3);
return ret;
}
static void read_head_pointers(void)
{
if (read_ref("HEAD", &head_oid))
die("No HEAD -- no initial commit yet?");
if (read_ref("MERGE_HEAD", &merge_head_oid)) {
fprintf(stderr, "Not in the middle of a merge.\n");
exit(0);
}
}
static int do_unresolve(int ac, const char **av,
const char *prefix, int prefix_length)
{
int i;
int err = 0;
/* Read HEAD and MERGE_HEAD; if MERGE_HEAD does not exist, we
* are not doing a merge, so exit with success status.
*/
read_head_pointers();
for (i = 1; i < ac; i++) {
const char *arg = av[i];
char *p = prefix_path(prefix, prefix_length, arg);
err |= unresolve_one(p);
free(p);
}
return err;
}
static int do_reupdate(int ac, const char **av,
const char *prefix, int prefix_length)
{
/* Read HEAD and run update-index on paths that are
* merged and already different between index and HEAD.
*/
int pos;
int has_head = 1;
struct pathspec pathspec;
parse_pathspec(&pathspec, 0,
PATHSPEC_PREFER_CWD,
prefix, av + 1);
if (read_ref("HEAD", &head_oid))
/* If there is no HEAD, that means it is an initial
* commit. Update everything in the index.
*/
has_head = 0;
redo:
for (pos = 0; pos < active_nr; pos++) {
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 17:29:00 +02:00
const struct cache_entry *ce = active_cache[pos];
struct cache_entry *old = NULL;
int save_nr;
char *path;
if (ce_stage(ce) || !ce_path_match(ce, &pathspec, NULL))
continue;
if (has_head)
old = read_one_ent(NULL, &head_oid,
ce->name, ce_namelen(ce), 0);
if (old && ce->ce_mode == old->ce_mode &&
!oidcmp(&ce->oid, &old->oid)) {
block alloc: add lifecycle APIs for cache_entry structs It has been observed that the time spent loading an index with a large number of entries is partly dominated by malloc() calls. This change is in preparation for using memory pools to reduce the number of malloc() calls made to allocate cahce entries when loading an index. Add an API to allocate and discard cache entries, abstracting the details of managing the memory backing the cache entries. This commit does actually change how memory is managed - this will be done in a later commit in the series. This change makes the distinction between cache entries that are associated with an index and cache entries that are not associated with an index. A main use of cache entries is with an index, and we can optimize the memory management around this. We still have other cases where a cache entry is not persisted with an index, and so we need to handle the "transient" use case as well. To keep the congnitive overhead of managing the cache entries, there will only be a single discard function. This means there must be enough information kept with the cache entry so that we know how to discard them. A summary of the main functions in the API is: make_cache_entry: create cache entry for use in an index. Uses specified parameters to populate cache_entry fields. make_empty_cache_entry: Create an empty cache entry for use in an index. Returns cache entry with empty fields. make_transient_cache_entry: create cache entry that is not used in an index. Uses specified parameters to populate cache_entry fields. make_empty_transient_cache_entry: create cache entry that is not used in an index. Returns cache entry with empty fields. discard_cache_entry: A single function that knows how to discard a cache entry regardless of how it was allocated. Signed-off-by: Jameson Miller <jamill@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 21:49:31 +02:00
discard_cache_entry(old);
continue; /* unchanged */
}
/* Be careful. The working tree may not have the
* path anymore, in which case, under 'allow_remove',
* or worse yet 'allow_replace', active_nr may decrease.
*/
save_nr = active_nr;
path = xstrdup(ce->name);
update_one(path);
free(path);
block alloc: add lifecycle APIs for cache_entry structs It has been observed that the time spent loading an index with a large number of entries is partly dominated by malloc() calls. This change is in preparation for using memory pools to reduce the number of malloc() calls made to allocate cahce entries when loading an index. Add an API to allocate and discard cache entries, abstracting the details of managing the memory backing the cache entries. This commit does actually change how memory is managed - this will be done in a later commit in the series. This change makes the distinction between cache entries that are associated with an index and cache entries that are not associated with an index. A main use of cache entries is with an index, and we can optimize the memory management around this. We still have other cases where a cache entry is not persisted with an index, and so we need to handle the "transient" use case as well. To keep the congnitive overhead of managing the cache entries, there will only be a single discard function. This means there must be enough information kept with the cache entry so that we know how to discard them. A summary of the main functions in the API is: make_cache_entry: create cache entry for use in an index. Uses specified parameters to populate cache_entry fields. make_empty_cache_entry: Create an empty cache entry for use in an index. Returns cache entry with empty fields. make_transient_cache_entry: create cache entry that is not used in an index. Uses specified parameters to populate cache_entry fields. make_empty_transient_cache_entry: create cache entry that is not used in an index. Returns cache entry with empty fields. discard_cache_entry: A single function that knows how to discard a cache entry regardless of how it was allocated. Signed-off-by: Jameson Miller <jamill@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 21:49:31 +02:00
discard_cache_entry(old);
if (save_nr != active_nr)
goto redo;
}
clear_pathspec(&pathspec);
return 0;
}
struct refresh_params {
unsigned int flags;
int *has_errors;
};
static int refresh(struct refresh_params *o, unsigned int flag)
{
setup_work_tree();
read_cache_preload(NULL);
*o->has_errors |= refresh_cache(o->flags | flag);
return 0;
}
static int refresh_callback(const struct option *opt,
const char *arg, int unset)
{
return refresh(opt->value, 0);
}
static int really_refresh_callback(const struct option *opt,
const char *arg, int unset)
{
return refresh(opt->value, REFRESH_REALLY);
}
static int chmod_callback(const struct option *opt,
const char *arg, int unset)
{
char *flip = opt->value;
if ((arg[0] != '-' && arg[0] != '+') || arg[1] != 'x' || arg[2])
return error("option 'chmod' expects \"+x\" or \"-x\"");
*flip = arg[0];
return 0;
}
static int resolve_undo_clear_callback(const struct option *opt,
const char *arg, int unset)
{
resolve_undo_clear();
return 0;
}
static int parse_new_style_cacheinfo(const char *arg,
unsigned int *mode,
struct object_id *oid,
const char **path)
{
unsigned long ul;
char *endp;
const char *p;
if (!arg)
return -1;
errno = 0;
ul = strtoul(arg, &endp, 8);
if (errno || endp == arg || *endp != ',' || (unsigned int) ul != ul)
return -1; /* not a new-style cacheinfo */
*mode = ul;
endp++;
if (parse_oid_hex(endp, oid, &p) || *p != ',')
return -1;
*path = p + 1;
return 0;
}
static int cacheinfo_callback(struct parse_opt_ctx_t *ctx,
const struct option *opt, int unset)
{
struct object_id oid;
unsigned int mode;
const char *path;
if (!parse_new_style_cacheinfo(ctx->argv[1], &mode, &oid, &path)) {
if (add_cacheinfo(mode, &oid, path, 0))
die("git update-index: --cacheinfo cannot add %s", path);
ctx->argv++;
ctx->argc--;
return 0;
}
if (ctx->argc <= 3)
return error("option 'cacheinfo' expects <mode>,<sha1>,<path>");
if (strtoul_ui(*++ctx->argv, 8, &mode) ||
get_oid_hex(*++ctx->argv, &oid) ||
add_cacheinfo(mode, &oid, *++ctx->argv, 0))
die("git update-index: --cacheinfo cannot add %s", *ctx->argv);
ctx->argc -= 3;
return 0;
}
static int stdin_cacheinfo_callback(struct parse_opt_ctx_t *ctx,
const struct option *opt, int unset)
{
int *nul_term_line = opt->value;
if (ctx->argc != 1)
return error("option '%s' must be the last argument", opt->long_name);
allow_add = allow_replace = allow_remove = 1;
read_index_info(*nul_term_line);
return 0;
}
static int stdin_callback(struct parse_opt_ctx_t *ctx,
const struct option *opt, int unset)
{
int *read_from_stdin = opt->value;
if (ctx->argc != 1)
return error("option '%s' must be the last argument", opt->long_name);
*read_from_stdin = 1;
return 0;
}
static int unresolve_callback(struct parse_opt_ctx_t *ctx,
const struct option *opt, int flags)
{
int *has_errors = opt->value;
const char *prefix = startup_info->prefix;
/* consume remaining arguments. */
*has_errors = do_unresolve(ctx->argc, ctx->argv,
prefix, prefix ? strlen(prefix) : 0);
if (*has_errors)
active_cache_changed = 0;
ctx->argv += ctx->argc - 1;
ctx->argc = 1;
return 0;
}
static int reupdate_callback(struct parse_opt_ctx_t *ctx,
const struct option *opt, int flags)
{
int *has_errors = opt->value;
const char *prefix = startup_info->prefix;
/* consume remaining arguments. */
setup_work_tree();
*has_errors = do_reupdate(ctx->argc, ctx->argv,
prefix, prefix ? strlen(prefix) : 0);
if (*has_errors)
active_cache_changed = 0;
ctx->argv += ctx->argc - 1;
ctx->argc = 1;
return 0;
}
int cmd_update_index(int argc, const char **argv, const char *prefix)
{
int newfd, entries, has_errors = 0, nul_term_line = 0;
enum uc_mode untracked_cache = UC_UNSPECIFIED;
int read_from_stdin = 0;
int prefix_length = prefix ? strlen(prefix) : 0;
int preferred_index_format = 0;
char set_executable_bit = 0;
struct refresh_params refresh_args = {0, &has_errors};
int lock_error = 0;
int split_index = -1;
int force_write = 0;
int fsmonitor = -1;
struct lock_file lock_file = LOCK_INIT;
struct parse_opt_ctx_t ctx;
strbuf_getline_fn getline_fn;
int parseopt_state = PARSE_OPT_UNKNOWN;
struct option options[] = {
OPT_BIT('q', NULL, &refresh_args.flags,
N_("continue refresh even when index needs update"),
REFRESH_QUIET),
OPT_BIT(0, "ignore-submodules", &refresh_args.flags,
N_("refresh: ignore submodules"),
REFRESH_IGNORE_SUBMODULES),
OPT_SET_INT(0, "add", &allow_add,
N_("do not ignore new files"), 1),
OPT_SET_INT(0, "replace", &allow_replace,
N_("let files replace directories and vice-versa"), 1),
OPT_SET_INT(0, "remove", &allow_remove,
N_("notice files missing from worktree"), 1),
OPT_BIT(0, "unmerged", &refresh_args.flags,
N_("refresh even if index contains unmerged entries"),
REFRESH_UNMERGED),
{OPTION_CALLBACK, 0, "refresh", &refresh_args, NULL,
N_("refresh stat information"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG,
refresh_callback},
{OPTION_CALLBACK, 0, "really-refresh", &refresh_args, NULL,
N_("like --refresh, but ignore assume-unchanged setting"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG,
really_refresh_callback},
{OPTION_LOWLEVEL_CALLBACK, 0, "cacheinfo", NULL,
N_("<mode>,<object>,<path>"),
N_("add the specified entry to the index"),
PARSE_OPT_NOARG | /* disallow --cacheinfo=<mode> form */
PARSE_OPT_NONEG | PARSE_OPT_LITERAL_ARGHELP,
(parse_opt_cb *) cacheinfo_callback},
{OPTION_CALLBACK, 0, "chmod", &set_executable_bit, "(+|-)x",
N_("override the executable bit of the listed files"),
PARSE_OPT_NONEG,
chmod_callback},
{OPTION_SET_INT, 0, "assume-unchanged", &mark_valid_only, NULL,
N_("mark files as \"not changing\""),
PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, MARK_FLAG},
{OPTION_SET_INT, 0, "no-assume-unchanged", &mark_valid_only, NULL,
N_("clear assumed-unchanged bit"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, UNMARK_FLAG},
{OPTION_SET_INT, 0, "skip-worktree", &mark_skip_worktree_only, NULL,
N_("mark files as \"index-only\""),
PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, MARK_FLAG},
{OPTION_SET_INT, 0, "no-skip-worktree", &mark_skip_worktree_only, NULL,
N_("clear skip-worktree bit"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, UNMARK_FLAG},
OPT_SET_INT(0, "info-only", &info_only,
N_("add to index only; do not add content to object database"), 1),
OPT_SET_INT(0, "force-remove", &force_remove,
N_("remove named paths even if present in worktree"), 1),
OPT_BOOL('z', NULL, &nul_term_line,
N_("with --stdin: input lines are terminated by null bytes")),
{OPTION_LOWLEVEL_CALLBACK, 0, "stdin", &read_from_stdin, NULL,
N_("read list of paths to be updated from standard input"),
PARSE_OPT_NONEG | PARSE_OPT_NOARG,
(parse_opt_cb *) stdin_callback},
{OPTION_LOWLEVEL_CALLBACK, 0, "index-info", &nul_term_line, NULL,
N_("add entries from standard input to the index"),
PARSE_OPT_NONEG | PARSE_OPT_NOARG,
(parse_opt_cb *) stdin_cacheinfo_callback},
{OPTION_LOWLEVEL_CALLBACK, 0, "unresolve", &has_errors, NULL,
N_("repopulate stages #2 and #3 for the listed paths"),
PARSE_OPT_NONEG | PARSE_OPT_NOARG,
(parse_opt_cb *) unresolve_callback},
{OPTION_LOWLEVEL_CALLBACK, 'g', "again", &has_errors, NULL,
N_("only update entries that differ from HEAD"),
PARSE_OPT_NONEG | PARSE_OPT_NOARG,
(parse_opt_cb *) reupdate_callback},
OPT_BIT(0, "ignore-missing", &refresh_args.flags,
N_("ignore files missing from worktree"),
REFRESH_IGNORE_MISSING),
OPT_SET_INT(0, "verbose", &verbose,
N_("report actions to standard output"), 1),
{OPTION_CALLBACK, 0, "clear-resolve-undo", NULL, NULL,
N_("(for porcelains) forget saved unresolved conflicts"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG,
resolve_undo_clear_callback},
OPT_INTEGER(0, "index-version", &preferred_index_format,
N_("write index in this format")),
OPT_BOOL(0, "split-index", &split_index,
N_("enable or disable split index")),
update-index: manually enable or disable untracked cache Overall time saving on "git status" is about 40% in the best case scenario, removing ..collect_untracked() as the most time consuming function. read and refresh index operations are now at the top (which should drop when index-helper and/or watchman support is added). More numbers and analysis below. webkit.git ========== 169k files. 6k dirs. Lots of test data (i.e. not touched most of the time) Base status ----------- Index version 4 in split index mode and cache-tree populated. No untracked cache. It shows how time is consumed by "git status". The same settings are used for other repos below. 18:28:10.199679 builtin/commit.c:1394 performance: 0.000000451 s: cmd_status:setup 18:28:10.474847 read-cache.c:1407 performance: 0.274873831 s: read_index 18:28:10.475295 read-cache.c:1407 performance: 0.000000656 s: read_index 18:28:10.728443 preload-index.c:131 performance: 0.253147487 s: read_index_preload 18:28:10.741422 read-cache.c:1254 performance: 0.012868340 s: refresh_index 18:28:10.752300 wt-status.c:623 performance: 0.010421357 s: wt_status_collect_changes_worktree 18:28:10.762069 wt-status.c:629 performance: 0.009644748 s: wt_status_collect_changes_index 18:28:11.601019 wt-status.c:632 performance: 0.838859547 s: wt_status_collect_untracked 18:28:11.605939 builtin/commit.c:1421 performance: 0.004835004 s: cmd_status:update_index 18:28:11.606580 trace.c:415 performance: 1.407878388 s: git command: 'git' 'status' Populating status ----------------- This is after enabling untracked cache and the cache is still empty. We see a slight increase in .._collect_untracked() and update_index (because new cache has to be written to $GIT_DIR/index). 18:28:18.915213 builtin/commit.c:1394 performance: 0.000000326 s: cmd_status:setup 18:28:19.197364 read-cache.c:1407 performance: 0.281901416 s: read_index 18:28:19.197754 read-cache.c:1407 performance: 0.000000546 s: read_index 18:28:19.451355 preload-index.c:131 performance: 0.253599607 s: read_index_preload 18:28:19.464400 read-cache.c:1254 performance: 0.012935336 s: refresh_index 18:28:19.475115 wt-status.c:623 performance: 0.010236920 s: wt_status_collect_changes_worktree 18:28:19.486022 wt-status.c:629 performance: 0.010801685 s: wt_status_collect_changes_index 18:28:20.362660 wt-status.c:632 performance: 0.876551366 s: wt_status_collect_untracked 18:28:20.396199 builtin/commit.c:1421 performance: 0.033447969 s: cmd_status:update_index 18:28:20.396939 trace.c:415 performance: 1.482695902 s: git command: 'git' 'status' Populated status ---------------- After the cache is populated, wt_status_collect_untracked() drops 82% from 0.838s to 0.144s. Overall time drops 45%. Top offenders are now read_index() and read_index_preload(). 18:28:20.408605 builtin/commit.c:1394 performance: 0.000000457 s: cmd_status:setup 18:28:20.692864 read-cache.c:1407 performance: 0.283980458 s: read_index 18:28:20.693273 read-cache.c:1407 performance: 0.000000661 s: read_index 18:28:20.958814 preload-index.c:131 performance: 0.265540254 s: read_index_preload 18:28:20.972375 read-cache.c:1254 performance: 0.013437429 s: refresh_index 18:28:20.983959 wt-status.c:623 performance: 0.011146646 s: wt_status_collect_changes_worktree 18:28:20.993948 wt-status.c:629 performance: 0.009879094 s: wt_status_collect_changes_index 18:28:21.138125 wt-status.c:632 performance: 0.144084737 s: wt_status_collect_untracked 18:28:21.173678 builtin/commit.c:1421 performance: 0.035463949 s: cmd_status:update_index 18:28:21.174251 trace.c:415 performance: 0.766707355 s: git command: 'git' 'status' gentoo-x86.git ============== This repository is a strange one with a balanced, wide and shallow worktree (about 100k files and 23k dirs) and no .gitignore in worktree. .._collect_untracked() time drops 88%, total time drops 56%. Base status ----------- 18:20:40.828642 builtin/commit.c:1394 performance: 0.000000496 s: cmd_status:setup 18:20:41.027233 read-cache.c:1407 performance: 0.198130532 s: read_index 18:20:41.027670 read-cache.c:1407 performance: 0.000000581 s: read_index 18:20:41.171716 preload-index.c:131 performance: 0.144045594 s: read_index_preload 18:20:41.179171 read-cache.c:1254 performance: 0.007320424 s: refresh_index 18:20:41.185785 wt-status.c:623 performance: 0.006144638 s: wt_status_collect_changes_worktree 18:20:41.192701 wt-status.c:629 performance: 0.006780184 s: wt_status_collect_changes_index 18:20:41.991723 wt-status.c:632 performance: 0.798927029 s: wt_status_collect_untracked 18:20:41.994664 builtin/commit.c:1421 performance: 0.002852772 s: cmd_status:update_index 18:20:41.995458 trace.c:415 performance: 1.168427502 s: git command: 'git' 'status' Populating status ----------------- 18:20:48.968848 builtin/commit.c:1394 performance: 0.000000380 s: cmd_status:setup 18:20:49.172918 read-cache.c:1407 performance: 0.203734214 s: read_index 18:20:49.173341 read-cache.c:1407 performance: 0.000000562 s: read_index 18:20:49.320013 preload-index.c:131 performance: 0.146671391 s: read_index_preload 18:20:49.328039 read-cache.c:1254 performance: 0.007921957 s: refresh_index 18:20:49.334680 wt-status.c:623 performance: 0.006172020 s: wt_status_collect_changes_worktree 18:20:49.342526 wt-status.c:629 performance: 0.007731746 s: wt_status_collect_changes_index 18:20:50.257510 wt-status.c:632 performance: 0.914864222 s: wt_status_collect_untracked 18:20:50.338371 builtin/commit.c:1421 performance: 0.080776477 s: cmd_status:update_index 18:20:50.338900 trace.c:415 performance: 1.371462446 s: git command: 'git' 'status' Populated status ---------------- 18:20:50.351160 builtin/commit.c:1394 performance: 0.000000571 s: cmd_status:setup 18:20:50.577358 read-cache.c:1407 performance: 0.225917338 s: read_index 18:20:50.577794 read-cache.c:1407 performance: 0.000000617 s: read_index 18:20:50.734140 preload-index.c:131 performance: 0.156345564 s: read_index_preload 18:20:50.745717 read-cache.c:1254 performance: 0.011463075 s: refresh_index 18:20:50.755176 wt-status.c:623 performance: 0.008877929 s: wt_status_collect_changes_worktree 18:20:50.763768 wt-status.c:629 performance: 0.008471633 s: wt_status_collect_changes_index 18:20:50.854885 wt-status.c:632 performance: 0.090988721 s: wt_status_collect_untracked 18:20:50.857765 builtin/commit.c:1421 performance: 0.002789097 s: cmd_status:update_index 18:20:50.858411 trace.c:415 performance: 0.508647673 s: git command: 'git' 'status' linux-2.6 ========= Reference repo. Not too big. .._collect_status() drops 84%. Total time drops 42%. Base status ----------- 18:34:09.870122 builtin/commit.c:1394 performance: 0.000000385 s: cmd_status:setup 18:34:09.943218 read-cache.c:1407 performance: 0.072871177 s: read_index 18:34:09.943614 read-cache.c:1407 performance: 0.000000491 s: read_index 18:34:10.004364 preload-index.c:131 performance: 0.060748102 s: read_index_preload 18:34:10.008190 read-cache.c:1254 performance: 0.003714285 s: refresh_index 18:34:10.012087 wt-status.c:623 performance: 0.002775446 s: wt_status_collect_changes_worktree 18:34:10.016054 wt-status.c:629 performance: 0.003862140 s: wt_status_collect_changes_index 18:34:10.214747 wt-status.c:632 performance: 0.198604837 s: wt_status_collect_untracked 18:34:10.216102 builtin/commit.c:1421 performance: 0.001244166 s: cmd_status:update_index 18:34:10.216817 trace.c:415 performance: 0.347670735 s: git command: 'git' 'status' Populating status ----------------- 18:34:16.595102 builtin/commit.c:1394 performance: 0.000000456 s: cmd_status:setup 18:34:16.666600 read-cache.c:1407 performance: 0.070992413 s: read_index 18:34:16.667012 read-cache.c:1407 performance: 0.000000606 s: read_index 18:34:16.729375 preload-index.c:131 performance: 0.062362492 s: read_index_preload 18:34:16.732565 read-cache.c:1254 performance: 0.003075517 s: refresh_index 18:34:16.736148 wt-status.c:623 performance: 0.002422201 s: wt_status_collect_changes_worktree 18:34:16.739990 wt-status.c:629 performance: 0.003746618 s: wt_status_collect_changes_index 18:34:16.948505 wt-status.c:632 performance: 0.208426710 s: wt_status_collect_untracked 18:34:16.961744 builtin/commit.c:1421 performance: 0.013151887 s: cmd_status:update_index 18:34:16.962233 trace.c:415 performance: 0.368537535 s: git command: 'git' 'status' Populated status ---------------- 18:34:16.970026 builtin/commit.c:1394 performance: 0.000000631 s: cmd_status:setup 18:34:17.046235 read-cache.c:1407 performance: 0.075904673 s: read_index 18:34:17.046644 read-cache.c:1407 performance: 0.000000681 s: read_index 18:34:17.113564 preload-index.c:131 performance: 0.066920253 s: read_index_preload 18:34:17.117281 read-cache.c:1254 performance: 0.003604055 s: refresh_index 18:34:17.121115 wt-status.c:623 performance: 0.002508345 s: wt_status_collect_changes_worktree 18:34:17.125089 wt-status.c:629 performance: 0.003871636 s: wt_status_collect_changes_index 18:34:17.156089 wt-status.c:632 performance: 0.030895703 s: wt_status_collect_untracked 18:34:17.169861 builtin/commit.c:1421 performance: 0.013686404 s: cmd_status:update_index 18:34:17.170391 trace.c:415 performance: 0.201474531 s: git command: 'git' 'status' Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-08 11:12:42 +01:00
OPT_BOOL(0, "untracked-cache", &untracked_cache,
N_("enable/disable untracked cache")),
OPT_SET_INT(0, "test-untracked-cache", &untracked_cache,
N_("test if the filesystem supports untracked cache"), UC_TEST),
OPT_SET_INT(0, "force-untracked-cache", &untracked_cache,
N_("enable untracked cache without testing the filesystem"), UC_FORCE),
OPT_SET_INT(0, "force-write-index", &force_write,
N_("write out the index even if is not flagged as changed"), 1),
OPT_BOOL(0, "fsmonitor", &fsmonitor,
N_("enable or disable file system monitor")),
{OPTION_SET_INT, 0, "fsmonitor-valid", &mark_fsmonitor_only, NULL,
N_("mark files as fsmonitor valid"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, MARK_FLAG},
{OPTION_SET_INT, 0, "no-fsmonitor-valid", &mark_fsmonitor_only, NULL,
N_("clear fsmonitor valid bit"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, UNMARK_FLAG},
OPT_END()
};
if (argc == 2 && !strcmp(argv[1], "-h"))
usage_with_options(update_index_usage, options);
git_config(git_default_config, NULL);
hold_locked_index(): align error handling with hold_lockfile_for_update() Callers of the hold_locked_index() function pass 0 when they want to prepare to write a new version of the index file without wishing to die or emit an error message when the request fails (e.g. somebody else already held the lock), and pass 1 when they want the call to die upon failure. This option is called LOCK_DIE_ON_ERROR by the underlying lockfile API, and the hold_locked_index() function translates the paramter to LOCK_DIE_ON_ERROR when calling the hold_lock_file_for_update(). Replace these hardcoded '1' with LOCK_DIE_ON_ERROR and stop translating. Callers other than the ones that are replaced with this change pass '0' to the function; no behaviour change is intended with this patch. Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Among the callers of hold_locked_index() that passes 0: - diff.c::refresh_index_quietly() at the end of "git diff" is an opportunistic update; it leaks the lockfile structure but it is just before the program exits and nobody should care. - builtin/describe.c::cmd_describe(), builtin/commit.c::cmd_status(), sequencer.c::read_and_refresh_cache() are all opportunistic updates and they are OK. - builtin/update-index.c::cmd_update_index() takes a lock upfront but we may end up not needing to update the index (i.e. the entries may be fully up-to-date), in which case we do not need to issue an error upon failure to acquire the lock. We do diagnose and die if we indeed need to update, so it is OK. - wt-status.c::require_clean_work_tree() IS BUGGY. It asks silence, does not check the returned value. Compare with callsites like cmd_describe() and cmd_status() to notice that it is wrong to call update_index_if_able() unconditionally.
2016-12-07 19:33:54 +01:00
/* we will diagnose later if it turns out that we need to update it */
newfd = hold_locked_index(&lock_file, 0);
if (newfd < 0)
lock_error = errno;
entries = read_cache();
if (entries < 0)
die("cache corrupted");
/*
* Custom copy of parse_options() because we want to handle
* filename arguments as they come.
*/
parse_options_start(&ctx, argc, argv, prefix,
options, PARSE_OPT_STOP_AT_NON_OPTION);
while (ctx.argc) {
if (parseopt_state != PARSE_OPT_DONE)
parseopt_state = parse_options_step(&ctx, options,
update_index_usage);
if (!ctx.argc)
break;
switch (parseopt_state) {
case PARSE_OPT_HELP:
case PARSE_OPT_ERROR:
exit(129);
case PARSE_OPT_NON_OPTION:
case PARSE_OPT_DONE:
{
const char *path = ctx.argv[0];
char *p;
setup_work_tree();
p = prefix_path(prefix, prefix_length, path);
update_one(p);
if (set_executable_bit)
chmod_path(set_executable_bit, p);
free(p);
ctx.argc--;
ctx.argv++;
break;
}
case PARSE_OPT_UNKNOWN:
if (ctx.argv[0][1] == '-')
error("unknown option '%s'", ctx.argv[0] + 2);
else
error("unknown switch '%c'", *ctx.opt);
usage_with_options(update_index_usage, options);
}
}
argc = parse_options_end(&ctx);
getline_fn = nul_term_line ? strbuf_getline_nul : strbuf_getline_lf;
if (preferred_index_format) {
if (preferred_index_format < INDEX_FORMAT_LB ||
INDEX_FORMAT_UB < preferred_index_format)
die("index-version %d not in range: %d..%d",
preferred_index_format,
INDEX_FORMAT_LB, INDEX_FORMAT_UB);
if (the_index.version != preferred_index_format)
active_cache_changed |= SOMETHING_CHANGED;
the_index.version = preferred_index_format;
}
if (read_from_stdin) {
struct strbuf buf = STRBUF_INIT;
struct strbuf unquoted = STRBUF_INIT;
setup_work_tree();
while (getline_fn(&buf, stdin) != EOF) {
char *p;
if (!nul_term_line && buf.buf[0] == '"') {
strbuf_reset(&unquoted);
if (unquote_c_style(&unquoted, buf.buf, NULL))
die("line is badly quoted");
strbuf_swap(&buf, &unquoted);
}
p = prefix_path(prefix, prefix_length, buf.buf);
update_one(p);
if (set_executable_bit)
chmod_path(set_executable_bit, p);
free(p);
}
strbuf_release(&unquoted);
strbuf_release(&buf);
}
if (split_index > 0) {
if (git_config_get_split_index() == 0)
warning(_("core.splitIndex is set to false; "
"remove or change it, if you really want to "
"enable split index"));
if (the_index.split_index)
the_index.cache_changed |= SPLIT_INDEX_ORDERED;
else
add_split_index(&the_index);
} else if (!split_index) {
if (git_config_get_split_index() == 1)
warning(_("core.splitIndex is set to true; "
"remove or change it, if you really want to "
"disable split index"));
remove_split_index(&the_index);
}
update-index: manually enable or disable untracked cache Overall time saving on "git status" is about 40% in the best case scenario, removing ..collect_untracked() as the most time consuming function. read and refresh index operations are now at the top (which should drop when index-helper and/or watchman support is added). More numbers and analysis below. webkit.git ========== 169k files. 6k dirs. Lots of test data (i.e. not touched most of the time) Base status ----------- Index version 4 in split index mode and cache-tree populated. No untracked cache. It shows how time is consumed by "git status". The same settings are used for other repos below. 18:28:10.199679 builtin/commit.c:1394 performance: 0.000000451 s: cmd_status:setup 18:28:10.474847 read-cache.c:1407 performance: 0.274873831 s: read_index 18:28:10.475295 read-cache.c:1407 performance: 0.000000656 s: read_index 18:28:10.728443 preload-index.c:131 performance: 0.253147487 s: read_index_preload 18:28:10.741422 read-cache.c:1254 performance: 0.012868340 s: refresh_index 18:28:10.752300 wt-status.c:623 performance: 0.010421357 s: wt_status_collect_changes_worktree 18:28:10.762069 wt-status.c:629 performance: 0.009644748 s: wt_status_collect_changes_index 18:28:11.601019 wt-status.c:632 performance: 0.838859547 s: wt_status_collect_untracked 18:28:11.605939 builtin/commit.c:1421 performance: 0.004835004 s: cmd_status:update_index 18:28:11.606580 trace.c:415 performance: 1.407878388 s: git command: 'git' 'status' Populating status ----------------- This is after enabling untracked cache and the cache is still empty. We see a slight increase in .._collect_untracked() and update_index (because new cache has to be written to $GIT_DIR/index). 18:28:18.915213 builtin/commit.c:1394 performance: 0.000000326 s: cmd_status:setup 18:28:19.197364 read-cache.c:1407 performance: 0.281901416 s: read_index 18:28:19.197754 read-cache.c:1407 performance: 0.000000546 s: read_index 18:28:19.451355 preload-index.c:131 performance: 0.253599607 s: read_index_preload 18:28:19.464400 read-cache.c:1254 performance: 0.012935336 s: refresh_index 18:28:19.475115 wt-status.c:623 performance: 0.010236920 s: wt_status_collect_changes_worktree 18:28:19.486022 wt-status.c:629 performance: 0.010801685 s: wt_status_collect_changes_index 18:28:20.362660 wt-status.c:632 performance: 0.876551366 s: wt_status_collect_untracked 18:28:20.396199 builtin/commit.c:1421 performance: 0.033447969 s: cmd_status:update_index 18:28:20.396939 trace.c:415 performance: 1.482695902 s: git command: 'git' 'status' Populated status ---------------- After the cache is populated, wt_status_collect_untracked() drops 82% from 0.838s to 0.144s. Overall time drops 45%. Top offenders are now read_index() and read_index_preload(). 18:28:20.408605 builtin/commit.c:1394 performance: 0.000000457 s: cmd_status:setup 18:28:20.692864 read-cache.c:1407 performance: 0.283980458 s: read_index 18:28:20.693273 read-cache.c:1407 performance: 0.000000661 s: read_index 18:28:20.958814 preload-index.c:131 performance: 0.265540254 s: read_index_preload 18:28:20.972375 read-cache.c:1254 performance: 0.013437429 s: refresh_index 18:28:20.983959 wt-status.c:623 performance: 0.011146646 s: wt_status_collect_changes_worktree 18:28:20.993948 wt-status.c:629 performance: 0.009879094 s: wt_status_collect_changes_index 18:28:21.138125 wt-status.c:632 performance: 0.144084737 s: wt_status_collect_untracked 18:28:21.173678 builtin/commit.c:1421 performance: 0.035463949 s: cmd_status:update_index 18:28:21.174251 trace.c:415 performance: 0.766707355 s: git command: 'git' 'status' gentoo-x86.git ============== This repository is a strange one with a balanced, wide and shallow worktree (about 100k files and 23k dirs) and no .gitignore in worktree. .._collect_untracked() time drops 88%, total time drops 56%. Base status ----------- 18:20:40.828642 builtin/commit.c:1394 performance: 0.000000496 s: cmd_status:setup 18:20:41.027233 read-cache.c:1407 performance: 0.198130532 s: read_index 18:20:41.027670 read-cache.c:1407 performance: 0.000000581 s: read_index 18:20:41.171716 preload-index.c:131 performance: 0.144045594 s: read_index_preload 18:20:41.179171 read-cache.c:1254 performance: 0.007320424 s: refresh_index 18:20:41.185785 wt-status.c:623 performance: 0.006144638 s: wt_status_collect_changes_worktree 18:20:41.192701 wt-status.c:629 performance: 0.006780184 s: wt_status_collect_changes_index 18:20:41.991723 wt-status.c:632 performance: 0.798927029 s: wt_status_collect_untracked 18:20:41.994664 builtin/commit.c:1421 performance: 0.002852772 s: cmd_status:update_index 18:20:41.995458 trace.c:415 performance: 1.168427502 s: git command: 'git' 'status' Populating status ----------------- 18:20:48.968848 builtin/commit.c:1394 performance: 0.000000380 s: cmd_status:setup 18:20:49.172918 read-cache.c:1407 performance: 0.203734214 s: read_index 18:20:49.173341 read-cache.c:1407 performance: 0.000000562 s: read_index 18:20:49.320013 preload-index.c:131 performance: 0.146671391 s: read_index_preload 18:20:49.328039 read-cache.c:1254 performance: 0.007921957 s: refresh_index 18:20:49.334680 wt-status.c:623 performance: 0.006172020 s: wt_status_collect_changes_worktree 18:20:49.342526 wt-status.c:629 performance: 0.007731746 s: wt_status_collect_changes_index 18:20:50.257510 wt-status.c:632 performance: 0.914864222 s: wt_status_collect_untracked 18:20:50.338371 builtin/commit.c:1421 performance: 0.080776477 s: cmd_status:update_index 18:20:50.338900 trace.c:415 performance: 1.371462446 s: git command: 'git' 'status' Populated status ---------------- 18:20:50.351160 builtin/commit.c:1394 performance: 0.000000571 s: cmd_status:setup 18:20:50.577358 read-cache.c:1407 performance: 0.225917338 s: read_index 18:20:50.577794 read-cache.c:1407 performance: 0.000000617 s: read_index 18:20:50.734140 preload-index.c:131 performance: 0.156345564 s: read_index_preload 18:20:50.745717 read-cache.c:1254 performance: 0.011463075 s: refresh_index 18:20:50.755176 wt-status.c:623 performance: 0.008877929 s: wt_status_collect_changes_worktree 18:20:50.763768 wt-status.c:629 performance: 0.008471633 s: wt_status_collect_changes_index 18:20:50.854885 wt-status.c:632 performance: 0.090988721 s: wt_status_collect_untracked 18:20:50.857765 builtin/commit.c:1421 performance: 0.002789097 s: cmd_status:update_index 18:20:50.858411 trace.c:415 performance: 0.508647673 s: git command: 'git' 'status' linux-2.6 ========= Reference repo. Not too big. .._collect_status() drops 84%. Total time drops 42%. Base status ----------- 18:34:09.870122 builtin/commit.c:1394 performance: 0.000000385 s: cmd_status:setup 18:34:09.943218 read-cache.c:1407 performance: 0.072871177 s: read_index 18:34:09.943614 read-cache.c:1407 performance: 0.000000491 s: read_index 18:34:10.004364 preload-index.c:131 performance: 0.060748102 s: read_index_preload 18:34:10.008190 read-cache.c:1254 performance: 0.003714285 s: refresh_index 18:34:10.012087 wt-status.c:623 performance: 0.002775446 s: wt_status_collect_changes_worktree 18:34:10.016054 wt-status.c:629 performance: 0.003862140 s: wt_status_collect_changes_index 18:34:10.214747 wt-status.c:632 performance: 0.198604837 s: wt_status_collect_untracked 18:34:10.216102 builtin/commit.c:1421 performance: 0.001244166 s: cmd_status:update_index 18:34:10.216817 trace.c:415 performance: 0.347670735 s: git command: 'git' 'status' Populating status ----------------- 18:34:16.595102 builtin/commit.c:1394 performance: 0.000000456 s: cmd_status:setup 18:34:16.666600 read-cache.c:1407 performance: 0.070992413 s: read_index 18:34:16.667012 read-cache.c:1407 performance: 0.000000606 s: read_index 18:34:16.729375 preload-index.c:131 performance: 0.062362492 s: read_index_preload 18:34:16.732565 read-cache.c:1254 performance: 0.003075517 s: refresh_index 18:34:16.736148 wt-status.c:623 performance: 0.002422201 s: wt_status_collect_changes_worktree 18:34:16.739990 wt-status.c:629 performance: 0.003746618 s: wt_status_collect_changes_index 18:34:16.948505 wt-status.c:632 performance: 0.208426710 s: wt_status_collect_untracked 18:34:16.961744 builtin/commit.c:1421 performance: 0.013151887 s: cmd_status:update_index 18:34:16.962233 trace.c:415 performance: 0.368537535 s: git command: 'git' 'status' Populated status ---------------- 18:34:16.970026 builtin/commit.c:1394 performance: 0.000000631 s: cmd_status:setup 18:34:17.046235 read-cache.c:1407 performance: 0.075904673 s: read_index 18:34:17.046644 read-cache.c:1407 performance: 0.000000681 s: read_index 18:34:17.113564 preload-index.c:131 performance: 0.066920253 s: read_index_preload 18:34:17.117281 read-cache.c:1254 performance: 0.003604055 s: refresh_index 18:34:17.121115 wt-status.c:623 performance: 0.002508345 s: wt_status_collect_changes_worktree 18:34:17.125089 wt-status.c:629 performance: 0.003871636 s: wt_status_collect_changes_index 18:34:17.156089 wt-status.c:632 performance: 0.030895703 s: wt_status_collect_untracked 18:34:17.169861 builtin/commit.c:1421 performance: 0.013686404 s: cmd_status:update_index 18:34:17.170391 trace.c:415 performance: 0.201474531 s: git command: 'git' 'status' Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-08 11:12:42 +01:00
config: add core.untrackedCache When we know that mtime on directory as given by the environment is usable for the purpose of untracked cache, we may want the untracked cache to be always used without any mtime test or kernel name check being performed. Also when we know that mtime is not usable for the purpose of untracked cache, for example because the repo is shared over a network file system, we may want the untracked-cache to be automatically removed from the index. Allow the user to express such preference by setting the 'core.untrackedCache' configuration variable, which can take 'keep', 'false', or 'true' and default to 'keep'. When read_index_from() is called, it now adds or removes the untracked cache in the index to respect the value of this variable. So it does nothing if the value is `keep` or if the variable is unset; it adds the untracked cache if the value is `true`; and it removes the cache if the value is `false`. `git update-index --[no-|force-]untracked-cache` still adds the untracked cache to, or removes it, from the index, but this shows a warning if it goes against the value of core.untrackedCache, because the next time the index is read the untracked cache will be added or removed if the configuration is set to do so. Also `--untracked-cache` used to check that the underlying operating system and file system change `st_mtime` field of a directory if files are added or deleted in that directory. But because those tests take a long time, `--untracked-cache` no longer performs them. Instead, there is now `--test-untracked-cache` to perform the tests. This change makes `--untracked-cache` the same as `--force-untracked-cache`. This last change is backward incompatible and should be mentioned in the release notes. Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Torsten Bögershausen <tboegi@web.de> Helped-by: Stefan Beller <sbeller@google.com> Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> read-cache: Duy'sfixup Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-27 07:58:05 +01:00
switch (untracked_cache) {
case UC_UNSPECIFIED:
break;
case UC_DISABLE:
if (git_config_get_untracked_cache() == 1)
warning(_("core.untrackedCache is set to true; "
"remove or change it, if you really want to "
"disable the untracked cache"));
remove_untracked_cache(&the_index);
report(_("Untracked cache disabled"));
config: add core.untrackedCache When we know that mtime on directory as given by the environment is usable for the purpose of untracked cache, we may want the untracked cache to be always used without any mtime test or kernel name check being performed. Also when we know that mtime is not usable for the purpose of untracked cache, for example because the repo is shared over a network file system, we may want the untracked-cache to be automatically removed from the index. Allow the user to express such preference by setting the 'core.untrackedCache' configuration variable, which can take 'keep', 'false', or 'true' and default to 'keep'. When read_index_from() is called, it now adds or removes the untracked cache in the index to respect the value of this variable. So it does nothing if the value is `keep` or if the variable is unset; it adds the untracked cache if the value is `true`; and it removes the cache if the value is `false`. `git update-index --[no-|force-]untracked-cache` still adds the untracked cache to, or removes it, from the index, but this shows a warning if it goes against the value of core.untrackedCache, because the next time the index is read the untracked cache will be added or removed if the configuration is set to do so. Also `--untracked-cache` used to check that the underlying operating system and file system change `st_mtime` field of a directory if files are added or deleted in that directory. But because those tests take a long time, `--untracked-cache` no longer performs them. Instead, there is now `--test-untracked-cache` to perform the tests. This change makes `--untracked-cache` the same as `--force-untracked-cache`. This last change is backward incompatible and should be mentioned in the release notes. Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Torsten Bögershausen <tboegi@web.de> Helped-by: Stefan Beller <sbeller@google.com> Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> read-cache: Duy'sfixup Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-27 07:58:05 +01:00
break;
case UC_TEST:
setup_work_tree();
return !test_if_untracked_cache_is_supported();
case UC_ENABLE:
case UC_FORCE:
if (git_config_get_untracked_cache() == 0)
warning(_("core.untrackedCache is set to false; "
"remove or change it, if you really want to "
"enable the untracked cache"));
config: add core.untrackedCache When we know that mtime on directory as given by the environment is usable for the purpose of untracked cache, we may want the untracked cache to be always used without any mtime test or kernel name check being performed. Also when we know that mtime is not usable for the purpose of untracked cache, for example because the repo is shared over a network file system, we may want the untracked-cache to be automatically removed from the index. Allow the user to express such preference by setting the 'core.untrackedCache' configuration variable, which can take 'keep', 'false', or 'true' and default to 'keep'. When read_index_from() is called, it now adds or removes the untracked cache in the index to respect the value of this variable. So it does nothing if the value is `keep` or if the variable is unset; it adds the untracked cache if the value is `true`; and it removes the cache if the value is `false`. `git update-index --[no-|force-]untracked-cache` still adds the untracked cache to, or removes it, from the index, but this shows a warning if it goes against the value of core.untrackedCache, because the next time the index is read the untracked cache will be added or removed if the configuration is set to do so. Also `--untracked-cache` used to check that the underlying operating system and file system change `st_mtime` field of a directory if files are added or deleted in that directory. But because those tests take a long time, `--untracked-cache` no longer performs them. Instead, there is now `--test-untracked-cache` to perform the tests. This change makes `--untracked-cache` the same as `--force-untracked-cache`. This last change is backward incompatible and should be mentioned in the release notes. Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Torsten Bögershausen <tboegi@web.de> Helped-by: Stefan Beller <sbeller@google.com> Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> read-cache: Duy'sfixup Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-27 07:58:05 +01:00
add_untracked_cache(&the_index);
report(_("Untracked cache enabled for '%s'"), get_git_work_tree());
break;
default:
BUG("bad untracked_cache value: %d", untracked_cache);
update-index: manually enable or disable untracked cache Overall time saving on "git status" is about 40% in the best case scenario, removing ..collect_untracked() as the most time consuming function. read and refresh index operations are now at the top (which should drop when index-helper and/or watchman support is added). More numbers and analysis below. webkit.git ========== 169k files. 6k dirs. Lots of test data (i.e. not touched most of the time) Base status ----------- Index version 4 in split index mode and cache-tree populated. No untracked cache. It shows how time is consumed by "git status". The same settings are used for other repos below. 18:28:10.199679 builtin/commit.c:1394 performance: 0.000000451 s: cmd_status:setup 18:28:10.474847 read-cache.c:1407 performance: 0.274873831 s: read_index 18:28:10.475295 read-cache.c:1407 performance: 0.000000656 s: read_index 18:28:10.728443 preload-index.c:131 performance: 0.253147487 s: read_index_preload 18:28:10.741422 read-cache.c:1254 performance: 0.012868340 s: refresh_index 18:28:10.752300 wt-status.c:623 performance: 0.010421357 s: wt_status_collect_changes_worktree 18:28:10.762069 wt-status.c:629 performance: 0.009644748 s: wt_status_collect_changes_index 18:28:11.601019 wt-status.c:632 performance: 0.838859547 s: wt_status_collect_untracked 18:28:11.605939 builtin/commit.c:1421 performance: 0.004835004 s: cmd_status:update_index 18:28:11.606580 trace.c:415 performance: 1.407878388 s: git command: 'git' 'status' Populating status ----------------- This is after enabling untracked cache and the cache is still empty. We see a slight increase in .._collect_untracked() and update_index (because new cache has to be written to $GIT_DIR/index). 18:28:18.915213 builtin/commit.c:1394 performance: 0.000000326 s: cmd_status:setup 18:28:19.197364 read-cache.c:1407 performance: 0.281901416 s: read_index 18:28:19.197754 read-cache.c:1407 performance: 0.000000546 s: read_index 18:28:19.451355 preload-index.c:131 performance: 0.253599607 s: read_index_preload 18:28:19.464400 read-cache.c:1254 performance: 0.012935336 s: refresh_index 18:28:19.475115 wt-status.c:623 performance: 0.010236920 s: wt_status_collect_changes_worktree 18:28:19.486022 wt-status.c:629 performance: 0.010801685 s: wt_status_collect_changes_index 18:28:20.362660 wt-status.c:632 performance: 0.876551366 s: wt_status_collect_untracked 18:28:20.396199 builtin/commit.c:1421 performance: 0.033447969 s: cmd_status:update_index 18:28:20.396939 trace.c:415 performance: 1.482695902 s: git command: 'git' 'status' Populated status ---------------- After the cache is populated, wt_status_collect_untracked() drops 82% from 0.838s to 0.144s. Overall time drops 45%. Top offenders are now read_index() and read_index_preload(). 18:28:20.408605 builtin/commit.c:1394 performance: 0.000000457 s: cmd_status:setup 18:28:20.692864 read-cache.c:1407 performance: 0.283980458 s: read_index 18:28:20.693273 read-cache.c:1407 performance: 0.000000661 s: read_index 18:28:20.958814 preload-index.c:131 performance: 0.265540254 s: read_index_preload 18:28:20.972375 read-cache.c:1254 performance: 0.013437429 s: refresh_index 18:28:20.983959 wt-status.c:623 performance: 0.011146646 s: wt_status_collect_changes_worktree 18:28:20.993948 wt-status.c:629 performance: 0.009879094 s: wt_status_collect_changes_index 18:28:21.138125 wt-status.c:632 performance: 0.144084737 s: wt_status_collect_untracked 18:28:21.173678 builtin/commit.c:1421 performance: 0.035463949 s: cmd_status:update_index 18:28:21.174251 trace.c:415 performance: 0.766707355 s: git command: 'git' 'status' gentoo-x86.git ============== This repository is a strange one with a balanced, wide and shallow worktree (about 100k files and 23k dirs) and no .gitignore in worktree. .._collect_untracked() time drops 88%, total time drops 56%. Base status ----------- 18:20:40.828642 builtin/commit.c:1394 performance: 0.000000496 s: cmd_status:setup 18:20:41.027233 read-cache.c:1407 performance: 0.198130532 s: read_index 18:20:41.027670 read-cache.c:1407 performance: 0.000000581 s: read_index 18:20:41.171716 preload-index.c:131 performance: 0.144045594 s: read_index_preload 18:20:41.179171 read-cache.c:1254 performance: 0.007320424 s: refresh_index 18:20:41.185785 wt-status.c:623 performance: 0.006144638 s: wt_status_collect_changes_worktree 18:20:41.192701 wt-status.c:629 performance: 0.006780184 s: wt_status_collect_changes_index 18:20:41.991723 wt-status.c:632 performance: 0.798927029 s: wt_status_collect_untracked 18:20:41.994664 builtin/commit.c:1421 performance: 0.002852772 s: cmd_status:update_index 18:20:41.995458 trace.c:415 performance: 1.168427502 s: git command: 'git' 'status' Populating status ----------------- 18:20:48.968848 builtin/commit.c:1394 performance: 0.000000380 s: cmd_status:setup 18:20:49.172918 read-cache.c:1407 performance: 0.203734214 s: read_index 18:20:49.173341 read-cache.c:1407 performance: 0.000000562 s: read_index 18:20:49.320013 preload-index.c:131 performance: 0.146671391 s: read_index_preload 18:20:49.328039 read-cache.c:1254 performance: 0.007921957 s: refresh_index 18:20:49.334680 wt-status.c:623 performance: 0.006172020 s: wt_status_collect_changes_worktree 18:20:49.342526 wt-status.c:629 performance: 0.007731746 s: wt_status_collect_changes_index 18:20:50.257510 wt-status.c:632 performance: 0.914864222 s: wt_status_collect_untracked 18:20:50.338371 builtin/commit.c:1421 performance: 0.080776477 s: cmd_status:update_index 18:20:50.338900 trace.c:415 performance: 1.371462446 s: git command: 'git' 'status' Populated status ---------------- 18:20:50.351160 builtin/commit.c:1394 performance: 0.000000571 s: cmd_status:setup 18:20:50.577358 read-cache.c:1407 performance: 0.225917338 s: read_index 18:20:50.577794 read-cache.c:1407 performance: 0.000000617 s: read_index 18:20:50.734140 preload-index.c:131 performance: 0.156345564 s: read_index_preload 18:20:50.745717 read-cache.c:1254 performance: 0.011463075 s: refresh_index 18:20:50.755176 wt-status.c:623 performance: 0.008877929 s: wt_status_collect_changes_worktree 18:20:50.763768 wt-status.c:629 performance: 0.008471633 s: wt_status_collect_changes_index 18:20:50.854885 wt-status.c:632 performance: 0.090988721 s: wt_status_collect_untracked 18:20:50.857765 builtin/commit.c:1421 performance: 0.002789097 s: cmd_status:update_index 18:20:50.858411 trace.c:415 performance: 0.508647673 s: git command: 'git' 'status' linux-2.6 ========= Reference repo. Not too big. .._collect_status() drops 84%. Total time drops 42%. Base status ----------- 18:34:09.870122 builtin/commit.c:1394 performance: 0.000000385 s: cmd_status:setup 18:34:09.943218 read-cache.c:1407 performance: 0.072871177 s: read_index 18:34:09.943614 read-cache.c:1407 performance: 0.000000491 s: read_index 18:34:10.004364 preload-index.c:131 performance: 0.060748102 s: read_index_preload 18:34:10.008190 read-cache.c:1254 performance: 0.003714285 s: refresh_index 18:34:10.012087 wt-status.c:623 performance: 0.002775446 s: wt_status_collect_changes_worktree 18:34:10.016054 wt-status.c:629 performance: 0.003862140 s: wt_status_collect_changes_index 18:34:10.214747 wt-status.c:632 performance: 0.198604837 s: wt_status_collect_untracked 18:34:10.216102 builtin/commit.c:1421 performance: 0.001244166 s: cmd_status:update_index 18:34:10.216817 trace.c:415 performance: 0.347670735 s: git command: 'git' 'status' Populating status ----------------- 18:34:16.595102 builtin/commit.c:1394 performance: 0.000000456 s: cmd_status:setup 18:34:16.666600 read-cache.c:1407 performance: 0.070992413 s: read_index 18:34:16.667012 read-cache.c:1407 performance: 0.000000606 s: read_index 18:34:16.729375 preload-index.c:131 performance: 0.062362492 s: read_index_preload 18:34:16.732565 read-cache.c:1254 performance: 0.003075517 s: refresh_index 18:34:16.736148 wt-status.c:623 performance: 0.002422201 s: wt_status_collect_changes_worktree 18:34:16.739990 wt-status.c:629 performance: 0.003746618 s: wt_status_collect_changes_index 18:34:16.948505 wt-status.c:632 performance: 0.208426710 s: wt_status_collect_untracked 18:34:16.961744 builtin/commit.c:1421 performance: 0.013151887 s: cmd_status:update_index 18:34:16.962233 trace.c:415 performance: 0.368537535 s: git command: 'git' 'status' Populated status ---------------- 18:34:16.970026 builtin/commit.c:1394 performance: 0.000000631 s: cmd_status:setup 18:34:17.046235 read-cache.c:1407 performance: 0.075904673 s: read_index 18:34:17.046644 read-cache.c:1407 performance: 0.000000681 s: read_index 18:34:17.113564 preload-index.c:131 performance: 0.066920253 s: read_index_preload 18:34:17.117281 read-cache.c:1254 performance: 0.003604055 s: refresh_index 18:34:17.121115 wt-status.c:623 performance: 0.002508345 s: wt_status_collect_changes_worktree 18:34:17.125089 wt-status.c:629 performance: 0.003871636 s: wt_status_collect_changes_index 18:34:17.156089 wt-status.c:632 performance: 0.030895703 s: wt_status_collect_untracked 18:34:17.169861 builtin/commit.c:1421 performance: 0.013686404 s: cmd_status:update_index 18:34:17.170391 trace.c:415 performance: 0.201474531 s: git command: 'git' 'status' Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-08 11:12:42 +01:00
}
if (fsmonitor > 0) {
if (git_config_get_fsmonitor() == 0)
warning(_("core.fsmonitor is unset; "
"set it if you really want to "
"enable fsmonitor"));
add_fsmonitor(&the_index);
report(_("fsmonitor enabled"));
} else if (!fsmonitor) {
if (git_config_get_fsmonitor() == 1)
warning(_("core.fsmonitor is set; "
"remove it if you really want to "
"disable fsmonitor"));
remove_fsmonitor(&the_index);
report(_("fsmonitor disabled"));
}
if (active_cache_changed || force_write) {
if (newfd < 0) {
if (refresh_args.flags & REFRESH_QUIET)
exit(128);
unable_to_lock_die(get_index_file(), lock_error);
}
if (write_locked_index(&the_index, &lock_file, COMMIT_LOCK))
die("Unable to write new index file");
}
rollback_lock_file(&lock_file);
return has_errors ? 1 : 0;
}