From c18b80a0e86c4529146e3947454159627f1419a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Fri, 13 Jun 2014 19:19:44 +0700 Subject: [PATCH] update-index: new options to enable/disable split index mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If you have a large work tree but only make changes in a subset, then $GIT_DIR/index's size should be stable after a while. If you change branches that touch something else, $GIT_DIR/index's size may grow large that it becomes as slow as the unified index. Do --split-index again occasionally to force all changes back to the shared index and keep $GIT_DIR/index small. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/git-update-index.txt | 11 +++++ builtin/update-index.c | 18 ++++++++ cache.h | 1 + read-cache.c | 67 +++++++++++++++++++++++++++--- split-index.c | 23 ++++++++++ 5 files changed, 114 insertions(+), 6 deletions(-) diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt index d6de4a008c..dfc09d93d8 100644 --- a/Documentation/git-update-index.txt +++ b/Documentation/git-update-index.txt @@ -161,6 +161,17 @@ may not support it yet. Only meaningful with `--stdin` or `--index-info`; paths are separated with NUL character instead of LF. +--split-index:: +--no-split-index:: + Enable or disable split index mode. If enabled, the index is + split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex.. + Changes are accumulated in $GIT_DIR/index while the shared + index file contains all index entries stays unchanged. If + split-index mode is already enabled and `--split-index` is + given again, all changes in $GIT_DIR/index are pushed back to + the shared index file. This mode is designed for very large + indexes that take a signficant amount of time to read or write. + \--:: Do not interpret any more arguments as options. diff --git a/builtin/update-index.c b/builtin/update-index.c index f7a19c47a6..b0503f4e3d 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -13,6 +13,7 @@ #include "parse-options.h" #include "pathspec.h" #include "dir.h" +#include "split-index.h" /* * Default to not allowing changes to the list of files. The @@ -742,6 +743,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) char set_executable_bit = 0; struct refresh_params refresh_args = {0, &has_errors}; int lock_error = 0; + int split_index = -1; struct lock_file *lock_file; struct parse_opt_ctx_t ctx; int parseopt_state = PARSE_OPT_UNKNOWN; @@ -824,6 +826,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) resolve_undo_clear_callback}, OPT_INTEGER(0, "index-version", &preferred_index_format, N_("write index in this format")), + OPT_BOOL(0, "split-index", &split_index, + N_("enable or disable split index")), OPT_END() }; @@ -917,6 +921,20 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) strbuf_release(&buf); } + if (split_index > 0) { + init_split_index(&the_index); + the_index.cache_changed |= SPLIT_INDEX_ORDERED; + } else if (!split_index && the_index.split_index) { + /* + * can't discard_split_index(&the_index); because that + * will destroy split_index->base->cache[], which may + * be shared with the_index.cache[]. So yeah we're + * leaking a bit here. + */ + the_index.split_index = NULL; + the_index.cache_changed |= SOMETHING_CHANGED; + } + if (active_cache_changed) { if (newfd < 0) { if (refresh_args.flags & REFRESH_QUIET) diff --git a/cache.h b/cache.h index ddb7cd26d1..7523c28ec7 100644 --- a/cache.h +++ b/cache.h @@ -278,6 +278,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define CE_ENTRY_ADDED (1 << 3) #define RESOLVE_UNDO_CHANGED (1 << 4) #define CACHE_TREE_CHANGED (1 << 5) +#define SPLIT_INDEX_ORDERED (1 << 6) struct split_index; struct index_state { diff --git a/read-cache.c b/read-cache.c index af475dc58c..8ecb9599e5 100644 --- a/read-cache.c +++ b/read-cache.c @@ -15,6 +15,7 @@ #include "strbuf.h" #include "varint.h" #include "split-index.h" +#include "sigchain.h" static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, unsigned int options); @@ -39,7 +40,8 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, /* changes that can be kept in $GIT_DIR/index (basically all extensions) */ #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ - CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED) + CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \ + SPLIT_INDEX_ORDERED) struct index_state the_index; static const char *alternate_index_output; @@ -1860,7 +1862,8 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile rollback_lock_file(lockfile); } -static int do_write_index(struct index_state *istate, int newfd) +static int do_write_index(struct index_state *istate, int newfd, + int strip_extensions) { git_SHA_CTX c; struct cache_header hdr; @@ -1923,7 +1926,7 @@ static int do_write_index(struct index_state *istate, int newfd) strbuf_release(&previous_name_buf); /* Write extension data here */ - if (istate->split_index) { + if (!strip_extensions && istate->split_index) { struct strbuf sb = STRBUF_INIT; err = write_link_extension(&sb, istate) < 0 || @@ -1934,7 +1937,7 @@ static int do_write_index(struct index_state *istate, int newfd) if (err) return -1; } - if (istate->cache_tree) { + if (!strip_extensions && istate->cache_tree) { struct strbuf sb = STRBUF_INIT; cache_tree_write(&sb, istate->cache_tree); @@ -1944,7 +1947,7 @@ static int do_write_index(struct index_state *istate, int newfd) if (err) return -1; } - if (istate->resolve_undo) { + if (!strip_extensions && istate->resolve_undo) { struct strbuf sb = STRBUF_INIT; resolve_undo_write(&sb, istate->resolve_undo); @@ -1985,7 +1988,7 @@ static int commit_locked_index(struct lock_file *lk) static int do_write_locked_index(struct index_state *istate, struct lock_file *lock, unsigned flags) { - int ret = do_write_index(istate, lock->fd); + int ret = do_write_index(istate, lock->fd, 0); if (ret) return ret; assert((flags & (COMMIT_LOCK | CLOSE_LOCK)) != @@ -2009,6 +2012,52 @@ static int write_split_index(struct index_state *istate, return ret; } +static char *temporary_sharedindex; + +static void remove_temporary_sharedindex(void) +{ + if (temporary_sharedindex) { + unlink_or_warn(temporary_sharedindex); + free(temporary_sharedindex); + temporary_sharedindex = NULL; + } +} + +static void remove_temporary_sharedindex_on_signal(int signo) +{ + remove_temporary_sharedindex(); + sigchain_pop(signo); + raise(signo); +} + +static int write_shared_index(struct index_state *istate) +{ + struct split_index *si = istate->split_index; + static int installed_handler; + int fd, ret; + + temporary_sharedindex = git_pathdup("sharedindex_XXXXXX"); + fd = xmkstemp(temporary_sharedindex); + if (!installed_handler) { + atexit(remove_temporary_sharedindex); + sigchain_push_common(remove_temporary_sharedindex_on_signal); + } + move_cache_to_base_index(istate); + ret = do_write_index(si->base, fd, 1); + close(fd); + if (ret) { + remove_temporary_sharedindex(); + return ret; + } + ret = rename(temporary_sharedindex, + git_path("sharedindex.%s", sha1_to_hex(si->base->sha1))); + free(temporary_sharedindex); + temporary_sharedindex = NULL; + if (!ret) + hashcpy(si->base_sha1, si->base->sha1); + return ret; +} + int write_locked_index(struct index_state *istate, struct lock_file *lock, unsigned flags) { @@ -2020,6 +2069,12 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock, return do_write_locked_index(istate, lock, flags); } + if (istate->cache_changed & SPLIT_INDEX_ORDERED) { + int ret = write_shared_index(istate); + if (ret) + return ret; + } + return write_split_index(istate, lock, flags); } diff --git a/split-index.c b/split-index.c index ee3246f391..21485e2066 100644 --- a/split-index.c +++ b/split-index.c @@ -74,6 +74,29 @@ static void mark_base_index_entries(struct index_state *base) base->cache[i]->index = i + 1; } +void move_cache_to_base_index(struct index_state *istate) +{ + struct split_index *si = istate->split_index; + int i; + + /* + * do not delete old si->base, its index entries may be shared + * with istate->cache[]. Accept a bit of leaking here because + * this code is only used by short-lived update-index. + */ + si->base = xcalloc(1, sizeof(*si->base)); + si->base->version = istate->version; + /* zero timestamp disables racy test in ce_write_index() */ + si->base->timestamp = istate->timestamp; + ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc); + si->base->cache_nr = istate->cache_nr; + memcpy(si->base->cache, istate->cache, + sizeof(*istate->cache) * istate->cache_nr); + mark_base_index_entries(si->base); + for (i = 0; i < si->base->cache_nr; i++) + si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE; +} + static void mark_entry_for_delete(size_t pos, void *data) { struct index_state *istate = data;