update-index: new options to enable/disable split index mode

If you have a large work tree but only make changes in a subset, then
$GIT_DIR/index's size should be stable after a while. If you change
branches that touch something else, $GIT_DIR/index's size may grow
so large that it becomes as slow as the unified index. Run --split-index
again occasionally to force all changes back to the shared index and
keep $GIT_DIR/index small.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Nguyễn Thái Ngọc Duy 2014-06-13 19:19:44 +07:00 committed by Junio C Hamano
parent b3c96fb158
commit c18b80a0e8
5 changed files with 114 additions and 6 deletions

View File

@ -161,6 +161,17 @@ may not support it yet.
Only meaningful with `--stdin` or `--index-info`; paths are
separated with NUL character instead of LF.
--split-index::
--no-split-index::
Enable or disable split index mode. If enabled, the index is
split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex.<SHA-1>.
Changes are accumulated in $GIT_DIR/index while the shared
index file contains all index entries and stays unchanged. If
split-index mode is already enabled and `--split-index` is
given again, all changes in $GIT_DIR/index are pushed back to
the shared index file. This mode is designed for very large
indexes that take a significant amount of time to read or write.
\--::
Do not interpret any more arguments as options.

View File

@ -13,6 +13,7 @@
#include "parse-options.h"
#include "pathspec.h"
#include "dir.h"
#include "split-index.h"
/*
* Default to not allowing changes to the list of files. The
@ -742,6 +743,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
char set_executable_bit = 0;
struct refresh_params refresh_args = {0, &has_errors};
int lock_error = 0;
int split_index = -1;
struct lock_file *lock_file;
struct parse_opt_ctx_t ctx;
int parseopt_state = PARSE_OPT_UNKNOWN;
@ -824,6 +826,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
resolve_undo_clear_callback},
OPT_INTEGER(0, "index-version", &preferred_index_format,
N_("write index in this format")),
OPT_BOOL(0, "split-index", &split_index,
N_("enable or disable split index")),
OPT_END()
};
@ -917,6 +921,20 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
strbuf_release(&buf);
}
if (split_index > 0) {
init_split_index(&the_index);
the_index.cache_changed |= SPLIT_INDEX_ORDERED;
} else if (!split_index && the_index.split_index) {
/*
* can't discard_split_index(&the_index); because that
* will destroy split_index->base->cache[], which may
* be shared with the_index.cache[]. So yeah we're
* leaking a bit here.
*/
the_index.split_index = NULL;
the_index.cache_changed |= SOMETHING_CHANGED;
}
if (active_cache_changed) {
if (newfd < 0) {
if (refresh_args.flags & REFRESH_QUIET)

View File

@ -278,6 +278,7 @@ static inline unsigned int canon_mode(unsigned int mode)
#define CE_ENTRY_ADDED (1 << 3)
#define RESOLVE_UNDO_CHANGED (1 << 4)
#define CACHE_TREE_CHANGED (1 << 5)
#define SPLIT_INDEX_ORDERED (1 << 6)
struct split_index;
struct index_state {

View File

@ -15,6 +15,7 @@
#include "strbuf.h"
#include "varint.h"
#include "split-index.h"
#include "sigchain.h"
static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
unsigned int options);
@ -39,7 +40,8 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
/* changes that can be kept in $GIT_DIR/index (basically all extensions) */
#define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED)
CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
SPLIT_INDEX_ORDERED)
struct index_state the_index;
static const char *alternate_index_output;
@ -1860,7 +1862,8 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
rollback_lock_file(lockfile);
}
static int do_write_index(struct index_state *istate, int newfd)
static int do_write_index(struct index_state *istate, int newfd,
int strip_extensions)
{
git_SHA_CTX c;
struct cache_header hdr;
@ -1923,7 +1926,7 @@ static int do_write_index(struct index_state *istate, int newfd)
strbuf_release(&previous_name_buf);
/* Write extension data here */
if (istate->split_index) {
if (!strip_extensions && istate->split_index) {
struct strbuf sb = STRBUF_INIT;
err = write_link_extension(&sb, istate) < 0 ||
@ -1934,7 +1937,7 @@ static int do_write_index(struct index_state *istate, int newfd)
if (err)
return -1;
}
if (istate->cache_tree) {
if (!strip_extensions && istate->cache_tree) {
struct strbuf sb = STRBUF_INIT;
cache_tree_write(&sb, istate->cache_tree);
@ -1944,7 +1947,7 @@ static int do_write_index(struct index_state *istate, int newfd)
if (err)
return -1;
}
if (istate->resolve_undo) {
if (!strip_extensions && istate->resolve_undo) {
struct strbuf sb = STRBUF_INIT;
resolve_undo_write(&sb, istate->resolve_undo);
@ -1985,7 +1988,7 @@ static int commit_locked_index(struct lock_file *lk)
static int do_write_locked_index(struct index_state *istate, struct lock_file *lock,
unsigned flags)
{
int ret = do_write_index(istate, lock->fd);
int ret = do_write_index(istate, lock->fd, 0);
if (ret)
return ret;
assert((flags & (COMMIT_LOCK | CLOSE_LOCK)) !=
@ -2009,6 +2012,52 @@ static int write_split_index(struct index_state *istate,
return ret;
}
/* Path of the shared index temporary file being written; NULL when none. */
static char *temporary_sharedindex;

/*
 * Unlink the pending temporary shared index file, if there is one, and
 * forget its path. Does nothing when no temporary file is recorded, so
 * it can be called more than once.
 */
static void remove_temporary_sharedindex(void)
{
	char *path = temporary_sharedindex;

	if (!path)
		return;

	unlink_or_warn(path);
	free(path);
	temporary_sharedindex = NULL;
}
/*
 * Signal handler passed to sigchain_push_common() by write_shared_index().
 * Removes the temporary shared index file (if any), pops the sigchain
 * entry for signo and then raises signo again.
 */
static void remove_temporary_sharedindex_on_signal(int signo)
{
remove_temporary_sharedindex();
/* presumably restores the handler that was active before ours -- confirm in sigchain.h */
sigchain_pop(signo);
/* re-deliver the signal so it is not swallowed by this cleanup handler */
raise(signo);
}
/*
 * Write all entries of istate out as a new shared index file.
 *
 * The entries are moved into a fresh base index
 * (move_cache_to_base_index()), written with extensions stripped to a
 * temporary "sharedindex_XXXXXX" file, and the file is then renamed to
 * "sharedindex.<hex of si->base->sha1>" (the sha1 is presumably filled
 * in by do_write_index() -- confirm). On success si->base_sha1 is
 * updated to match si->base->sha1.
 *
 * Returns 0 on success and non-zero when writing or renaming fails; in
 * both failure cases the temporary file is removed.
 */
static int write_shared_index(struct index_state *istate)
{
	struct split_index *si = istate->split_index;
	static int installed_handler;
	int fd, ret;

	temporary_sharedindex = git_pathdup("sharedindex_XXXXXX");
	fd = xmkstemp(temporary_sharedindex);
	if (!installed_handler) {
		/*
		 * Register the cleanup hooks only once per process; without
		 * setting the flag every call would add another atexit()
		 * entry and push another signal handler.
		 */
		atexit(remove_temporary_sharedindex);
		sigchain_push_common(remove_temporary_sharedindex_on_signal);
		installed_handler = 1;
	}
	move_cache_to_base_index(istate);
	ret = do_write_index(si->base, fd, 1);
	close(fd);
	if (ret) {
		remove_temporary_sharedindex();
		return ret;
	}
	ret = rename(temporary_sharedindex,
		     git_path("sharedindex.%s", sha1_to_hex(si->base->sha1)));
	if (ret) {
		/*
		 * The temporary file is still on disk; unlink it now, since
		 * nothing could clean it up once the path is forgotten.
		 */
		remove_temporary_sharedindex();
		return ret;
	}
	/* the file now lives under its final name; just drop the temp path */
	free(temporary_sharedindex);
	temporary_sharedindex = NULL;
	hashcpy(si->base_sha1, si->base->sha1);
	return 0;
}
int write_locked_index(struct index_state *istate, struct lock_file *lock,
unsigned flags)
{
@ -2020,6 +2069,12 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
return do_write_locked_index(istate, lock, flags);
}
if (istate->cache_changed & SPLIT_INDEX_ORDERED) {
int ret = write_shared_index(istate);
if (ret)
return ret;
}
return write_split_index(istate, lock, flags);
}

View File

@ -74,6 +74,29 @@ static void mark_base_index_entries(struct index_state *base)
base->cache[i]->index = i + 1;
}
void move_cache_to_base_index(struct index_state *istate)
{
struct split_index *si = istate->split_index;
int i;
/*
* do not delete old si->base, its index entries may be shared
* with istate->cache[]. Accept a bit of leaking here because
* this code is only used by short-lived update-index.
*/
si->base = xcalloc(1, sizeof(*si->base));
si->base->version = istate->version;
/* zero timestamp disables racy test in ce_write_index() */
si->base->timestamp = istate->timestamp;
ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc);
si->base->cache_nr = istate->cache_nr;
memcpy(si->base->cache, istate->cache,
sizeof(*istate->cache) * istate->cache_nr);
mark_base_index_entries(si->base);
for (i = 0; i < si->base->cache_nr; i++)
si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE;
}
static void mark_entry_for_delete(size_t pos, void *data)
{
struct index_state *istate = data;