Merge branch 'ds/omit-trailing-hash-in-index'

Introduce an optional configuration to allow the trailing hash that
protects the index file from bit flipping to be omitted.

* ds/omit-trailing-hash-in-index:
  features: feature.manyFiles implies fast index writes
  test-lib-functions: add helper for trailing hash
  read-cache: add index.skipHash config option
  hashfile: allow skipping the hash function
This commit is contained in:
Junio C Hamano 2023-01-16 12:07:47 -08:00
commit ffd9238685
10 changed files with 89 additions and 4 deletions

View File

@ -23,6 +23,11 @@ feature.manyFiles::
working directory. With many files, commands such as `git status` and
`git checkout` may be slow and these new defaults improve performance:
+
* `index.skipHash=true` speeds up index writes by not computing a trailing
checksum. Note that this will cause Git versions earlier than 2.13.0 to
refuse to parse the index and Git versions earlier than 2.40.0 to report
a corrupted index during `git fsck`.
+
* `index.version=4` enables path-prefix compression in the index.
+
* `core.untrackedCache=true` enables the untracked cache. This setting assumes

View File

@ -30,3 +30,14 @@ index.version::
Specify the version with which new index files should be
initialized. This does not affect existing repositories.
If `feature.manyFiles` is enabled, then the default is 4.
index.skipHash::
When enabled, do not compute the trailing hash for the index file.
This accelerates Git commands that manipulate the index, such as
`git add`, `git commit`, or `git status`. Instead of storing the
checksum, write a trailing set of bytes with value zero, indicating
that the computation was skipped.
+
If you enable `index.skipHash`, then Git clients older than 2.13.0 will
refuse to parse the index and Git clients older than 2.40.0 will report an
error during `git fsck`.

View File

@ -45,7 +45,8 @@ void hashflush(struct hashfile *f)
unsigned offset = f->offset;
if (offset) {
the_hash_algo->update_fn(&f->ctx, f->buffer, offset);
if (!f->skip_hash)
the_hash_algo->update_fn(&f->ctx, f->buffer, offset);
flush(f, f->buffer, offset);
f->offset = 0;
}
@ -64,7 +65,12 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result,
int fd;
hashflush(f);
the_hash_algo->final_fn(f->buffer, &f->ctx);
if (f->skip_hash)
hashclr(f->buffer);
else
the_hash_algo->final_fn(f->buffer, &f->ctx);
if (result)
hashcpy(result, f->buffer);
if (flags & CSUM_HASH_IN_STREAM)
@ -108,7 +114,8 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
* the hashfile's buffer. In this block,
* f->offset is necessarily zero.
*/
the_hash_algo->update_fn(&f->ctx, buf, nr);
if (!f->skip_hash)
the_hash_algo->update_fn(&f->ctx, buf, nr);
flush(f, buf, nr);
} else {
/*
@ -153,6 +160,7 @@ static struct hashfile *hashfd_internal(int fd, const char *name,
f->tp = tp;
f->name = name;
f->do_crc = 0;
f->skip_hash = 0;
the_hash_algo->init_fn(&f->ctx);
f->buffer_len = buffer_len;

View File

@ -20,6 +20,13 @@ struct hashfile {
size_t buffer_len;
unsigned char *buffer;
unsigned char *check_buffer;
/**
* If non-zero, skip_hash indicates that we should
* not actually compute the hash for this hashfile and
* instead only use it as a buffered write.
*/
int skip_hash;
};
/* Checkpoint */

View File

@ -1817,6 +1817,8 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size)
git_hash_ctx c;
unsigned char hash[GIT_MAX_RAWSZ];
int hdr_version;
unsigned char *start, *end;
struct object_id oid;
if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
return error(_("bad signature 0x%08x"), hdr->hdr_signature);
@ -1827,10 +1829,16 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size)
if (!verify_index_checksum)
return 0;
end = (unsigned char *)hdr + size;
start = end - the_hash_algo->rawsz;
oidread(&oid, start);
if (oideq(&oid, null_oid()))
return 0;
the_hash_algo->init_fn(&c);
the_hash_algo->update_fn(&c, hdr, size - the_hash_algo->rawsz);
the_hash_algo->final_fn(hash, &c);
if (!hasheq(hash, (unsigned char *)hdr + size - the_hash_algo->rawsz))
if (!hasheq(hash, start))
return error(_("bad index file sha1 signature"));
return 0;
}
@ -2920,9 +2928,13 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
int ieot_entries = 1;
struct index_entry_offset_table *ieot = NULL;
int nr, nr_threads;
struct repository *r = istate->repo ? istate->repo : the_repository;
f = hashfd(tempfile->fd, tempfile->filename.buf);
prepare_repo_settings(r);
f->skip_hash = r->settings.index_skip_hash;
for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE)
removed++;

View File

@ -47,6 +47,7 @@ void prepare_repo_settings(struct repository *r)
}
if (manyfiles) {
r->settings.index_version = 4;
r->settings.index_skip_hash = 1;
r->settings.core_untracked_cache = UNTRACKED_CACHE_WRITE;
}
@ -61,6 +62,7 @@ void prepare_repo_settings(struct repository *r)
repo_cfg_bool(r, "pack.usesparse", &r->settings.pack_use_sparse, 1);
repo_cfg_bool(r, "core.multipackindex", &r->settings.core_multi_pack_index, 1);
repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0);
repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash);
/*
* The GIT_TEST_MULTI_PACK_INDEX variable is special in that

View File

@ -42,6 +42,7 @@ struct repo_settings {
struct fsmonitor_settings *fsmonitor; /* lazily loaded */
int index_version;
int index_skip_hash;
enum untracked_cache_setting core_untracked_cache;
int pack_use_sparse;

View File

@ -143,6 +143,7 @@ static int set_recommended_config(int reconfigure)
{ "credential.validate", "false", 1 }, /* GCM4W-only */
{ "gc.auto", "0", 1 },
{ "gui.GCWarning", "false", 1 },
{ "index.skipHash", "false", 1 },
{ "index.threads", "true", 1 },
{ "index.version", "4", 1 },
{ "merge.stat", "false", 1 },

View File

@ -65,6 +65,36 @@ test_expect_success 'out of bounds index.version issues warning' '
)
'
# Verify that index.skipHash (set directly or implied by feature.manyFiles)
# makes the index end in an all-zero trailing hash, that an explicit
# index.skipHash=false overrides feature.manyFiles, and that a submodule
# follows its own index.skipHash value rather than the superproject one.
test_expect_success 'index.skipHash config option' '
	# Explicit index.skipHash=true: trailing hash must be all zeros
	# and fsck must still accept the index.
	rm -f .git/index &&
	git -c index.skipHash=true add a &&
	test_trailing_hash .git/index >hash &&
	echo $(test_oid zero) >expect &&
	test_cmp expect hash &&
	git fsck &&

	# feature.manyFiles alone implies index.skipHash=true.
	rm -f .git/index &&
	git -c feature.manyFiles=true add a &&
	test_trailing_hash .git/index >hash &&
	test_cmp expect hash &&

	# An explicit index.skipHash=false overrides feature.manyFiles,
	# so the trailing hash must differ from the zero hash.
	rm -f .git/index &&
	git -c feature.manyFiles=true \
		-c index.skipHash=false add a &&
	test_trailing_hash .git/index >hash &&
	! cmp expect hash &&

	# The superproject disables skipHash while the submodule enables it;
	# the submodule index must be written with the zero trailing hash.
	test_commit start &&
	git -c protocol.file.allow=always submodule add ./ sub &&
	git config index.skipHash false &&
	git -C sub config index.skipHash true &&
	>sub/file &&
	git -C sub add a &&
	test_trailing_hash .git/modules/sub/index >hash &&
	test_cmp expect hash &&
	git -C sub fsck
'
test_index_version () {
INDEX_VERSION_CONFIG=$1 &&
FEATURE_MANY_FILES=$2 &&

View File

@ -1816,3 +1816,11 @@ test_cmp_config_output () {
sort config-actual >sorted-actual &&
test_cmp sorted-expect sorted-actual
}
# Print the trailing hash of the file named by $1 as hex digits.
# The last $(test_oid rawsz) bytes of the file are passed through
# "test-tool hexdump", and the spaces in its output are removed.
test_trailing_hash () {
	local hashed_file="$1" &&
	tail -c $(test_oid rawsz) "$hashed_file" | test-tool hexdump | tr -d " "
}