Merge branch 'ds/omit-trailing-hash-in-index'

Introduce an optional configuration to allow the trailing hash that
protects the index file from bit flipping to be omitted.

* ds/omit-trailing-hash-in-index:
  features: feature.manyFiles implies fast index writes
  test-lib-functions: add helper for trailing hash
  read-cache: add index.skipHash config option
  hashfile: allow skipping the hash function
This commit is contained in:
Junio C Hamano 2023-01-16 12:07:47 -08:00
commit ffd9238685
10 changed files with 89 additions and 4 deletions

View File

@ -23,6 +23,11 @@ feature.manyFiles::
working directory. With many files, commands such as `git status` and working directory. With many files, commands such as `git status` and
`git checkout` may be slow and these new defaults improve performance: `git checkout` may be slow and these new defaults improve performance:
+ +
* `index.skipHash=true` speeds up index writes by not computing a trailing
checksum. Note that this will cause Git versions earlier than 2.13.0 to
refuse to parse the index and Git versions earlier than 2.40.0 will report
a corrupted index during `git fsck`.
+
* `index.version=4` enables path-prefix compression in the index. * `index.version=4` enables path-prefix compression in the index.
+ +
* `core.untrackedCache=true` enables the untracked cache. This setting assumes * `core.untrackedCache=true` enables the untracked cache. This setting assumes

View File

@ -30,3 +30,14 @@ index.version::
Specify the version with which new index files should be Specify the version with which new index files should be
initialized. This does not affect existing repositories. initialized. This does not affect existing repositories.
If `feature.manyFiles` is enabled, then the default is 4. If `feature.manyFiles` is enabled, then the default is 4.
index.skipHash::
When enabled, do not compute the trailing hash for the index file.
This accelerates Git commands that manipulate the index, such as
`git add`, `git commit`, or `git status`. Instead of storing the
checksum, write a trailing set of bytes with value zero, indicating
that the computation was skipped.
+
If you enable `index.skipHash`, then Git clients older than 2.13.0 will
refuse to parse the index and Git clients older than 2.40.0 will report an
error during `git fsck`.

View File

@ -45,6 +45,7 @@ void hashflush(struct hashfile *f)
unsigned offset = f->offset; unsigned offset = f->offset;
if (offset) { if (offset) {
if (!f->skip_hash)
the_hash_algo->update_fn(&f->ctx, f->buffer, offset); the_hash_algo->update_fn(&f->ctx, f->buffer, offset);
flush(f, f->buffer, offset); flush(f, f->buffer, offset);
f->offset = 0; f->offset = 0;
@ -64,7 +65,12 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result,
int fd; int fd;
hashflush(f); hashflush(f);
if (f->skip_hash)
hashclr(f->buffer);
else
the_hash_algo->final_fn(f->buffer, &f->ctx); the_hash_algo->final_fn(f->buffer, &f->ctx);
if (result) if (result)
hashcpy(result, f->buffer); hashcpy(result, f->buffer);
if (flags & CSUM_HASH_IN_STREAM) if (flags & CSUM_HASH_IN_STREAM)
@ -108,6 +114,7 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
* the hashfile's buffer. In this block, * the hashfile's buffer. In this block,
* f->offset is necessarily zero. * f->offset is necessarily zero.
*/ */
if (!f->skip_hash)
the_hash_algo->update_fn(&f->ctx, buf, nr); the_hash_algo->update_fn(&f->ctx, buf, nr);
flush(f, buf, nr); flush(f, buf, nr);
} else { } else {
@ -153,6 +160,7 @@ static struct hashfile *hashfd_internal(int fd, const char *name,
f->tp = tp; f->tp = tp;
f->name = name; f->name = name;
f->do_crc = 0; f->do_crc = 0;
f->skip_hash = 0;
the_hash_algo->init_fn(&f->ctx); the_hash_algo->init_fn(&f->ctx);
f->buffer_len = buffer_len; f->buffer_len = buffer_len;

View File

@ -20,6 +20,13 @@ struct hashfile {
size_t buffer_len; size_t buffer_len;
unsigned char *buffer; unsigned char *buffer;
unsigned char *check_buffer; unsigned char *check_buffer;
/**
* If non-zero, skip_hash indicates that we should
* not actually compute the hash for this hashfile and
* instead only use it as a buffered write.
*/
int skip_hash;
}; };
/* Checkpoint */ /* Checkpoint */

View File

@ -1817,6 +1817,8 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size)
git_hash_ctx c; git_hash_ctx c;
unsigned char hash[GIT_MAX_RAWSZ]; unsigned char hash[GIT_MAX_RAWSZ];
int hdr_version; int hdr_version;
unsigned char *start, *end;
struct object_id oid;
if (hdr->hdr_signature != htonl(CACHE_SIGNATURE)) if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
return error(_("bad signature 0x%08x"), hdr->hdr_signature); return error(_("bad signature 0x%08x"), hdr->hdr_signature);
@ -1827,10 +1829,16 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size)
if (!verify_index_checksum) if (!verify_index_checksum)
return 0; return 0;
end = (unsigned char *)hdr + size;
start = end - the_hash_algo->rawsz;
oidread(&oid, start);
if (oideq(&oid, null_oid()))
return 0;
the_hash_algo->init_fn(&c); the_hash_algo->init_fn(&c);
the_hash_algo->update_fn(&c, hdr, size - the_hash_algo->rawsz); the_hash_algo->update_fn(&c, hdr, size - the_hash_algo->rawsz);
the_hash_algo->final_fn(hash, &c); the_hash_algo->final_fn(hash, &c);
if (!hasheq(hash, (unsigned char *)hdr + size - the_hash_algo->rawsz)) if (!hasheq(hash, start))
return error(_("bad index file sha1 signature")); return error(_("bad index file sha1 signature"));
return 0; return 0;
} }
@ -2920,9 +2928,13 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
int ieot_entries = 1; int ieot_entries = 1;
struct index_entry_offset_table *ieot = NULL; struct index_entry_offset_table *ieot = NULL;
int nr, nr_threads; int nr, nr_threads;
struct repository *r = istate->repo ? istate->repo : the_repository;
f = hashfd(tempfile->fd, tempfile->filename.buf); f = hashfd(tempfile->fd, tempfile->filename.buf);
prepare_repo_settings(r);
f->skip_hash = r->settings.index_skip_hash;
for (i = removed = extended = 0; i < entries; i++) { for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE) if (cache[i]->ce_flags & CE_REMOVE)
removed++; removed++;

View File

@ -47,6 +47,7 @@ void prepare_repo_settings(struct repository *r)
} }
if (manyfiles) { if (manyfiles) {
r->settings.index_version = 4; r->settings.index_version = 4;
r->settings.index_skip_hash = 1;
r->settings.core_untracked_cache = UNTRACKED_CACHE_WRITE; r->settings.core_untracked_cache = UNTRACKED_CACHE_WRITE;
} }
@ -61,6 +62,7 @@ void prepare_repo_settings(struct repository *r)
repo_cfg_bool(r, "pack.usesparse", &r->settings.pack_use_sparse, 1); repo_cfg_bool(r, "pack.usesparse", &r->settings.pack_use_sparse, 1);
repo_cfg_bool(r, "core.multipackindex", &r->settings.core_multi_pack_index, 1); repo_cfg_bool(r, "core.multipackindex", &r->settings.core_multi_pack_index, 1);
repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0); repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0);
repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash);
/* /*
* The GIT_TEST_MULTI_PACK_INDEX variable is special in that * The GIT_TEST_MULTI_PACK_INDEX variable is special in that

View File

@ -42,6 +42,7 @@ struct repo_settings {
struct fsmonitor_settings *fsmonitor; /* lazily loaded */ struct fsmonitor_settings *fsmonitor; /* lazily loaded */
int index_version; int index_version;
int index_skip_hash;
enum untracked_cache_setting core_untracked_cache; enum untracked_cache_setting core_untracked_cache;
int pack_use_sparse; int pack_use_sparse;

View File

@ -143,6 +143,7 @@ static int set_recommended_config(int reconfigure)
{ "credential.validate", "false", 1 }, /* GCM4W-only */ { "credential.validate", "false", 1 }, /* GCM4W-only */
{ "gc.auto", "0", 1 }, { "gc.auto", "0", 1 },
{ "gui.GCWarning", "false", 1 }, { "gui.GCWarning", "false", 1 },
{ "index.skipHash", "false", 1 },
{ "index.threads", "true", 1 }, { "index.threads", "true", 1 },
{ "index.version", "4", 1 }, { "index.version", "4", 1 },
{ "merge.stat", "false", 1 }, { "merge.stat", "false", 1 },

View File

@ -65,6 +65,36 @@ test_expect_success 'out of bounds index.version issues warning' '
) )
' '
test_expect_success 'index.skipHash config option' '
	# An explicit index.skipHash=true writes an all-zero trailing hash,
	# and fsck still accepts the resulting index.
	rm -f .git/index &&
	git -c index.skipHash=true add a &&
	test_trailing_hash .git/index >hash &&
	echo $(test_oid zero) >expect &&
	test_cmp expect hash &&
	git fsck &&

	# feature.manyFiles=true enables index.skipHash by default.
	rm -f .git/index &&
	git -c feature.manyFiles=true add a &&
	test_trailing_hash .git/index >hash &&
	test_cmp expect hash &&

	# An explicit index.skipHash=false overrides feature.manyFiles, so
	# the trailing hash must not be all zeros.
	rm -f .git/index &&
	git -c feature.manyFiles=true \
	    -c index.skipHash=false add a &&
	test_trailing_hash .git/index >hash &&
	! cmp expect hash &&

	# The submodule uses its own index.skipHash value even though the
	# superproject has the option disabled.
	test_commit start &&
	git -c protocol.file.allow=always submodule add ./ sub &&
	git config index.skipHash false &&
	git -C sub config index.skipHash true &&
	>sub/file &&
	git -C sub add a &&
	test_trailing_hash .git/modules/sub/index >hash &&
	test_cmp expect hash &&
	git -C sub fsck
'
test_index_version () { test_index_version () {
INDEX_VERSION_CONFIG=$1 && INDEX_VERSION_CONFIG=$1 &&
FEATURE_MANY_FILES=$2 && FEATURE_MANY_FILES=$2 &&

View File

@ -1816,3 +1816,11 @@ test_cmp_config_output () {
sort config-actual >sorted-actual && sort config-actual >sorted-actual &&
test_cmp sorted-expect sorted-actual test_cmp sorted-expect sorted-actual
} }
# Print the final $(test_oid rawsz) bytes of the given file as hex,
# with the spaces in the hexdump output removed.
test_trailing_hash () {
	local path="$1" &&
	tail -c $(test_oid rawsz) "$path" |
		test-tool hexdump |
		tr -d " "
}