Merge branch 'tb/pack-revindex-on-disk'

Introduce an on-disk file to record revindex for packdata, which
traditionally was always created on the fly and only in-core.

* tb/pack-revindex-on-disk:
  t5325: check both on-disk and in-memory reverse index
  pack-revindex: ensure that on-disk reverse indexes are given precedence
  t: support GIT_TEST_WRITE_REV_INDEX
  t: prepare for GIT_TEST_WRITE_REV_INDEX
  Documentation/config/pack.txt: advertise 'pack.writeReverseIndex'
  builtin/pack-objects.c: respect 'pack.writeReverseIndex'
  builtin/index-pack.c: write reverse indexes
  builtin/index-pack.c: allow stripping arbitrary extensions
  pack-write.c: prepare to write 'pack-*.rev' files
  packfile: prepare for the existence of '*.rev' files
This commit is contained in:
Junio C Hamano 2021-02-12 14:21:04 -08:00
commit 3c12d0b885
22 changed files with 545 additions and 42 deletions

View File

@ -133,3 +133,10 @@ pack.writeBitmapHashCache::
between an older, bitmapped pack and objects that have been between an older, bitmapped pack and objects that have been
pushed since the last gc). The downside is that it consumes 4 pushed since the last gc). The downside is that it consumes 4
bytes per object of disk space. Defaults to true. bytes per object of disk space. Defaults to true.
pack.writeReverseIndex::
When true, git will write a corresponding .rev file (see:
link:../technical/pack-format.html[Documentation/technical/pack-format.txt])
for each new packfile that it writes in all places except for
linkgit:git-fast-import[1] and in the bulk checkin mechanism.
Defaults to false.

View File

@ -9,17 +9,18 @@ git-index-pack - Build pack index file for an existing packed archive
SYNOPSIS SYNOPSIS
-------- --------
[verse] [verse]
'git index-pack' [-v] [-o <index-file>] <pack-file> 'git index-pack' [-v] [-o <index-file>] [--[no-]rev-index] <pack-file>
'git index-pack' --stdin [--fix-thin] [--keep] [-v] [-o <index-file>] 'git index-pack' --stdin [--fix-thin] [--keep] [-v] [-o <index-file>]
[<pack-file>] [--[no-]rev-index] [<pack-file>]
DESCRIPTION DESCRIPTION
----------- -----------
Reads a packed archive (.pack) from the specified file, and Reads a packed archive (.pack) from the specified file, and
builds a pack index file (.idx) for it. The packed archive builds a pack index file (.idx) for it. Optionally writes a
together with the pack index can then be placed in the reverse-index (.rev) for the specified pack. The packed
objects/pack/ directory of a Git repository. archive together with the pack index can then be placed in
the objects/pack/ directory of a Git repository.
OPTIONS OPTIONS
@ -35,6 +36,13 @@ OPTIONS
fails if the name of packed archive does not end fails if the name of packed archive does not end
with .pack). with .pack).
--[no-]rev-index::
When this flag is provided, generate a reverse index
(a `.rev` file) corresponding to the given pack. If
`--verify` is given, ensure that the existing
reverse index is correct. Takes precedence over
`pack.writeReverseIndex`.
--stdin:: --stdin::
When this flag is provided, the pack is read from stdin When this flag is provided, the pack is read from stdin
instead and a copy is then written to <pack-file>. If instead and a copy is then written to <pack-file>. If

View File

@ -274,6 +274,26 @@ Pack file entry: <+
Index checksum of all of the above. Index checksum of all of the above.
== pack-*.rev files have the format:
- A 4-byte magic number '0x52494458' ('RIDX').
- A 4-byte version identifier (= 1).
- A 4-byte hash function identifier (= 1 for SHA-1, 2 for SHA-256).
- A table of index positions (one per packed object, num_objects in
total, each a 4-byte unsigned integer in network order), sorted by
their corresponding offsets in the packfile.
- A trailer, containing a:
checksum of the corresponding packfile, and
a checksum of all of the above.
All 4-byte numbers are in network order.
== multi-pack-index (MIDX) files have the following format: == multi-pack-index (MIDX) files have the following format:
The multi-pack-index files refer to multiple pack-files and loose objects. The multi-pack-index files refer to multiple pack-files and loose objects.

View File

@ -17,7 +17,7 @@
#include "promisor-remote.h" #include "promisor-remote.h"
static const char index_pack_usage[] = static const char index_pack_usage[] =
"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])"; "git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
struct object_entry { struct object_entry {
struct pack_idx_entry idx; struct pack_idx_entry idx;
@ -1436,15 +1436,15 @@ static void fix_unresolved_deltas(struct hashfile *f)
free(sorted_by_pos); free(sorted_by_pos);
} }
static const char *derive_filename(const char *pack_name, const char *suffix, static const char *derive_filename(const char *pack_name, const char *strip,
struct strbuf *buf) const char *suffix, struct strbuf *buf)
{ {
size_t len; size_t len;
if (!strip_suffix(pack_name, ".pack", &len)) if (!strip_suffix(pack_name, strip, &len) || !len ||
die(_("packfile name '%s' does not end with '.pack'"), pack_name[len - 1] != '.')
pack_name); die(_("packfile name '%s' does not end with '.%s'"),
pack_name, strip);
strbuf_add(buf, pack_name, len); strbuf_add(buf, pack_name, len);
strbuf_addch(buf, '.');
strbuf_addstr(buf, suffix); strbuf_addstr(buf, suffix);
return buf->buf; return buf->buf;
} }
@ -1459,7 +1459,7 @@ static void write_special_file(const char *suffix, const char *msg,
int msg_len = strlen(msg); int msg_len = strlen(msg);
if (pack_name) if (pack_name)
filename = derive_filename(pack_name, suffix, &name_buf); filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else else
filename = odb_pack_name(&name_buf, hash, suffix); filename = odb_pack_name(&name_buf, hash, suffix);
@ -1484,12 +1484,14 @@ static void write_special_file(const char *suffix, const char *msg,
static void final(const char *final_pack_name, const char *curr_pack_name, static void final(const char *final_pack_name, const char *curr_pack_name,
const char *final_index_name, const char *curr_index_name, const char *final_index_name, const char *curr_index_name,
const char *final_rev_index_name, const char *curr_rev_index_name,
const char *keep_msg, const char *promisor_msg, const char *keep_msg, const char *promisor_msg,
unsigned char *hash) unsigned char *hash)
{ {
const char *report = "pack"; const char *report = "pack";
struct strbuf pack_name = STRBUF_INIT; struct strbuf pack_name = STRBUF_INIT;
struct strbuf index_name = STRBUF_INIT; struct strbuf index_name = STRBUF_INIT;
struct strbuf rev_index_name = STRBUF_INIT;
int err; int err;
if (!from_stdin) { if (!from_stdin) {
@ -1524,6 +1526,16 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
} else } else
chmod(final_index_name, 0444); chmod(final_index_name, 0444);
if (curr_rev_index_name) {
if (final_rev_index_name != curr_rev_index_name) {
if (!final_rev_index_name)
final_rev_index_name = odb_pack_name(&rev_index_name, hash, "rev");
if (finalize_object_file(curr_rev_index_name, final_rev_index_name))
die(_("cannot store reverse index file"));
} else
chmod(final_rev_index_name, 0444);
}
if (do_fsck_object) { if (do_fsck_object) {
struct packed_git *p; struct packed_git *p;
p = add_packed_git(final_index_name, strlen(final_index_name), 0); p = add_packed_git(final_index_name, strlen(final_index_name), 0);
@ -1553,6 +1565,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
} }
} }
strbuf_release(&rev_index_name);
strbuf_release(&index_name); strbuf_release(&index_name);
strbuf_release(&pack_name); strbuf_release(&pack_name);
} }
@ -1578,6 +1591,12 @@ static int git_index_pack_config(const char *k, const char *v, void *cb)
} }
return 0; return 0;
} }
if (!strcmp(k, "pack.writereverseindex")) {
if (git_config_bool(k, v))
opts->flags |= WRITE_REV;
else
opts->flags &= ~WRITE_REV;
}
return git_default_config(k, v, cb); return git_default_config(k, v, cb);
} }
@ -1695,12 +1714,14 @@ static void show_pack_info(int stat_only)
int cmd_index_pack(int argc, const char **argv, const char *prefix) int cmd_index_pack(int argc, const char **argv, const char *prefix)
{ {
int i, fix_thin_pack = 0, verify = 0, stat_only = 0; int i, fix_thin_pack = 0, verify = 0, stat_only = 0, rev_index;
const char *curr_index; const char *curr_index;
const char *index_name = NULL, *pack_name = NULL; const char *curr_rev_index = NULL;
const char *index_name = NULL, *pack_name = NULL, *rev_index_name = NULL;
const char *keep_msg = NULL; const char *keep_msg = NULL;
const char *promisor_msg = NULL; const char *promisor_msg = NULL;
struct strbuf index_name_buf = STRBUF_INIT; struct strbuf index_name_buf = STRBUF_INIT;
struct strbuf rev_index_name_buf = STRBUF_INIT;
struct pack_idx_entry **idx_objects; struct pack_idx_entry **idx_objects;
struct pack_idx_option opts; struct pack_idx_option opts;
unsigned char pack_hash[GIT_MAX_RAWSZ]; unsigned char pack_hash[GIT_MAX_RAWSZ];
@ -1727,6 +1748,11 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
if (prefix && chdir(prefix)) if (prefix && chdir(prefix))
die(_("Cannot come back to cwd")); die(_("Cannot come back to cwd"));
if (git_env_bool(GIT_TEST_WRITE_REV_INDEX, 0))
rev_index = 1;
else
rev_index = !!(opts.flags & (WRITE_REV_VERIFY | WRITE_REV));
for (i = 1; i < argc; i++) { for (i = 1; i < argc; i++) {
const char *arg = argv[i]; const char *arg = argv[i];
@ -1805,6 +1831,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
if (hash_algo == GIT_HASH_UNKNOWN) if (hash_algo == GIT_HASH_UNKNOWN)
die(_("unknown hash algorithm '%s'"), arg); die(_("unknown hash algorithm '%s'"), arg);
repo_set_hash_algo(the_repository, hash_algo); repo_set_hash_algo(the_repository, hash_algo);
} else if (!strcmp(arg, "--rev-index")) {
rev_index = 1;
} else if (!strcmp(arg, "--no-rev-index")) {
rev_index = 0;
} else } else
usage(index_pack_usage); usage(index_pack_usage);
continue; continue;
@ -1824,7 +1854,16 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
if (from_stdin && hash_algo) if (from_stdin && hash_algo)
die(_("--object-format cannot be used with --stdin")); die(_("--object-format cannot be used with --stdin"));
if (!index_name && pack_name) if (!index_name && pack_name)
index_name = derive_filename(pack_name, "idx", &index_name_buf); index_name = derive_filename(pack_name, "pack", "idx", &index_name_buf);
opts.flags &= ~(WRITE_REV | WRITE_REV_VERIFY);
if (rev_index) {
opts.flags |= verify ? WRITE_REV_VERIFY : WRITE_REV;
if (index_name)
rev_index_name = derive_filename(index_name,
"idx", "rev",
&rev_index_name_buf);
}
if (verify) { if (verify) {
if (!index_name) if (!index_name)
@ -1878,11 +1917,16 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
for (i = 0; i < nr_objects; i++) for (i = 0; i < nr_objects; i++)
idx_objects[i] = &objects[i].idx; idx_objects[i] = &objects[i].idx;
curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_hash); curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_hash);
if (rev_index)
curr_rev_index = write_rev_file(rev_index_name, idx_objects,
nr_objects, pack_hash,
opts.flags);
free(idx_objects); free(idx_objects);
if (!verify) if (!verify)
final(pack_name, curr_pack, final(pack_name, curr_pack,
index_name, curr_index, index_name, curr_index,
rev_index_name, curr_rev_index,
keep_msg, promisor_msg, keep_msg, promisor_msg,
pack_hash); pack_hash);
else else
@ -1893,10 +1937,13 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
free(objects); free(objects);
strbuf_release(&index_name_buf); strbuf_release(&index_name_buf);
strbuf_release(&rev_index_name_buf);
if (pack_name == NULL) if (pack_name == NULL)
free((void *) curr_pack); free((void *) curr_pack);
if (index_name == NULL) if (index_name == NULL)
free((void *) curr_index); free((void *) curr_index);
if (rev_index_name == NULL)
free((void *) curr_rev_index);
/* /*
* Let the caller know this pack is not self contained * Let the caller know this pack is not self contained

View File

@ -2953,6 +2953,13 @@ static int git_pack_config(const char *k, const char *v, void *cb)
pack_idx_opts.version); pack_idx_opts.version);
return 0; return 0;
} }
if (!strcmp(k, "pack.writereverseindex")) {
if (git_config_bool(k, v))
pack_idx_opts.flags |= WRITE_REV;
else
pack_idx_opts.flags &= ~WRITE_REV;
return 0;
}
if (!strcmp(k, "uploadpack.blobpackfileuri")) { if (!strcmp(k, "uploadpack.blobpackfileuri")) {
struct configured_exclusion *ex = xmalloc(sizeof(*ex)); struct configured_exclusion *ex = xmalloc(sizeof(*ex));
const char *oid_end, *pack_end; const char *oid_end, *pack_end;
@ -3592,6 +3599,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
reset_pack_idx_option(&pack_idx_opts); reset_pack_idx_option(&pack_idx_opts);
git_config(git_pack_config, NULL); git_config(git_pack_config, NULL);
if (git_env_bool(GIT_TEST_WRITE_REV_INDEX, 0))
pack_idx_opts.flags |= WRITE_REV;
progress = isatty(2); progress = isatty(2);
argc = parse_options(argc, argv, prefix, pack_objects_options, argc = parse_options(argc, argv, prefix, pack_objects_options,

View File

@ -209,6 +209,7 @@ static struct {
} exts[] = { } exts[] = {
{".pack"}, {".pack"},
{".idx"}, {".idx"},
{".rev", 1},
{".bitmap", 1}, {".bitmap", 1},
{".promisor", 1}, {".promisor", 1},
}; };

View File

@ -24,6 +24,7 @@ linux-gcc)
export GIT_TEST_MULTI_PACK_INDEX=1 export GIT_TEST_MULTI_PACK_INDEX=1
export GIT_TEST_ADD_I_USE_BUILTIN=1 export GIT_TEST_ADD_I_USE_BUILTIN=1
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master
export GIT_TEST_WRITE_REV_INDEX=1
make test make test
;; ;;
linux-clang) linux-clang)

View File

@ -85,6 +85,9 @@ struct packed_git {
multi_pack_index:1; multi_pack_index:1;
unsigned char hash[GIT_MAX_RAWSZ]; unsigned char hash[GIT_MAX_RAWSZ];
struct revindex_entry *revindex; struct revindex_entry *revindex;
const uint32_t *revindex_data;
const uint32_t *revindex_map;
size_t revindex_size;
/* something like ".git/objects/pack/xxxxx.pack" */ /* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */ char pack_name[FLEX_ARRAY]; /* more */
}; };

View File

@ -2,6 +2,7 @@
#include "pack-revindex.h" #include "pack-revindex.h"
#include "object-store.h" #include "object-store.h"
#include "packfile.h" #include "packfile.h"
#include "config.h"
struct revindex_entry { struct revindex_entry {
off_t offset; off_t offset;
@ -164,14 +165,131 @@ static void create_pack_revindex(struct packed_git *p)
sort_revindex(p->revindex, num_ent, p->pack_size); sort_revindex(p->revindex, num_ent, p->pack_size);
} }
/*
 * Build the reverse index for pack `p` in core.
 *
 * Dies when git_env_bool() reports GIT_TEST_REV_INDEX_DIE_IN_MEMORY as set.
 * Returns -1 if the pack index cannot be opened, and 0 after
 * create_pack_revindex() has run.
 */
static int create_pack_revindex_in_memory(struct packed_git *p)
{
	int must_die = git_env_bool(GIT_TEST_REV_INDEX_DIE_IN_MEMORY, 0);

	if (must_die)
		die("dying as requested by '%s'",
		    GIT_TEST_REV_INDEX_DIE_IN_MEMORY);

	if (!open_pack_index(p)) {
		create_pack_revindex(p);
		return 0;
	}
	return -1;
}
/*
 * Return the name of the ".rev" file belonging to pack `p`: p->pack_name
 * with its trailing ".pack" replaced by ".rev". The string comes from
 * xstrfmt(); the caller in this file releases it with free().
 *
 * It is a BUG() for p->pack_name not to end in ".pack".
 */
static char *pack_revindex_filename(struct packed_git *p)
{
	size_t stem_len;
	int has_suffix = strip_suffix(p->pack_name, ".pack", &stem_len);

	if (!has_suffix)
		BUG("pack_name does not end in .pack");
	return xstrfmt("%.*s.rev", (int)stem_len, p->pack_name);
}
#define RIDX_HEADER_SIZE (12)
#define RIDX_MIN_SIZE (RIDX_HEADER_SIZE + (2 * the_hash_algo->rawsz))
/*
 * Fixed-size header found at the very start of a .rev file (three 4-byte
 * fields, RIDX_HEADER_SIZE bytes in total). load_revindex_from_disk() runs
 * every field through ntohl() before inspecting it.
 */
struct revindex_header {
/* compared against RIDX_SIGNATURE by the loader */
uint32_t signature;
/* the loader only accepts version 1 */
uint32_t version;
/* hash function identifier; the loader accepts 1 or 2 */
uint32_t hash_id;
};
/*
 * Map the reverse-index file `revindex_name` into memory and sanity-check it.
 *
 * The file size must be exactly RIDX_MIN_SIZE plus one uint32_t per object
 * (`num_objects` of them), and the header must carry the RIDX signature,
 * version 1 and a hash id of 1 or 2.
 *
 * On success, returns 0 and stores the start of the mapping in `*data_p` and
 * its length in `*len_p`. On failure, returns non-zero (-1 if the file could
 * not be opened, otherwise the result of error()/error_errno()), removes any
 * mapping that was made, and leaves `*data_p` / `*len_p` untouched.
 *
 * The file descriptor is closed before returning in either case.
 */
static int load_revindex_from_disk(char *revindex_name,
				   uint32_t num_objects,
				   const uint32_t **data_p, size_t *len_p)
{
	int fd, ret = 0;
	struct stat st;
	void *data = NULL;
	size_t revindex_size = 0;
	struct revindex_header *hdr;

	fd = git_open(revindex_name);

	if (fd < 0) {
		ret = -1;
		goto cleanup;
	}
	if (fstat(fd, &st)) {
		ret = error_errno(_("failed to read %s"), revindex_name);
		goto cleanup;
	}

	revindex_size = xsize_t(st.st_size);

	if (revindex_size < RIDX_MIN_SIZE) {
		ret = error(_("reverse-index file %s is too small"), revindex_name);
		goto cleanup;
	}

	/* Everything beyond header and trailer must be the position table. */
	if (revindex_size - RIDX_MIN_SIZE != st_mult(sizeof(uint32_t), num_objects)) {
		ret = error(_("reverse-index file %s is corrupt"), revindex_name);
		goto cleanup;
	}

	data = xmmap(NULL, revindex_size, PROT_READ, MAP_PRIVATE, fd, 0);
	hdr = data;

	if (ntohl(hdr->signature) != RIDX_SIGNATURE) {
		ret = error(_("reverse-index file %s has unknown signature"), revindex_name);
		goto cleanup;
	}
	if (ntohl(hdr->version) != 1) {
		ret = error(_("reverse-index file %s has unsupported version %"PRIu32),
			    revindex_name, ntohl(hdr->version));
		goto cleanup;
	}
	if (!(ntohl(hdr->hash_id) == 1 || ntohl(hdr->hash_id) == 2)) {
		ret = error(_("reverse-index file %s has unsupported hash id %"PRIu32),
			    revindex_name, ntohl(hdr->hash_id));
		goto cleanup;
	}

cleanup:
	if (ret) {
		if (data)
			munmap(data, revindex_size);
	} else {
		*len_p = revindex_size;
		*data_p = (const uint32_t *)data;
	}

	/*
	 * The open itself may have failed; do not call close() on an invalid
	 * descriptor (it would only fail with EBADF and clobber errno).
	 */
	if (fd >= 0)
		close(fd);
	return ret;
}
/*
 * Try to load the on-disk reverse index for pack `p`.
 *
 * Returns -1 if the pack index cannot be opened; otherwise returns whatever
 * load_revindex_from_disk() returned. On success (0), p->revindex_map and
 * p->revindex_size describe the mapping and p->revindex_data points
 * RIDX_HEADER_SIZE bytes into it.
 */
static int load_pack_revindex_from_disk(struct packed_git *p)
{
	char *path;
	int status;

	if (open_pack_index(p))
		return -1;

	path = pack_revindex_filename(p);
	status = load_revindex_from_disk(path, p->num_objects,
					 &p->revindex_map, &p->revindex_size);
	if (!status) {
		const char *base = (const char *)p->revindex_map;

		p->revindex_data = (const uint32_t *)(base + RIDX_HEADER_SIZE);
	}

	free(path);
	return status;
}
int load_pack_revindex(struct packed_git *p) int load_pack_revindex(struct packed_git *p)
{ {
if (!p->revindex) { if (p->revindex || p->revindex_data)
if (open_pack_index(p)) return 0;
return -1;
create_pack_revindex(p); if (!load_pack_revindex_from_disk(p))
} return 0;
return 0; else if (!create_pack_revindex_in_memory(p))
return 0;
return -1;
} }
int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos) int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos)
@ -203,18 +321,28 @@ int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos)
uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos) uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos)
{ {
if (!p->revindex) if (!(p->revindex || p->revindex_data))
BUG("pack_pos_to_index: reverse index not yet loaded"); BUG("pack_pos_to_index: reverse index not yet loaded");
if (p->num_objects <= pos) if (p->num_objects <= pos)
BUG("pack_pos_to_index: out-of-bounds object at %"PRIu32, pos); BUG("pack_pos_to_index: out-of-bounds object at %"PRIu32, pos);
return p->revindex[pos].nr;
if (p->revindex)
return p->revindex[pos].nr;
else
return get_be32(p->revindex_data + pos);
} }
off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos) off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos)
{ {
if (!p->revindex) if (!(p->revindex || p->revindex_data))
BUG("pack_pos_to_index: reverse index not yet loaded"); BUG("pack_pos_to_index: reverse index not yet loaded");
if (p->num_objects < pos) if (p->num_objects < pos)
BUG("pack_pos_to_offset: out-of-bounds object at %"PRIu32, pos); BUG("pack_pos_to_offset: out-of-bounds object at %"PRIu32, pos);
return p->revindex[pos].offset;
if (p->revindex)
return p->revindex[pos].offset;
else if (pos == p->num_objects)
return p->pack_size - the_hash_algo->rawsz;
else
return nth_packed_object_offset(p, pack_pos_to_index(p, pos));
} }

View File

@ -16,11 +16,21 @@
* can be found * can be found
*/ */
#define RIDX_SIGNATURE 0x52494458 /* "RIDX" */
#define RIDX_VERSION 1
#define GIT_TEST_WRITE_REV_INDEX "GIT_TEST_WRITE_REV_INDEX"
#define GIT_TEST_REV_INDEX_DIE_IN_MEMORY "GIT_TEST_REV_INDEX_DIE_IN_MEMORY"
struct packed_git; struct packed_git;
/* /*
* load_pack_revindex populates the revindex's internal data-structures for the * load_pack_revindex populates the revindex's internal data-structures for the
* given pack, returning zero on success and a negative value otherwise. * given pack, returning zero on success and a negative value otherwise.
*
* If a '.rev' file is present it is mmap'd, and pointers are assigned into it
* (instead of using the in-memory variant).
*/ */
int load_pack_revindex(struct packed_git *p); int load_pack_revindex(struct packed_git *p);
@ -55,7 +65,9 @@ uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos);
* If the reverse index has not yet been loaded, or the position is out of * If the reverse index has not yet been loaded, or the position is out of
* bounds, this function aborts. * bounds, this function aborts.
* *
* This function runs in constant time. * This function runs in constant time under both in-memory and on-disk reverse
* indexes, but an additional step is taken to consult the corresponding .idx
* file when using the on-disk format.
*/ */
off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos); off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos);

View File

@ -167,6 +167,113 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
return index_name; return index_name;
} }
/*
 * Comparison callback for QSORT_S(): `va` and `vb` point at uint32_t indexes
 * into the `struct pack_idx_entry *` array passed as `ctx`. Orders the two
 * indexes by the `offset` field of the entries they refer to.
 */
static int pack_order_cmp(const void *va, const void *vb, void *ctx)
{
	struct pack_idx_entry **objects = ctx;
	uint32_t idx_a = *(const uint32_t *)va;
	uint32_t idx_b = *(const uint32_t *)vb;
	off_t off_a = objects[idx_a]->offset;
	off_t off_b = objects[idx_b]->offset;

	/* yields -1, 0 or 1 */
	return (off_a > off_b) - (off_a < off_b);
}
/*
 * Emit the three big-endian 32-bit header words of a .rev file into `f`:
 * RIDX_SIGNATURE, RIDX_VERSION, and a hash identifier (1 for SHA-1, 2 for
 * SHA-256). Dies if the_hash_algo is neither of those.
 */
static void write_rev_header(struct hashfile *f)
{
	uint32_t oid_version;
	int algo = hash_algo_by_ptr(the_hash_algo);

	if (algo == GIT_HASH_SHA1)
		oid_version = 1;
	else if (algo == GIT_HASH_SHA256)
		oid_version = 2;
	else
		die("write_rev_header: unknown hash version");

	hashwrite_be32(f, RIDX_SIGNATURE);
	hashwrite_be32(f, RIDX_VERSION);
	hashwrite_be32(f, oid_version);
}
static void write_rev_index_positions(struct hashfile *f,
struct pack_idx_entry **objects,
uint32_t nr_objects)
{
uint32_t *pack_order;
uint32_t i;
ALLOC_ARRAY(pack_order, nr_objects);
for (i = 0; i < nr_objects; i++)
pack_order[i] = i;
QSORT_S(pack_order, nr_objects, pack_order_cmp, objects);
for (i = 0; i < nr_objects; i++)
hashwrite_be32(f, pack_order[i]);
free(pack_order);
}
/*
 * Append `hash` (the_hash_algo->rawsz bytes) to `f`. write_rev_file() passes
 * the hash it was given by its caller.
 */
static void write_rev_trailer(struct hashfile *f, const unsigned char *hash)
{
	size_t hash_len = the_hash_algo->rawsz;

	hashwrite(f, hash, hash_len);
}
/*
 * Write, or verify, the reverse index for a pack.
 *
 * `flags` selects the mode; WRITE_REV and WRITE_REV_VERIFY are mutually
 * exclusive (passing both is fatal):
 *
 *  - WRITE_REV: create the file. With a NULL `rev_name` a temporary file is
 *    made via odb_mkstemp() and its detached name is returned (the caller
 *    owns that string); otherwise any existing `rev_name` is unlinked and
 *    the file is created afresh.
 *
 *  - WRITE_REV_VERIFY: regenerate the contents and check them against the
 *    existing file `rev_name` through hashfd_check(). A missing file is not
 *    an error, since .rev files are optional; NULL is returned in that case.
 *
 *  - neither: do nothing and return NULL.
 *
 * The contents are the header, the index positions of `objects` sorted by
 * pack offset, and `hash` as trailer; finalize_hashfile() then appends the
 * checksum of the stream (CSUM_HASH_IN_STREAM).
 *
 * Returns the name of the file written or verified, or NULL as described
 * above.
 */
const char *write_rev_file(const char *rev_name,
			   struct pack_idx_entry **objects,
			   uint32_t nr_objects,
			   const unsigned char *hash,
			   unsigned flags)
{
	struct hashfile *f;
	int fd;

	if ((flags & WRITE_REV) && (flags & WRITE_REV_VERIFY))
		die(_("cannot both write and verify reverse index"));

	if (flags & WRITE_REV) {
		if (!rev_name) {
			struct strbuf tmp_file = STRBUF_INIT;
			fd = odb_mkstemp(&tmp_file, "pack/tmp_rev_XXXXXX");
			rev_name = strbuf_detach(&tmp_file, NULL);
		} else {
			/* best effort: O_EXCL below needs the path to be free */
			unlink(rev_name);
			fd = open(rev_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
			if (fd < 0)
				die_errno("unable to create '%s'", rev_name);
		}
		f = hashfd(fd, rev_name);
	} else if (flags & WRITE_REV_VERIFY) {
		struct stat statbuf;
		if (stat(rev_name, &statbuf)) {
			if (errno == ENOENT) {
				/* .rev files are optional */
				return NULL;
			} else
				die_errno(_("could not stat: %s"), rev_name);
		}
		f = hashfd_check(rev_name);
	} else
		return NULL;

	write_rev_header(f);

	write_rev_index_positions(f, objects, nr_objects);
	write_rev_trailer(f, hash);

	if (rev_name && adjust_shared_perm(rev_name) < 0)
		die(_("failed to make %s readable"), rev_name);

	/*
	 * Nothing new reaches the disk when we are merely verifying, so only
	 * request an fsync when actually writing. This must key off the
	 * reverse-index flag, not the .idx one (WRITE_IDX_VERIFY).
	 */
	finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
				    ((flags & WRITE_REV_VERIFY) ? 0 : CSUM_FSYNC));

	return rev_name;
}
off_t write_pack_header(struct hashfile *f, uint32_t nr_entries) off_t write_pack_header(struct hashfile *f, uint32_t nr_entries)
{ {
struct pack_header hdr; struct pack_header hdr;
@ -342,7 +449,7 @@ void finish_tmp_packfile(struct strbuf *name_buffer,
struct pack_idx_option *pack_idx_opts, struct pack_idx_option *pack_idx_opts,
unsigned char hash[]) unsigned char hash[])
{ {
const char *idx_tmp_name; const char *idx_tmp_name, *rev_tmp_name = NULL;
int basename_len = name_buffer->len; int basename_len = name_buffer->len;
if (adjust_shared_perm(pack_tmp_name)) if (adjust_shared_perm(pack_tmp_name))
@ -353,6 +460,9 @@ void finish_tmp_packfile(struct strbuf *name_buffer,
if (adjust_shared_perm(idx_tmp_name)) if (adjust_shared_perm(idx_tmp_name))
die_errno("unable to make temporary index file readable"); die_errno("unable to make temporary index file readable");
rev_tmp_name = write_rev_file(NULL, written_list, nr_written, hash,
pack_idx_opts->flags);
strbuf_addf(name_buffer, "%s.pack", hash_to_hex(hash)); strbuf_addf(name_buffer, "%s.pack", hash_to_hex(hash));
if (rename(pack_tmp_name, name_buffer->buf)) if (rename(pack_tmp_name, name_buffer->buf))
@ -366,6 +476,14 @@ void finish_tmp_packfile(struct strbuf *name_buffer,
strbuf_setlen(name_buffer, basename_len); strbuf_setlen(name_buffer, basename_len);
if (rev_tmp_name) {
strbuf_addf(name_buffer, "%s.rev", hash_to_hex(hash));
if (rename(rev_tmp_name, name_buffer->buf))
die_errno("unable to rename temporary reverse-index file");
}
strbuf_setlen(name_buffer, basename_len);
free((void *)idx_tmp_name); free((void *)idx_tmp_name);
} }

4
pack.h
View File

@ -42,6 +42,8 @@ struct pack_idx_option {
/* flag bits */ /* flag bits */
#define WRITE_IDX_VERIFY 01 /* verify only, do not write the idx file */ #define WRITE_IDX_VERIFY 01 /* verify only, do not write the idx file */
#define WRITE_IDX_STRICT 02 #define WRITE_IDX_STRICT 02
#define WRITE_REV 04
#define WRITE_REV_VERIFY 010
uint32_t version; uint32_t version;
uint32_t off32_limit; uint32_t off32_limit;
@ -91,6 +93,8 @@ struct ref;
void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought); void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought);
const char *write_rev_file(const char *rev_name, struct pack_idx_entry **objects, uint32_t nr_objects, const unsigned char *hash, unsigned flags);
/* /*
* The "hdr" output buffer should be at least this big, which will handle sizes * The "hdr" output buffer should be at least this big, which will handle sizes
* up to 2^67. * up to 2^67.

View File

@ -324,11 +324,21 @@ void close_pack_index(struct packed_git *p)
} }
} }
/*
 * Drop the mapping of the on-disk reverse index of `p`, if there is one, and
 * clear the pointers into it. Does nothing when no .rev file is mapped.
 */
void close_pack_revindex(struct packed_git *p)
{
	if (p->revindex_map) {
		munmap((void *)p->revindex_map, p->revindex_size);
		p->revindex_map = NULL;
		p->revindex_data = NULL;
	}
}
void close_pack(struct packed_git *p) void close_pack(struct packed_git *p)
{ {
close_pack_windows(p); close_pack_windows(p);
close_pack_fd(p); close_pack_fd(p);
close_pack_index(p); close_pack_index(p);
close_pack_revindex(p);
} }
void close_object_store(struct raw_object_store *o) void close_object_store(struct raw_object_store *o)
@ -351,7 +361,7 @@ void close_object_store(struct raw_object_store *o)
void unlink_pack_path(const char *pack_name, int force_delete) void unlink_pack_path(const char *pack_name, int force_delete)
{ {
static const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"}; static const char *exts[] = {".pack", ".idx", ".rev", ".keep", ".bitmap", ".promisor"};
int i; int i;
struct strbuf buf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT;
size_t plen; size_t plen;
@ -853,6 +863,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
if (!strcmp(file_name, "multi-pack-index")) if (!strcmp(file_name, "multi-pack-index"))
return; return;
if (ends_with(file_name, ".idx") || if (ends_with(file_name, ".idx") ||
ends_with(file_name, ".rev") ||
ends_with(file_name, ".pack") || ends_with(file_name, ".pack") ||
ends_with(file_name, ".bitmap") || ends_with(file_name, ".bitmap") ||
ends_with(file_name, ".keep") || ends_with(file_name, ".keep") ||

View File

@ -90,6 +90,7 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value);
unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
void close_pack_windows(struct packed_git *); void close_pack_windows(struct packed_git *);
void close_pack_revindex(struct packed_git *);
void close_pack(struct packed_git *); void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o); void close_object_store(struct raw_object_store *o);
void unuse_pack(struct pack_window **); void unuse_pack(struct pack_window **);

View File

@ -433,6 +433,9 @@ GIT_TEST_DEFAULT_HASH=<hash-algo> specifies which hash algorithm to
use in the test scripts. Recognized values for <hash-algo> are "sha1" use in the test scripts. Recognized values for <hash-algo> are "sha1"
and "sha256". and "sha256".
GIT_TEST_WRITE_REV_INDEX=<boolean>, when true, enables the
'pack.writeReverseIndex' setting.
Naming Tests Naming Tests
------------ ------------

View File

@ -710,8 +710,9 @@ test_expect_success 'expire respects .keep files' '
PACKA=$(ls .git/objects/pack/a-pack*\.pack | sed s/\.pack\$//) && PACKA=$(ls .git/objects/pack/a-pack*\.pack | sed s/\.pack\$//) &&
touch $PACKA.keep && touch $PACKA.keep &&
git multi-pack-index expire && git multi-pack-index expire &&
ls -S .git/objects/pack/a-pack* | grep $PACKA >a-pack-files && test_path_is_file $PACKA.idx &&
test_line_count = 3 a-pack-files && test_path_is_file $PACKA.keep &&
test_path_is_file $PACKA.pack &&
test-tool read-midx .git/objects | grep idx >midx-list && test-tool read-midx .git/objects | grep idx >midx-list &&
test_line_count = 2 midx-list test_line_count = 2 midx-list
) )

120
t/t5325-reverse-index.sh Executable file
View File

@ -0,0 +1,120 @@
#!/bin/sh
test_description='on-disk reverse index'
. ./test-lib.sh
# The below tests want control over the 'pack.writeReverseIndex' setting
# themselves to assert various combinations of it with other options.
sane_unset GIT_TEST_WRITE_REV_INDEX
packdir=.git/objects/pack
test_expect_success 'setup' '
test_commit base &&
pack=$(git pack-objects --all $packdir/pack) &&
rev=$packdir/pack-$pack.rev &&
test_path_is_missing $rev
'
# Usage: test_index_pack <conf> [<index-pack option>...]
#
# Re-run "git index-pack" on $packdir/pack-$pack.pack with
# pack.writeReverseIndex set to <conf> (which may be the empty string); any
# further arguments are handed to index-pack unchanged. Both $rev and the
# pack's .idx are removed beforehand, so each call starts without a reverse
# index on disk.
test_index_pack () {
rm -f $rev &&
conf=$1 &&
shift &&
# remove the index since Windows won't overwrite an existing file
rm $packdir/pack-$pack.idx &&
git -c pack.writeReverseIndex=$conf index-pack "$@" \
$packdir/pack-$pack.pack
}
test_expect_success 'index-pack with pack.writeReverseIndex' '
test_index_pack "" &&
test_path_is_missing $rev &&
test_index_pack false &&
test_path_is_missing $rev &&
test_index_pack true &&
test_path_is_file $rev
'
# The command-line option must win over every value of
# pack.writeReverseIndex. The "|| return 1" is needed because a for loop only
# reports the status of its last iteration; without it a failure for an
# earlier $conf would go unnoticed.
test_expect_success 'index-pack with --[no-]rev-index' '
	for conf in "" true false
	do
		test_index_pack "$conf" --rev-index &&
		test_path_exists $rev &&
		test_index_pack "$conf" --no-rev-index &&
		test_path_is_missing $rev || return 1
	done
'
test_expect_success 'index-pack can verify reverse indexes' '
test_when_finished "rm -f $rev" &&
test_index_pack true &&
test_path_is_file $rev &&
git index-pack --rev-index --verify $packdir/pack-$pack.pack &&
# Intentionally corrupt the reverse index.
chmod u+w $rev &&
printf "xxxx" | dd of=$rev bs=1 count=4 conv=notrunc &&
test_must_fail git index-pack --rev-index --verify \
$packdir/pack-$pack.pack 2>err &&
grep "validation error" err
'
test_expect_success 'index-pack infers reverse index name with -o' '
git index-pack --rev-index -o other.idx $packdir/pack-$pack.pack &&
test_path_is_file other.idx &&
test_path_is_file other.rev
'
test_expect_success 'pack-objects respects pack.writeReverseIndex' '
test_when_finished "rm -fr pack-1-*" &&
git -c pack.writeReverseIndex= pack-objects --all pack-1 &&
test_path_is_missing pack-1-*.rev &&
git -c pack.writeReverseIndex=false pack-objects --all pack-1 &&
test_path_is_missing pack-1-*.rev &&
git -c pack.writeReverseIndex=true pack-objects --all pack-1 &&
test_path_is_file pack-1-*.rev
'
test_expect_success 'reverse index is not generated when available on disk' '
test_index_pack true &&
test_path_is_file $rev &&
git rev-parse HEAD >tip &&
GIT_TEST_REV_INDEX_DIE_IN_MEMORY=1 git cat-file \
--batch-check="%(objectsize:disk)" <tip
'
test_expect_success 'revindex in-memory vs on-disk' '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
test_commit commit &&
git rev-list --objects --no-object-names --all >objects &&
git -c pack.writeReverseIndex=false repack -ad &&
test_path_is_missing $packdir/pack-*.rev &&
git cat-file --batch-check="%(objectsize:disk) %(objectname)" \
<objects >in-core &&
git -c pack.writeReverseIndex=true repack -ad &&
test_path_is_file $packdir/pack-*.rev &&
git cat-file --batch-check="%(objectsize:disk) %(objectname)" \
<objects >on-disk &&
test_cmp on-disk in-core
)
'
test_done

View File

@ -329,7 +329,7 @@ test_expect_success SYMLINKS 'clone repo with symlinked or unknown files at obje
for raw in $(ls T*.raw) for raw in $(ls T*.raw)
do do
sed -e "s!/../!/Y/!; s![0-9a-f]\{38,\}!Z!" -e "/commit-graph/d" \ sed -e "s!/../!/Y/!; s![0-9a-f]\{38,\}!Z!" -e "/commit-graph/d" \
-e "/multi-pack-index/d" <$raw >$raw.de-sha-1 && -e "/multi-pack-index/d" -e "/rev/d" <$raw >$raw.de-sha-1 &&
sort $raw.de-sha-1 >$raw.de-sha || return 1 sort $raw.de-sha-1 >$raw.de-sha || return 1
done && done &&

View File

@ -851,8 +851,10 @@ test_expect_success 'part of packfile response provided as URI' '
test -f h2found && test -f h2found &&
# Ensure that there are exactly 6 files (3 .pack and 3 .idx). # Ensure that there are exactly 6 files (3 .pack and 3 .idx).
ls http_child/.git/objects/pack/* >filelist && ls http_child/.git/objects/pack/*.pack >packlist &&
test_line_count = 6 filelist ls http_child/.git/objects/pack/*.idx >idxlist &&
test_line_count = 3 idxlist &&
test_line_count = 3 packlist
' '
test_expect_success 'fetching with valid packfile URI but invalid hash fails' ' test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
@ -905,8 +907,10 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' '
clone "$HTTPD_URL/smart/http_parent" http_child && clone "$HTTPD_URL/smart/http_parent" http_child &&
# Ensure that there are exactly 4 files (2 .pack and 2 .idx). # Ensure that there are exactly 4 files (2 .pack and 2 .idx).
ls http_child/.git/objects/pack/* >filelist && ls http_child/.git/objects/pack/*.pack >packlist &&
test_line_count = 4 filelist ls http_child/.git/objects/pack/*.idx >idxlist &&
test_line_count = 2 idxlist &&
test_line_count = 2 packlist
' '
test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object' ' test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object' '

View File

@ -106,17 +106,17 @@ test_expect_success 'auto gc with too many loose objects does not attempt to cre
test_commit "$(test_oid obj2)" && test_commit "$(test_oid obj2)" &&
# Our first gc will create a pack; our second will create a second pack # Our first gc will create a pack; our second will create a second pack
git gc --auto && git gc --auto &&
ls .git/objects/pack | sort >existing_packs && ls .git/objects/pack/pack-*.pack | sort >existing_packs &&
test_commit "$(test_oid obj3)" && test_commit "$(test_oid obj3)" &&
test_commit "$(test_oid obj4)" && test_commit "$(test_oid obj4)" &&
git gc --auto 2>err && git gc --auto 2>err &&
test_i18ngrep ! "^warning:" err && test_i18ngrep ! "^warning:" err &&
ls .git/objects/pack/ | sort >post_packs && ls .git/objects/pack/pack-*.pack | sort >post_packs &&
comm -1 -3 existing_packs post_packs >new && comm -1 -3 existing_packs post_packs >new &&
comm -2 -3 existing_packs post_packs >del && comm -2 -3 existing_packs post_packs >del &&
test_line_count = 0 del && # No packs are deleted test_line_count = 0 del && # No packs are deleted
test_line_count = 2 new # There is one new pack and its .idx test_line_count = 1 new # There is one new pack
' '
test_expect_success 'gc --no-quiet' ' test_expect_success 'gc --no-quiet' '

View File

@ -1632,7 +1632,10 @@ test_expect_success 'O: blank lines not necessary after other commands' '
INPUT_END INPUT_END
git fast-import <input && git fast-import <input &&
test 8 = $(find .git/objects/pack -type f | grep -v multi-pack-index | wc -l) && ls -la .git/objects/pack/pack-*.pack >packlist &&
# List the .idx files (not the .pack files again) so that the idxlist
# line count really checks the number of pack indexes.
ls -la .git/objects/pack/pack-*.idx >idxlist &&
test_line_count = 4 idxlist &&
test_line_count = 4 packlist &&
test $(git rev-parse refs/tags/O3-2nd) = $(git rev-parse O3^) && test $(git rev-parse refs/tags/O3-2nd) = $(git rev-parse O3^) &&
git log --reverse --pretty=oneline O3 | sed s/^.*z// >actual && git log --reverse --pretty=oneline O3 | sed s/^.*z// >actual &&
test_cmp expect actual test_cmp expect actual

View File

@ -185,9 +185,11 @@ static int pack_copy_priority(const char *name)
return 1; return 1;
if (ends_with(name, ".pack")) if (ends_with(name, ".pack"))
return 2; return 2;
if (ends_with(name, ".idx")) if (ends_with(name, ".rev"))
return 3; return 3;
return 4; if (ends_with(name, ".idx"))
return 4;
return 5;
} }
static int pack_copy_cmp(const char *a, const char *b) static int pack_copy_cmp(const char *a, const char *b)