Merge branch 'jk/oid-array-cleanup'

Code clean-up.

* jk/oid-array-cleanup:
  commit-graph: use size_t for array allocation and indexing
  commit-graph: replace packed_oid_list with oid_array
  commit-graph: drop count_distinct_commits() function
  oid-array: provide a for-loop iterator
  oid-array: make sort function public
  cache.h: move hash/oid functions to hash.h
  t0064: make duplicate tests more robust
  t0064: drop sha1 mention from filename
  oid-array.h: drop sha1 mention from header guard
This commit is contained in:
Junio C Hamano 2020-12-17 15:06:40 -08:00
commit e5ace7167a
6 changed files with 157 additions and 199 deletions

94
cache.h
View File

@ -1123,100 +1123,6 @@ const char *repo_find_unique_abbrev(struct repository *r, const struct object_id
int repo_find_unique_abbrev_r(struct repository *r, char *hex, const struct object_id *oid, int len);
#define find_unique_abbrev_r(hex, oid, len) repo_find_unique_abbrev_r(the_repository, hex, oid, len)
extern const struct object_id null_oid;
static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
{
/*
* Teach the compiler that there are only two possibilities of hash size
* here, so that it can optimize for this case as much as possible.
*/
if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
return memcmp(sha1, sha2, GIT_MAX_RAWSZ);
return memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
}
static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
{
return hashcmp(oid1->hash, oid2->hash);
}
static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
{
/*
* We write this here instead of deferring to hashcmp so that the
* compiler can properly inline it and avoid calling memcmp.
*/
if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
return !memcmp(sha1, sha2, GIT_MAX_RAWSZ);
return !memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
}
static inline int oideq(const struct object_id *oid1, const struct object_id *oid2)
{
return hasheq(oid1->hash, oid2->hash);
}
static inline int is_null_oid(const struct object_id *oid)
{
return oideq(oid, &null_oid);
}
static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src)
{
memcpy(sha_dst, sha_src, the_hash_algo->rawsz);
}
static inline void oidcpy(struct object_id *dst, const struct object_id *src)
{
memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ);
}
static inline struct object_id *oiddup(const struct object_id *src)
{
struct object_id *dst = xmalloc(sizeof(struct object_id));
oidcpy(dst, src);
return dst;
}
static inline void hashclr(unsigned char *hash)
{
memset(hash, 0, the_hash_algo->rawsz);
}
static inline void oidclr(struct object_id *oid)
{
memset(oid->hash, 0, GIT_MAX_RAWSZ);
}
static inline void oidread(struct object_id *oid, const unsigned char *hash)
{
memcpy(oid->hash, hash, the_hash_algo->rawsz);
}
static inline int is_empty_blob_sha1(const unsigned char *sha1)
{
return hasheq(sha1, the_hash_algo->empty_blob->hash);
}
static inline int is_empty_blob_oid(const struct object_id *oid)
{
return oideq(oid, the_hash_algo->empty_blob);
}
static inline int is_empty_tree_sha1(const unsigned char *sha1)
{
return hasheq(sha1, the_hash_algo->empty_tree->hash);
}
static inline int is_empty_tree_oid(const struct object_id *oid)
{
return oideq(oid, the_hash_algo->empty_tree);
}
const char *empty_tree_oid_hex(void);
const char *empty_blob_oid_hex(void);
/* set default permissions by passing mode arguments to open(2) */
int git_mkstemps_mode(char *pattern, int suffix_len, int mode);
int git_mkstemp_mode(char *pattern, int mode);

View File

@ -932,21 +932,15 @@ struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit
struct packed_commit_list {
struct commit **list;
int nr;
int alloc;
};
struct packed_oid_list {
struct object_id *list;
int nr;
int alloc;
size_t nr;
size_t alloc;
};
struct write_commit_graph_context {
struct repository *r;
struct object_directory *odb;
char *graph_name;
struct packed_oid_list oids;
struct oid_array oids;
struct packed_commit_list commits;
int num_extra_edges;
unsigned long approx_nr_objects;
@ -1240,13 +1234,6 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
return 0;
}
static int oid_compare(const void *_a, const void *_b)
{
const struct object_id *a = (const struct object_id *)_a;
const struct object_id *b = (const struct object_id *)_b;
return oidcmp(a, b);
}
static int add_packed_commits(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
@ -1267,10 +1254,7 @@ static int add_packed_commits(const struct object_id *oid,
if (type != OBJ_COMMIT)
return 0;
ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
oidcpy(&(ctx->oids.list[ctx->oids.nr]), oid);
ctx->oids.nr++;
oid_array_append(&ctx->oids, oid);
set_commit_pos(ctx->r, oid);
return 0;
@ -1281,9 +1265,7 @@ static void add_missing_parents(struct write_commit_graph_context *ctx, struct c
struct commit_list *parent;
for (parent = commit->parents; parent; parent = parent->next) {
if (!(parent->item->object.flags & REACHABLE)) {
ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
oidcpy(&ctx->oids.list[ctx->oids.nr], &(parent->item->object.oid));
ctx->oids.nr++;
oid_array_append(&ctx->oids, &parent->item->object.oid);
parent->item->object.flags |= REACHABLE;
}
}
@ -1302,7 +1284,7 @@ static void close_reachable(struct write_commit_graph_context *ctx)
ctx->oids.nr);
for (i = 0; i < ctx->oids.nr; i++) {
display_progress(ctx->progress, i + 1);
commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
if (commit)
commit->object.flags |= REACHABLE;
}
@ -1319,7 +1301,7 @@ static void close_reachable(struct write_commit_graph_context *ctx)
0);
for (i = 0; i < ctx->oids.nr; i++) {
display_progress(ctx->progress, i + 1);
commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
if (!commit)
continue;
@ -1339,7 +1321,7 @@ static void close_reachable(struct write_commit_graph_context *ctx)
ctx->oids.nr);
for (i = 0; i < ctx->oids.nr; i++) {
display_progress(ctx->progress, i + 1);
commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
if (commit)
commit->object.flags &= ~REACHABLE;
@ -1567,9 +1549,7 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
oidset_iter_init(commits, &iter);
while ((oid = oidset_iter_next(&iter))) {
ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
oidcpy(&ctx->oids.list[ctx->oids.nr], oid);
ctx->oids.nr++;
oid_array_append(&ctx->oids, oid);
}
return 0;
@ -1588,35 +1568,6 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
stop_progress(&ctx->progress);
}
static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx)
{
uint32_t i, count_distinct = 1;
if (ctx->report_progress)
ctx->progress = start_delayed_progress(
_("Counting distinct commits in commit graph"),
ctx->oids.nr);
display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */
QSORT(ctx->oids.list, ctx->oids.nr, oid_compare);
for (i = 1; i < ctx->oids.nr; i++) {
display_progress(ctx->progress, i + 1);
if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) {
if (ctx->split) {
struct commit *c = lookup_commit(ctx->r, &ctx->oids.list[i]);
if (!c || commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH)
continue;
}
count_distinct++;
}
}
stop_progress(&ctx->progress);
return count_distinct;
}
static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
{
uint32_t i;
@ -1628,15 +1579,14 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
ctx->progress = start_delayed_progress(
_("Finding extra edges in commit graph"),
ctx->oids.nr);
for (i = 0; i < ctx->oids.nr; i++) {
oid_array_sort(&ctx->oids);
for (i = 0; i < ctx->oids.nr; i = oid_array_next_unique(&ctx->oids, i)) {
unsigned int num_parents;
display_progress(ctx->progress, i + 1);
if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
continue;
ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]);
ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.oid[i]);
if (ctx->split && flags != COMMIT_GRAPH_SPLIT_REPLACE &&
commit_graph_position(ctx->commits.list[ctx->commits.nr]) != COMMIT_NOT_FROM_GRAPH)
@ -2155,7 +2105,7 @@ int write_commit_graph(struct object_directory *odb,
const struct commit_graph_opts *opts)
{
struct write_commit_graph_context *ctx;
uint32_t i, count_distinct = 0;
uint32_t i;
int res = 0;
int replace = 0;
struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
@ -2227,26 +2177,16 @@ int write_commit_graph(struct object_directory *odb,
}
ctx->approx_nr_objects = approximate_object_count();
ctx->oids.alloc = ctx->approx_nr_objects / 32;
if (ctx->split && opts && ctx->oids.alloc > opts->max_commits)
ctx->oids.alloc = opts->max_commits;
if (ctx->append) {
if (ctx->append)
prepare_commit_graph_one(ctx->r, ctx->odb);
if (ctx->r->objects->commit_graph)
ctx->oids.alloc += ctx->r->objects->commit_graph->num_commits;
}
if (ctx->oids.alloc < 1024)
ctx->oids.alloc = 1024;
ALLOC_ARRAY(ctx->oids.list, ctx->oids.alloc);
if (ctx->append && ctx->r->objects->commit_graph) {
struct commit_graph *g = ctx->r->objects->commit_graph;
for (i = 0; i < g->num_commits; i++) {
const unsigned char *hash = g->chunk_oid_lookup + g->hash_len * i;
hashcpy(ctx->oids.list[ctx->oids.nr++].hash, hash);
struct object_id oid;
hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * i);
oid_array_append(&ctx->oids, &oid);
}
}
@ -2268,17 +2208,6 @@ int write_commit_graph(struct object_directory *odb,
close_reachable(ctx);
count_distinct = count_distinct_commits(ctx);
if (count_distinct >= GRAPH_EDGE_LAST_MASK) {
error(_("the commit graph format cannot write %d commits"), count_distinct);
res = -1;
goto cleanup;
}
ctx->commits.alloc = count_distinct;
ALLOC_ARRAY(ctx->commits.list, ctx->commits.alloc);
copy_oids_to_commits(ctx);
if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) {
@ -2313,7 +2242,7 @@ int write_commit_graph(struct object_directory *odb,
cleanup:
free(ctx->graph_name);
free(ctx->commits.list);
free(ctx->oids.list);
oid_array_clear(&ctx->oids);
if (ctx->commit_graph_filenames_after) {
for (i = 0; i < ctx->num_commit_graphs_after; i++) {

95
hash.h
View File

@ -2,6 +2,7 @@
#define HASH_H
#include "git-compat-util.h"
#include "repository.h"
#if defined(SHA1_PPC)
#include "ppc/sha1.h"
@ -184,4 +185,98 @@ struct object_id {
#define the_hash_algo the_repository->hash_algo
extern const struct object_id null_oid;
static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
{
/*
* Teach the compiler that there are only two possibilities of hash size
* here, so that it can optimize for this case as much as possible.
*/
if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
return memcmp(sha1, sha2, GIT_MAX_RAWSZ);
return memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
}
static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
{
return hashcmp(oid1->hash, oid2->hash);
}
static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
{
/*
* We write this here instead of deferring to hashcmp so that the
* compiler can properly inline it and avoid calling memcmp.
*/
if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
return !memcmp(sha1, sha2, GIT_MAX_RAWSZ);
return !memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
}
static inline int oideq(const struct object_id *oid1, const struct object_id *oid2)
{
return hasheq(oid1->hash, oid2->hash);
}
static inline int is_null_oid(const struct object_id *oid)
{
return oideq(oid, &null_oid);
}
static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src)
{
memcpy(sha_dst, sha_src, the_hash_algo->rawsz);
}
static inline void oidcpy(struct object_id *dst, const struct object_id *src)
{
memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ);
}
static inline struct object_id *oiddup(const struct object_id *src)
{
struct object_id *dst = xmalloc(sizeof(struct object_id));
oidcpy(dst, src);
return dst;
}
static inline void hashclr(unsigned char *hash)
{
memset(hash, 0, the_hash_algo->rawsz);
}
static inline void oidclr(struct object_id *oid)
{
memset(oid->hash, 0, GIT_MAX_RAWSZ);
}
static inline void oidread(struct object_id *oid, const unsigned char *hash)
{
memcpy(oid->hash, hash, the_hash_algo->rawsz);
}
static inline int is_empty_blob_sha1(const unsigned char *sha1)
{
return hasheq(sha1, the_hash_algo->empty_blob->hash);
}
static inline int is_empty_blob_oid(const struct object_id *oid)
{
return oideq(oid, the_hash_algo->empty_blob);
}
static inline int is_empty_tree_sha1(const unsigned char *sha1)
{
return hasheq(sha1, the_hash_algo->empty_tree->hash);
}
static inline int is_empty_tree_oid(const struct object_id *oid)
{
return oideq(oid, the_hash_algo->empty_tree);
}
const char *empty_tree_oid_hex(void);
const char *empty_blob_oid_hex(void);
#endif

View File

@ -14,8 +14,10 @@ static int void_hashcmp(const void *a, const void *b)
return oidcmp(a, b);
}
static void oid_array_sort(struct oid_array *array)
void oid_array_sort(struct oid_array *array)
{
if (array->sorted)
return;
QSORT(array->oid, array->nr, void_hashcmp);
array->sorted = 1;
}
@ -28,8 +30,7 @@ static const unsigned char *sha1_access(size_t index, void *table)
int oid_array_lookup(struct oid_array *array, const struct object_id *oid)
{
if (!array->sorted)
oid_array_sort(array);
oid_array_sort(array);
return sha1_pos(oid->hash, array->oid, array->nr, sha1_access);
}
@ -64,14 +65,10 @@ int oid_array_for_each_unique(struct oid_array *array,
{
size_t i;
if (!array->sorted)
oid_array_sort(array);
oid_array_sort(array);
for (i = 0; i < array->nr; i++) {
int ret;
if (i > 0 && oideq(array->oid + i, array->oid + i - 1))
continue;
ret = fn(array->oid + i, data);
for (i = 0; i < array->nr; i = oid_array_next_unique(array, i)) {
int ret = fn(array->oid + i, data);
if (ret)
return ret;
}

View File

@ -1,5 +1,7 @@
#ifndef SHA1_ARRAY_H
#define SHA1_ARRAY_H
#ifndef OID_ARRAY_H
#define OID_ARRAY_H
#include "hash.h"
/**
* The API provides storage and manipulation of sets of object identifiers.
@ -106,4 +108,30 @@ void oid_array_filter(struct oid_array *array,
for_each_oid_fn want,
void *cbdata);
#endif /* SHA1_ARRAY_H */
/**
* Sort the array in order of ascending object id.
*/
void oid_array_sort(struct oid_array *array);
/**
* Find the next unique oid in the array after position "cur".
* The array must be sorted for this to work. You can iterate
* over unique elements like this:
*
* size_t i;
* oid_array_sort(array);
* for (i = 0; i < array->nr; i = oid_array_next_unique(array, i))
* printf("%s", oid_to_hex(array->oids[i]);
*
* Non-unique iteration can just increment with "i++" to visit each element.
*/
static inline size_t oid_array_next_unique(struct oid_array *array, size_t cur)
{
do {
cur++;
} while (cur < array->nr &&
oideq(array->oid + cur, array->oid + cur - 1));
return cur;
}
#endif /* OID_ARRAY_H */

View File

@ -1,6 +1,6 @@
#!/bin/sh
test_description='basic tests for the SHA1 array implementation'
test_description='basic tests for the oid array implementation'
. ./test-lib.sh
echoid () {
@ -25,6 +25,7 @@ test_expect_success 'ordered enumeration' '
test_expect_success 'ordered enumeration with duplicate suppression' '
echoid "" 44 55 88 aa >expect &&
{
echoid append 88 44 aa 55 &&
echoid append 88 44 aa 55 &&
echoid append 88 44 aa 55 &&
echo for_each_unique
@ -52,17 +53,19 @@ test_expect_success 'lookup non-existing entry' '
test_expect_success 'lookup with duplicates' '
{
echoid append 88 44 aa 55 &&
echoid append 88 44 aa 55 &&
echoid append 88 44 aa 55 &&
echoid lookup 55
} | test-tool oid-array >actual &&
n=$(cat actual) &&
test "$n" -ge 2 &&
test "$n" -le 3
test "$n" -ge 3 &&
test "$n" -le 5
'
test_expect_success 'lookup non-existing entry with duplicates' '
{
echoid append 88 44 aa 55 &&
echoid append 88 44 aa 55 &&
echoid append 88 44 aa 55 &&
echoid lookup 66