reftable: (de)serialization for the polymorphic record type.

The reftable format is structured as a sequence of blocks, and each block
contains a sequence of prefix-compressed key-value records. There are 4 types of
records, and they have similarities in how they must be handled. This is
achieved by introducing a polymorphic 'record' type that encapsulates ref, log,
index and object records.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Han-Wen Nienhuys 2021-10-07 20:25:02 +00:00 committed by Junio C Hamano
parent 1214aa841b
commit e303bf22f9
7 changed files with 1901 additions and 1 deletions

View File

@ -2453,7 +2453,9 @@ REFTABLE_OBJS += reftable/basics.o
# reftable library objects
REFTABLE_OBJS += reftable/error.o
REFTABLE_OBJS += reftable/blocksource.o
REFTABLE_OBJS += reftable/publicbasics.o
REFTABLE_OBJS += reftable/record.o
# reftable unit-test objects
REFTABLE_TEST_OBJS += reftable/record_test.o
REFTABLE_TEST_OBJS += reftable/test_framework.o
REFTABLE_TEST_OBJS += reftable/basics_test.o

21
reftable/constants.h Normal file
View File

@ -0,0 +1,21 @@
/*
Copyright 2020 Google LLC
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/
#ifndef CONSTANTS_H
#define CONSTANTS_H
/*
* Single-character type tags, one for each of the four record kinds
* (log, index, ref, obj).
*/
#define BLOCK_TYPE_LOG 'g'
#define BLOCK_TYPE_INDEX 'i'
#define BLOCK_TYPE_REF 'r'
#define BLOCK_TYPE_OBJ 'o'
/* NOTE(review): 0 presumably acts as a "match any type" wildcard -- confirm
* against the callers. */
#define BLOCK_TYPE_ANY 0
/* 65535, i.e. the largest value that fits in 16 bits.
* NOTE(review): presumably the cap on restart points per block -- confirm. */
#define MAX_RESTARTS ((1 << 16) - 1)
/* NOTE(review): presumably expressed in bytes -- confirm. */
#define DEFAULT_BLOCK_SIZE 4096
#endif

1212
reftable/record.c Normal file

File diff suppressed because it is too large Load Diff

139
reftable/record.h Normal file
View File

@ -0,0 +1,139 @@
/*
Copyright 2020 Google LLC
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/
#ifndef RECORD_H
#define RECORD_H
#include "system.h"
#include <stdint.h>
#include "reftable-record.h"
/*
* A substring of existing string data. This structure takes no responsibility
* for the lifetime of the data it points to.
*/
struct string_view {
uint8_t *buf; /* start of the viewed bytes; not owned by the view */
size_t len; /* length of the view */
};
/* Advance `s->buf` by `n`, and decrease `s->len`. */
void string_view_consume(struct string_view *s, int n);
/*
* Utilities for de/encoding varints: get_var_int() decodes from `in` into
* `*dest`, put_var_int() encodes `val` into `dest`.
* NOTE(review): the unit tests treat a positive return value as success and
* use the result of put_var_int() as the encoded length; confirm the error
* convention against the implementation.
*/
int get_var_int(uint64_t *dest, struct string_view *in);
int put_var_int(struct string_view *dest, uint64_t val);
/*
* Methods for records. Every callback receives the concrete record as an
* untyped pointer (`rec`, `dest` or `src`).
*/
struct reftable_record_vtable {
/* write the key of `rec` into the strbuf `dest`. */
void (*key)(const void *rec, struct strbuf *dest);
/* The record type ('r' for ref). */
uint8_t type;
/* copy the record `src` into `dest`.
* NOTE(review): `hash_size` is presumably the object hash length in bytes
* (the tests pass GIT_SHA1_RAWSZ) -- confirm. */
void (*copy_from)(void *dest, const void *src, int hash_size);
/* a value of [0..7], indicating record subvariants (eg. ref vs. symref
* vs ref deletion) */
uint8_t (*val_type)(const void *rec);
/* encodes rec into dest, returning how much space was used. */
int (*encode)(const void *rec, struct string_view dest, int hash_size);
/* decode data from `src` into the record. */
int (*decode)(void *rec, struct strbuf key, uint8_t extra,
struct string_view src, int hash_size);
/* deallocate and null the record. */
void (*release)(void *rec);
/* is this a tombstone? */
int (*is_deletion)(const void *rec);
};
/* record is a generic wrapper for different types of records. */
struct reftable_record {
void *data; /* the wrapped concrete record (ref, log, index or obj) */
struct reftable_record_vtable *ops; /* methods matching the type of `data` */
};
/* Returns true for recognized block types. Blocks start with the block type. */
int reftable_is_block_type(uint8_t typ);
/* Creates a malloced record of the given type. Dispose of it with
* reftable_record_destroy(). */
struct reftable_record reftable_new_record(uint8_t typ);
/* Encode `key` into `dest`. Sets `is_restart` to indicate a restart. Returns
* number of bytes written. */
int reftable_encode_key(int *is_restart, struct string_view dest,
struct strbuf prev_key, struct strbuf key,
uint8_t extra);
/* Decode into `key` and `extra` from `in`.
* NOTE(review): the key round-trip test expects the return value to equal the
* byte count returned by reftable_encode_key() -- confirm the error convention. */
int reftable_decode_key(struct strbuf *key, uint8_t *extra,
struct strbuf last_key, struct string_view in);
/* reftable_index_record entries are used internally to speed up lookups. */
struct reftable_index_record {
uint64_t offset; /* Offset of block */
struct strbuf last_key; /* Last key of the block. */
};
/* reftable_obj_record stores an object ID => ref mapping. */
struct reftable_obj_record {
uint8_t *hash_prefix; /* leading bytes of the object ID */
int hash_prefix_len; /* number of leading bytes. Constant
* across a single table. */
uint64_t *offsets; /* a vector of file offsets. */
int offset_len; /* number of entries in `offsets` */
};
/* Wrappers taking a generic record; see struct reftable_record_vtable for the
* meaning of each operation. */
void reftable_record_key(struct reftable_record *rec, struct strbuf *dest);
uint8_t reftable_record_type(struct reftable_record *rec);
void reftable_record_copy_from(struct reftable_record *rec,
struct reftable_record *src, int hash_size);
uint8_t reftable_record_val_type(struct reftable_record *rec);
int reftable_record_encode(struct reftable_record *rec, struct string_view dest,
int hash_size);
int reftable_record_decode(struct reftable_record *rec, struct strbuf key,
uint8_t extra, struct string_view src,
int hash_size);
int reftable_record_is_deletion(struct reftable_record *rec);
/* zeroes out the embedded record */
void reftable_record_release(struct reftable_record *rec);
/* clear and deallocate embedded record, and zero `rec`. */
void reftable_record_destroy(struct reftable_record *rec);
/* initialize generic records from concrete records. The generic record should
* be zeroed out. */
void reftable_record_from_obj(struct reftable_record *rec,
struct reftable_obj_record *objrec);
void reftable_record_from_index(struct reftable_record *rec,
struct reftable_index_record *idxrec);
void reftable_record_from_ref(struct reftable_record *rec,
struct reftable_ref_record *refrec);
void reftable_record_from_log(struct reftable_record *rec,
struct reftable_log_record *logrec);
/* Access the concrete ref/log record behind a generic record.
* NOTE(review): presumably only valid when the generic record has the matching
* type -- confirm against the implementation. */
struct reftable_ref_record *reftable_record_as_ref(struct reftable_record *ref);
struct reftable_log_record *reftable_record_as_log(struct reftable_record *ref);
/* for qsort. */
int reftable_ref_record_compare_name(const void *a, const void *b);
/* for qsort. */
int reftable_log_record_compare_key(const void *a, const void *b);
#endif
#endif

412
reftable/record_test.c Normal file
View File

@ -0,0 +1,412 @@
/*
Copyright 2020 Google LLC
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/
#include "record.h"
#include "system.h"
#include "basics.h"
#include "constants.h"
#include "test_framework.h"
#include "reftable-tests.h"
/*
 * Copy `rec` into a newly allocated record of the same type and, for ref and
 * log records, check that the copy compares equal to the original. For any
 * other record type only the copy itself is exercised.
 */
static void test_copy(struct reftable_record *rec)
{
	struct reftable_record copy =
		reftable_new_record(reftable_record_type(rec));
	uint8_t copy_type;
	int pass;

	/* Copy twice: the second pass overwrites a populated record, which
	 * lets a leak checker catch memory that is not released. */
	for (pass = 0; pass < 2; pass++) {
		reftable_record_copy_from(&copy, rec, GIT_SHA1_RAWSZ);
	}

	copy_type = reftable_record_type(&copy);
	if (copy_type == BLOCK_TYPE_REF) {
		EXPECT(reftable_ref_record_equal(reftable_record_as_ref(&copy),
						 reftable_record_as_ref(rec),
						 GIT_SHA1_RAWSZ));
	} else if (copy_type == BLOCK_TYPE_LOG) {
		EXPECT(reftable_log_record_equal(reftable_record_as_log(&copy),
						 reftable_record_as_log(rec),
						 GIT_SHA1_RAWSZ));
	}

	reftable_record_destroy(&copy);
}
static void test_varint_roundtrip(void)
{
uint64_t inputs[] = { 0,
1,
27,
127,
128,
257,
4096,
((uint64_t)1 << 63),
((uint64_t)1 << 63) + ((uint64_t)1 << 63) - 1 };
int i = 0;
for (i = 0; i < ARRAY_SIZE(inputs); i++) {
uint8_t dest[10];
struct string_view out = {
.buf = dest,
.len = sizeof(dest),
};
uint64_t in = inputs[i];
int n = put_var_int(&out, in);
uint64_t got = 0;
EXPECT(n > 0);
out.len = n;
n = get_var_int(&got, &out);
EXPECT(n > 0);
EXPECT(got == in);
}
}
/*
 * Table-driven check of common_prefix_size(): each case lists two strings and
 * the value the function is expected to return for them.
 */
static void test_common_prefix(void)
{
	struct {
		const char *a, *b;
		int want;
	} cases[] = {
		{ "abc", "ab", 2 },
		{ "", "abc", 0 },
		{ "abc", "abd", 2 },
		{ "abc", "pqr", 0 },
	};
	int i = 0;

	while (i < ARRAY_SIZE(cases)) {
		struct strbuf a = STRBUF_INIT;
		struct strbuf b = STRBUF_INIT;

		strbuf_addstr(&a, cases[i].a);
		strbuf_addstr(&b, cases[i].b);

		EXPECT(common_prefix_size(&a, &b) == cases[i].want);

		strbuf_release(&b);
		strbuf_release(&a);
		i++;
	}
}
/*
 * Fill the first hash_size(GIT_SHA1_FORMAT_ID) bytes of `h` with a pattern
 * derived from `j`: byte number `pos` receives the low 8 bits of `j >> pos`.
 */
static void set_hash(uint8_t *h, int j)
{
	int pos = 0;

	while (pos < hash_size(GIT_SHA1_FORMAT_ID)) {
		h[pos] = (uint8_t)((j >> pos) & 0xff);
		pos++;
	}
}
static void test_reftable_ref_record_roundtrip(void)
{
int i = 0;
for (i = REFTABLE_REF_DELETION; i < REFTABLE_NR_REF_VALUETYPES; i++) {
struct reftable_ref_record in = { NULL };
struct reftable_ref_record out = { NULL };
struct reftable_record rec_out = { NULL };
struct strbuf key = STRBUF_INIT;
struct reftable_record rec = { NULL };
uint8_t buffer[1024] = { 0 };
struct string_view dest = {
.buf = buffer,
.len = sizeof(buffer),
};
int n, m;
in.value_type = i;
switch (i) {
case REFTABLE_REF_DELETION:
break;
case REFTABLE_REF_VAL1:
in.value.val1 = reftable_malloc(GIT_SHA1_RAWSZ);
set_hash(in.value.val1, 1);
break;
case REFTABLE_REF_VAL2:
in.value.val2.value = reftable_malloc(GIT_SHA1_RAWSZ);
set_hash(in.value.val2.value, 1);
in.value.val2.target_value =
reftable_malloc(GIT_SHA1_RAWSZ);
set_hash(in.value.val2.target_value, 2);
break;
case REFTABLE_REF_SYMREF:
in.value.symref = xstrdup("target");
break;
}
in.refname = xstrdup("refs/heads/master");
reftable_record_from_ref(&rec, &in);
test_copy(&rec);
EXPECT(reftable_record_val_type(&rec) == i);
reftable_record_key(&rec, &key);
n = reftable_record_encode(&rec, dest, GIT_SHA1_RAWSZ);
EXPECT(n > 0);
/* decode into a non-zero reftable_record to test for leaks. */
reftable_record_from_ref(&rec_out, &out);
m = reftable_record_decode(&rec_out, key, i, dest,
GIT_SHA1_RAWSZ);
EXPECT(n == m);
EXPECT(reftable_ref_record_equal(&in, &out, GIT_SHA1_RAWSZ));
reftable_record_release(&rec_out);
strbuf_release(&key);
reftable_ref_record_release(&in);
}
}
/*
 * Two log records with the same refname must compare unequal while their
 * update_index differs, and equal once the indexes match.
 */
static void test_reftable_log_record_equal(void)
{
	struct reftable_log_record in[2] = { { NULL } };
	int k;

	in[0].refname = xstrdup("refs/heads/master");
	in[0].update_index = 42;
	in[1].refname = xstrdup("refs/heads/master");
	in[1].update_index = 22;

	EXPECT(!reftable_log_record_equal(&in[0], &in[1], GIT_SHA1_RAWSZ));

	in[1].update_index = in[0].update_index;
	EXPECT(reftable_log_record_equal(&in[0], &in[1], GIT_SHA1_RAWSZ));

	for (k = 0; k < 2; k++) {
		reftable_log_record_release(&in[k]);
	}
}
static void test_reftable_log_record_roundtrip(void)
{
int i;
struct reftable_log_record in[2] = {
{
.refname = xstrdup("refs/heads/master"),
.update_index = 42,
.value_type = REFTABLE_LOG_UPDATE,
.value = {
.update = {
.old_hash = reftable_malloc(GIT_SHA1_RAWSZ),
.new_hash = reftable_malloc(GIT_SHA1_RAWSZ),
.name = xstrdup("han-wen"),
.email = xstrdup("hanwen@google.com"),
.message = xstrdup("test"),
.time = 1577123507,
.tz_offset = 100,
},
}
},
{
.refname = xstrdup("refs/heads/master"),
.update_index = 22,
.value_type = REFTABLE_LOG_DELETION,
}
};
set_test_hash(in[0].value.update.new_hash, 1);
set_test_hash(in[0].value.update.old_hash, 2);
for (i = 0; i < ARRAY_SIZE(in); i++) {
struct reftable_record rec = { NULL };
struct strbuf key = STRBUF_INIT;
uint8_t buffer[1024] = { 0 };
struct string_view dest = {
.buf = buffer,
.len = sizeof(buffer),
};
/* populate out, to check for leaks. */
struct reftable_log_record out = {
.refname = xstrdup("old name"),
.value_type = REFTABLE_LOG_UPDATE,
.value = {
.update = {
.new_hash = reftable_calloc(GIT_SHA1_RAWSZ),
.old_hash = reftable_calloc(GIT_SHA1_RAWSZ),
.name = xstrdup("old name"),
.email = xstrdup("old@email"),
.message = xstrdup("old message"),
},
},
};
struct reftable_record rec_out = { NULL };
int n, m, valtype;
reftable_record_from_log(&rec, &in[i]);
test_copy(&rec);
reftable_record_key(&rec, &key);
n = reftable_record_encode(&rec, dest, GIT_SHA1_RAWSZ);
EXPECT(n >= 0);
reftable_record_from_log(&rec_out, &out);
valtype = reftable_record_val_type(&rec);
m = reftable_record_decode(&rec_out, key, valtype, dest,
GIT_SHA1_RAWSZ);
EXPECT(n == m);
EXPECT(reftable_log_record_equal(&in[i], &out, GIT_SHA1_RAWSZ));
reftable_log_record_release(&in[i]);
strbuf_release(&key);
reftable_record_release(&rec_out);
}
}
/* Write a 24-bit value with put_be24() and read it back with get_be24(). */
static void test_u24_roundtrip(void)
{
	uint8_t bytes[3];
	uint32_t in = 0x112233;
	uint32_t out;

	put_be24(bytes, in);
	out = get_be24(bytes);

	EXPECT(in == out);
}
static void test_key_roundtrip(void)
{
uint8_t buffer[1024] = { 0 };
struct string_view dest = {
.buf = buffer,
.len = sizeof(buffer),
};
struct strbuf last_key = STRBUF_INIT;
struct strbuf key = STRBUF_INIT;
struct strbuf roundtrip = STRBUF_INIT;
int restart;
uint8_t extra;
int n, m;
uint8_t rt_extra;
strbuf_addstr(&last_key, "refs/heads/master");
strbuf_addstr(&key, "refs/tags/bla");
extra = 6;
n = reftable_encode_key(&restart, dest, last_key, key, extra);
EXPECT(!restart);
EXPECT(n > 0);
m = reftable_decode_key(&roundtrip, &rt_extra, last_key, dest);
EXPECT(n == m);
EXPECT(0 == strbuf_cmp(&key, &roundtrip));
EXPECT(rt_extra == extra);
strbuf_release(&last_key);
strbuf_release(&key);
strbuf_release(&roundtrip);
}
/*
 * Encode obj records with three, nine and zero offsets, decode each one into
 * an empty record, and check that the hash prefix and offsets survive.
 */
static void test_reftable_obj_record_roundtrip(void)
{
	uint8_t testHash1[GIT_SHA1_RAWSZ] = { 1, 2, 3, 4, 0 };
	uint64_t till9[] = { 1, 2, 3, 4, 500, 600, 700, 800, 9000 };
	struct reftable_obj_record recs[3] = {
		{
			.hash_prefix = testHash1,
			.hash_prefix_len = 5,
			.offsets = till9,
			.offset_len = 3,
		},
		{
			.hash_prefix = testHash1,
			.hash_prefix_len = 5,
			.offsets = till9,
			.offset_len = 9,
		},
		{
			/* no offsets: `offsets` stays NULL, `offset_len` 0 */
			.hash_prefix = testHash1,
			.hash_prefix_len = 5,
		},
	};
	int i = 0;

	for (i = 0; i < ARRAY_SIZE(recs); i++) {
		struct reftable_obj_record in = recs[i];
		uint8_t buffer[1024] = { 0 };
		struct string_view dest = {
			.buf = buffer,
			.len = sizeof(buffer),
		};
		struct reftable_record rec = { NULL };
		struct strbuf key = STRBUF_INIT;
		struct reftable_obj_record out = { NULL };
		struct reftable_record rec_out = { NULL };
		int n, m;
		uint8_t extra;

		reftable_record_from_obj(&rec, &in);
		test_copy(&rec);
		reftable_record_key(&rec, &key);
		n = reftable_record_encode(&rec, dest, GIT_SHA1_RAWSZ);
		EXPECT(n > 0);
		extra = reftable_record_val_type(&rec);
		reftable_record_from_obj(&rec_out, &out);
		m = reftable_record_decode(&rec_out, key, extra, dest,
					   GIT_SHA1_RAWSZ);
		EXPECT(n == m);

		EXPECT(in.hash_prefix_len == out.hash_prefix_len);
		EXPECT(in.offset_len == out.offset_len);

		EXPECT(!memcmp(in.hash_prefix, out.hash_prefix,
			       in.hash_prefix_len));
		/*
		 * The fixture without offsets has `in.offsets == NULL`.
		 * Passing a null pointer to memcmp() is undefined behavior
		 * even for a length of zero, so only compare when there is
		 * data. The lengths were already checked to match above.
		 */
		if (in.offset_len > 0) {
			EXPECT(0 == memcmp(in.offsets, out.offsets,
					   sizeof(uint64_t) * in.offset_len));
		}
		strbuf_release(&key);
		reftable_record_release(&rec_out);
	}
}
static void test_reftable_index_record_roundtrip(void)
{
struct reftable_index_record in = {
.offset = 42,
.last_key = STRBUF_INIT,
};
uint8_t buffer[1024] = { 0 };
struct string_view dest = {
.buf = buffer,
.len = sizeof(buffer),
};
struct strbuf key = STRBUF_INIT;
struct reftable_record rec = { NULL };
struct reftable_index_record out = { .last_key = STRBUF_INIT };
struct reftable_record out_rec = { NULL };
int n, m;
uint8_t extra;
strbuf_addstr(&in.last_key, "refs/heads/master");
reftable_record_from_index(&rec, &in);
reftable_record_key(&rec, &key);
test_copy(&rec);
EXPECT(0 == strbuf_cmp(&key, &in.last_key));
n = reftable_record_encode(&rec, dest, GIT_SHA1_RAWSZ);
EXPECT(n > 0);
extra = reftable_record_val_type(&rec);
reftable_record_from_index(&out_rec, &out);
m = reftable_record_decode(&out_rec, key, extra, dest, GIT_SHA1_RAWSZ);
EXPECT(m == n);
EXPECT(in.offset == out.offset);
reftable_record_release(&out_rec);
strbuf_release(&key);
strbuf_release(&in.last_key);
}
/*
* Runs every record test in this file through RUN_TEST, in the order listed,
* and returns 0. `argc` and `argv` are accepted but not used.
*/
int record_test_main(int argc, const char *argv[])
{
RUN_TEST(test_reftable_log_record_equal);
RUN_TEST(test_reftable_log_record_roundtrip);
RUN_TEST(test_reftable_ref_record_roundtrip);
RUN_TEST(test_varint_roundtrip);
RUN_TEST(test_key_roundtrip);
RUN_TEST(test_common_prefix);
RUN_TEST(test_reftable_obj_record_roundtrip);
RUN_TEST(test_reftable_index_record_roundtrip);
RUN_TEST(test_u24_roundtrip);
return 0;
}

114
reftable/reftable-record.h Normal file
View File

@ -0,0 +1,114 @@
/*
Copyright 2020 Google LLC
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/
#ifndef REFTABLE_RECORD_H
#define REFTABLE_RECORD_H
#include <stdint.h>
/*
* Basic data types
*
* Reftables store the state of each ref in struct reftable_ref_record, and they
* store a sequence of reflog updates in struct reftable_log_record.
*/
/* reftable_ref_record holds a ref database entry. */
struct reftable_ref_record {
char *refname; /* Name of the ref, malloced. */
uint64_t update_index; /* Logical timestamp at which this value is
* written */
/* Kind of value stored in `value`. */
enum {
/* tombstone to hide deletions from earlier tables */
REFTABLE_REF_DELETION = 0x0,
/* a simple ref */
REFTABLE_REF_VAL1 = 0x1,
/* a tag, plus its peeled hash */
REFTABLE_REF_VAL2 = 0x2,
/* a symbolic reference */
REFTABLE_REF_SYMREF = 0x3,
/* number of value types listed above */
#define REFTABLE_NR_REF_VALUETYPES 4
} value_type;
/* The value itself; a deletion stores nothing here. */
union {
uint8_t *val1; /* malloced hash. */
struct {
uint8_t *value; /* first value, malloced hash */
uint8_t *target_value; /* second value, malloced hash */
} val2;
char *symref; /* referent, malloced 0-terminated string */
} value;
};
/* Returns the first hash, or NULL if `rec` is not of type
* REFTABLE_REF_VAL1 or REFTABLE_REF_VAL2. */
uint8_t *reftable_ref_record_val1(struct reftable_ref_record *rec);
/* Returns the second hash, or NULL if `rec` is not of type
* REFTABLE_REF_VAL2. */
uint8_t *reftable_ref_record_val2(struct reftable_ref_record *rec);
/* returns whether 'ref' represents a deletion */
int reftable_ref_record_is_deletion(const struct reftable_ref_record *ref);
/* prints a reftable_ref_record onto stdout. Useful for debugging.
* NOTE(review): `hash_id` presumably identifies the hash function in use
* (the tests pass GIT_SHA1_FORMAT_ID to hash_size()) -- confirm. */
void reftable_ref_record_print(struct reftable_ref_record *ref,
uint32_t hash_id);
/* frees and nulls all pointer values inside `ref`. */
void reftable_ref_record_release(struct reftable_ref_record *ref);
/* returns whether two reftable_ref_records are the same. Useful for testing.
* NOTE(review): `hash_size` is presumably the hash length in bytes (the tests
* pass GIT_SHA1_RAWSZ) -- confirm. */
int reftable_ref_record_equal(struct reftable_ref_record *a,
struct reftable_ref_record *b, int hash_size);
/* reftable_log_record holds a reflog entry */
struct reftable_log_record {
char *refname; /* name of the ref this entry belongs to */
uint64_t update_index; /* logical timestamp of a transactional update. */
/* Kind of entry; `value.update` is filled in for REFTABLE_LOG_UPDATE. */
enum {
/* tombstone to hide deletions from earlier tables */
REFTABLE_LOG_DELETION = 0x0,
/* a simple update */
REFTABLE_LOG_UPDATE = 0x1,
/* number of value types listed above */
#define REFTABLE_NR_LOG_VALUETYPES 2
} value_type;
union {
struct {
uint8_t *new_hash; /* hash after the update */
uint8_t *old_hash; /* hash before the update */
char *name; /* NOTE(review): presumably who made the update */
char *email; /* NOTE(review): presumably that person's email */
uint64_t time; /* NOTE(review): presumably seconds since the epoch */
int16_t tz_offset; /* NOTE(review): timezone offset; unit not shown here */
char *message; /* message recorded with the update */
} update;
} value;
};
/* returns whether `log` represents the deletion of a log record. */
int reftable_log_record_is_deletion(const struct reftable_log_record *log);
/* frees and nulls all pointer values. */
void reftable_log_record_release(struct reftable_log_record *log);
/* returns whether two records are equal. Useful for testing.
* NOTE(review): `hash_size` is presumably the hash length in bytes (the tests
* pass GIT_SHA1_RAWSZ) -- confirm. */
int reftable_log_record_equal(struct reftable_log_record *a,
struct reftable_log_record *b, int hash_size);
/* dumps a reftable_log_record on stdout, for debugging/testing. */
void reftable_log_record_print(struct reftable_log_record *log,
uint32_t hash_id);
#endif
#endif

View File

@ -4,6 +4,6 @@
/*
 * Runs the reftable basics tests followed by the record tests, passing the
 * command-line arguments through to each, and returns 0.
 */
int cmd__reftable(int argc, const char **argv)
{
	basics_test_main(argc, argv);
	record_test_main(argc, argv);
	return 0;
}