Add string-specific memory pool
Intern strings so they can be compared by address and stored without wasting space. This library uses the macros in the obj_pool.h and trp.h to create a memory pool for strings and expose an API for handling them. [rr: added API docs] [jn: with some API simplifications, new documentation and tests] Signed-off-by: David Barr <david.barr@cordelta.com> Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
951f316470
commit
1d73b52f5b
1
.gitignore
vendored
1
.gitignore
vendored
@ -173,6 +173,7 @@
|
|||||||
/test-run-command
|
/test-run-command
|
||||||
/test-sha1
|
/test-sha1
|
||||||
/test-sigchain
|
/test-sigchain
|
||||||
|
/test-string-pool
|
||||||
/test-treap
|
/test-treap
|
||||||
/common-cmds.h
|
/common-cmds.h
|
||||||
*.tar.gz
|
*.tar.gz
|
||||||
|
9
Makefile
9
Makefile
@ -415,6 +415,7 @@ TEST_PROGRAMS_NEED_X += test-path-utils
|
|||||||
TEST_PROGRAMS_NEED_X += test-run-command
|
TEST_PROGRAMS_NEED_X += test-run-command
|
||||||
TEST_PROGRAMS_NEED_X += test-sha1
|
TEST_PROGRAMS_NEED_X += test-sha1
|
||||||
TEST_PROGRAMS_NEED_X += test-sigchain
|
TEST_PROGRAMS_NEED_X += test-sigchain
|
||||||
|
TEST_PROGRAMS_NEED_X += test-string-pool
|
||||||
TEST_PROGRAMS_NEED_X += test-treap
|
TEST_PROGRAMS_NEED_X += test-treap
|
||||||
TEST_PROGRAMS_NEED_X += test-index-version
|
TEST_PROGRAMS_NEED_X += test-index-version
|
||||||
|
|
||||||
@ -1742,7 +1743,7 @@ ifndef NO_CURL
|
|||||||
endif
|
endif
|
||||||
XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
|
XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
|
||||||
xdiff/xmerge.o xdiff/xpatience.o
|
xdiff/xmerge.o xdiff/xpatience.o
|
||||||
VCSSVN_OBJS =
|
VCSSVN_OBJS = vcs-svn/string_pool.o
|
||||||
OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
|
OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
|
||||||
|
|
||||||
dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
|
dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
|
||||||
@ -1867,7 +1868,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
|
|||||||
xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
|
xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
|
||||||
|
|
||||||
$(VCSSVN_OBJS): \
|
$(VCSSVN_OBJS): \
|
||||||
vcs-svn/obj_pool.h vcs-svn/trp.h
|
vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h
|
||||||
endif
|
endif
|
||||||
|
|
||||||
exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \
|
exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \
|
||||||
@ -2018,10 +2019,12 @@ test-delta$X: diff-delta.o patch-delta.o
|
|||||||
|
|
||||||
test-parse-options$X: parse-options.o
|
test-parse-options$X: parse-options.o
|
||||||
|
|
||||||
|
test-string-pool$X: vcs-svn/lib.a
|
||||||
|
|
||||||
.PRECIOUS: $(TEST_OBJS)
|
.PRECIOUS: $(TEST_OBJS)
|
||||||
|
|
||||||
test-%$X: test-%.o $(GITLIBS)
|
test-%$X: test-%.o $(GITLIBS)
|
||||||
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
|
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS)
|
||||||
|
|
||||||
check-sha1:: test-sha1$X
|
check-sha1:: test-sha1$X
|
||||||
./test-sha1.sh
|
./test-sha1.sh
|
||||||
|
@ -76,6 +76,22 @@ test_expect_success 'obj pool: high-water mark' '
|
|||||||
test_cmp expected actual
|
test_cmp expected actual
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'string pool' '
|
||||||
|
echo a does not equal b >expected.differ &&
|
||||||
|
echo a equals a >expected.match &&
|
||||||
|
echo equals equals equals >expected.matchmore &&
|
||||||
|
|
||||||
|
test-string-pool "a,--b" >actual.differ &&
|
||||||
|
test-string-pool "a,a" >actual.match &&
|
||||||
|
test-string-pool "equals-equals" >actual.matchmore &&
|
||||||
|
test_must_fail test-string-pool a,a,a &&
|
||||||
|
test_must_fail test-string-pool a &&
|
||||||
|
|
||||||
|
test_cmp expected.differ actual.differ &&
|
||||||
|
test_cmp expected.match actual.match &&
|
||||||
|
test_cmp expected.matchmore actual.matchmore
|
||||||
|
'
|
||||||
|
|
||||||
test_expect_success 'treap sort' '
|
test_expect_success 'treap sort' '
|
||||||
cat <<-\EOF >unsorted &&
|
cat <<-\EOF >unsorted &&
|
||||||
68
|
68
|
||||||
|
31
test-string-pool.c
Normal file
31
test-string-pool.c
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
/*
|
||||||
|
* test-string-pool.c: code to exercise the svn importer's string pool
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "git-compat-util.h"
|
||||||
|
#include "vcs-svn/string_pool.h"
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
const uint32_t unequal = pool_intern("does not equal");
|
||||||
|
const uint32_t equal = pool_intern("equals");
|
||||||
|
uint32_t buf[3];
|
||||||
|
uint32_t n;
|
||||||
|
|
||||||
|
if (argc != 2)
|
||||||
|
usage("test-string-pool <string>,<string>");
|
||||||
|
|
||||||
|
n = pool_tok_seq(3, buf, ",-", argv[1]);
|
||||||
|
if (n >= 3)
|
||||||
|
die("too many strings");
|
||||||
|
if (n <= 1)
|
||||||
|
die("too few strings");
|
||||||
|
|
||||||
|
buf[2] = buf[1];
|
||||||
|
buf[1] = (buf[0] == buf[2]) ? equal : unequal;
|
||||||
|
pool_print_seq(3, buf, ' ', stdout);
|
||||||
|
fputc('\n', stdout);
|
||||||
|
|
||||||
|
pool_reset();
|
||||||
|
return 0;
|
||||||
|
}
|
102
vcs-svn/string_pool.c
Normal file
102
vcs-svn/string_pool.c
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
/*
|
||||||
|
* Licensed under a two-clause BSD-style license.
|
||||||
|
* See LICENSE for details.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "git-compat-util.h"
|
||||||
|
#include "trp.h"
|
||||||
|
#include "obj_pool.h"
|
||||||
|
#include "string_pool.h"
|
||||||
|
|
||||||
|
static struct trp_root tree = { ~0 };
|
||||||
|
|
||||||
|
struct node {
|
||||||
|
uint32_t offset;
|
||||||
|
struct trp_node children;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Two memory pools: one for struct node, and another for strings */
|
||||||
|
obj_pool_gen(node, struct node, 4096)
|
||||||
|
obj_pool_gen(string, char, 4096)
|
||||||
|
|
||||||
|
static char *node_value(struct node *node)
|
||||||
|
{
|
||||||
|
return node ? string_pointer(node->offset) : NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int node_cmp(struct node *a, struct node *b)
|
||||||
|
{
|
||||||
|
return strcmp(node_value(a), node_value(b));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Build a Treap from the node structure (a trp_node w/ offset) */
|
||||||
|
trp_gen(static, tree_, struct node, children, node, node_cmp);
|
||||||
|
|
||||||
|
const char *pool_fetch(uint32_t entry)
|
||||||
|
{
|
||||||
|
return node_value(node_pointer(entry));
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t pool_intern(const char *key)
|
||||||
|
{
|
||||||
|
/* Canonicalize key */
|
||||||
|
struct node *match = NULL, *node;
|
||||||
|
uint32_t key_len;
|
||||||
|
if (key == NULL)
|
||||||
|
return ~0;
|
||||||
|
key_len = strlen(key) + 1;
|
||||||
|
node = node_pointer(node_alloc(1));
|
||||||
|
node->offset = string_alloc(key_len);
|
||||||
|
strcpy(node_value(node), key);
|
||||||
|
match = tree_search(&tree, node);
|
||||||
|
if (!match) {
|
||||||
|
tree_insert(&tree, node);
|
||||||
|
} else {
|
||||||
|
node_free(1);
|
||||||
|
string_free(key_len);
|
||||||
|
node = match;
|
||||||
|
}
|
||||||
|
return node_offset(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
|
||||||
|
{
|
||||||
|
char *token = strtok_r(str, delim, saveptr);
|
||||||
|
return token ? pool_intern(token) : ~0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
|
||||||
|
{
|
||||||
|
uint32_t i;
|
||||||
|
for (i = 0; i < len && ~seq[i]; i++) {
|
||||||
|
fputs(pool_fetch(seq[i]), stream);
|
||||||
|
if (i < len - 1 && ~seq[i + 1])
|
||||||
|
fputc(delim, stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str)
|
||||||
|
{
|
||||||
|
char *context = NULL;
|
||||||
|
uint32_t token = ~0;
|
||||||
|
uint32_t length;
|
||||||
|
|
||||||
|
if (sz == 0)
|
||||||
|
return ~0;
|
||||||
|
if (str)
|
||||||
|
token = pool_tok_r(str, delim, &context);
|
||||||
|
for (length = 0; length < sz; length++) {
|
||||||
|
seq[length] = token;
|
||||||
|
if (token == ~0)
|
||||||
|
return length;
|
||||||
|
token = pool_tok_r(NULL, delim, &context);
|
||||||
|
}
|
||||||
|
seq[sz - 1] = ~0;
|
||||||
|
return sz;
|
||||||
|
}
|
||||||
|
|
||||||
|
void pool_reset(void)
|
||||||
|
{
|
||||||
|
node_reset();
|
||||||
|
string_reset();
|
||||||
|
}
|
11
vcs-svn/string_pool.h
Normal file
11
vcs-svn/string_pool.h
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
#ifndef STRING_POOL_H_
|
||||||
|
#define STRING_POOL_H_
|
||||||
|
|
||||||
|
uint32_t pool_intern(const char *key);
|
||||||
|
const char *pool_fetch(uint32_t entry);
|
||||||
|
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
|
||||||
|
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);
|
||||||
|
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);
|
||||||
|
void pool_reset(void);
|
||||||
|
|
||||||
|
#endif
|
43
vcs-svn/string_pool.txt
Normal file
43
vcs-svn/string_pool.txt
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
string_pool API
|
||||||
|
===============
|
||||||
|
|
||||||
|
The string_pool API provides facilities for replacing strings
|
||||||
|
with integer keys that can be more easily compared and stored.
|
||||||
|
The facilities are designed so that one could teach Git without
|
||||||
|
too much trouble to store the information needed for these keys to
|
||||||
|
remain valid over multiple executions.
|
||||||
|
|
||||||
|
Functions
|
||||||
|
---------
|
||||||
|
|
||||||
|
pool_intern::
|
||||||
|
Include a string in the string pool and get its key.
|
||||||
|
If that string is already in the pool, retrieves its
|
||||||
|
existing key.
|
||||||
|
|
||||||
|
pool_fetch::
|
||||||
|
Retrieve the string associated to a given key.
|
||||||
|
|
||||||
|
pool_tok_r::
|
||||||
|
Extract the key of the next token from a string.
|
||||||
|
Interface mimics strtok_r.
|
||||||
|
|
||||||
|
pool_print_seq::
|
||||||
|
Print a sequence of strings named by key to a file, using the
|
||||||
|
specified delimiter to separate them.
|
||||||
|
|
||||||
|
If NULL (key ~0) appears in the sequence, the sequence ends
|
||||||
|
early.
|
||||||
|
|
||||||
|
pool_tok_seq::
|
||||||
|
Split a string into tokens, storing the keys of segments
|
||||||
|
into a caller-provided array.
|
||||||
|
|
||||||
|
Unless sz is 0, the array will always be ~0-terminated.
|
||||||
|
If there is not enough room for all the tokens, the
|
||||||
|
array holds as many tokens as fit in the entries before
|
||||||
|
the terminating ~0. Return value is the index after the
|
||||||
|
last token, or sz if the tokens did not fit.
|
||||||
|
|
||||||
|
pool_reset::
|
||||||
|
Deallocate storage for the string pool.
|
Loading…
Reference in New Issue
Block a user