Merge branch 'jn/svn-fe'

* jn/svn-fe:
  t/t9010-svn-fe.sh: add an +x bit to this test
  t9010 (svn-fe): avoid symlinks in test
  t9010 (svn-fe): use Unix-style path in URI
  vcs-svn: Avoid %z in format string
  vcs-svn: Rename dirent pool to build on Windows
  compat: add strtok_r()
  treap: style fix
  vcs-svn: remove build artifacts on "make clean"
  svn-fe manual: Clarify warning about deltas in dump files
  Update svn-fe manual
  SVN dump parser
  Infrastructure to write revisions in fast-export format
  Add stream helper library
  Add string-specific memory pool
  Add treap implementation
  Add memory pool library
  Introduce vcs-svn lib
This commit is contained in:
Junio C Hamano 2010-08-31 16:23:38 -07:00
commit aca35505db
31 changed files with 2108 additions and 13 deletions

5
.gitignore vendored
View File

@ -166,12 +166,17 @@
/test-dump-cache-tree
/test-genrandom
/test-index-version
/test-line-buffer
/test-match-trees
/test-obj-pool
/test-parse-options
/test-path-utils
/test-run-command
/test-sha1
/test-sigchain
/test-string-pool
/test-svn-fe
/test-treap
/common-cmds.h
*.tar.gz
*.dsc

View File

@ -68,6 +68,8 @@ all::
#
# Define NO_MKSTEMPS if you don't have mkstemps in the C library.
#
# Define NO_STRTOK_R if you don't have strtok_r in the C library.
#
# Define NO_LIBGEN_H if you don't have libgen.h.
#
# Define NEEDS_LIBGEN if your libgen needs -lgen when linking
@ -408,12 +410,17 @@ TEST_PROGRAMS_NEED_X += test-date
TEST_PROGRAMS_NEED_X += test-delta
TEST_PROGRAMS_NEED_X += test-dump-cache-tree
TEST_PROGRAMS_NEED_X += test-genrandom
TEST_PROGRAMS_NEED_X += test-line-buffer
TEST_PROGRAMS_NEED_X += test-match-trees
TEST_PROGRAMS_NEED_X += test-obj-pool
TEST_PROGRAMS_NEED_X += test-parse-options
TEST_PROGRAMS_NEED_X += test-path-utils
TEST_PROGRAMS_NEED_X += test-run-command
TEST_PROGRAMS_NEED_X += test-sha1
TEST_PROGRAMS_NEED_X += test-sigchain
TEST_PROGRAMS_NEED_X += test-string-pool
TEST_PROGRAMS_NEED_X += test-svn-fe
TEST_PROGRAMS_NEED_X += test-treap
TEST_PROGRAMS_NEED_X += test-index-version
TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X))
@ -468,6 +475,7 @@ export PYTHON_PATH
LIB_FILE=libgit.a
XDIFF_LIB=xdiff/lib.a
VCSSVN_LIB=vcs-svn/lib.a
LIB_H += advice.h
LIB_H += archive.h
@ -1035,6 +1043,7 @@ ifeq ($(uname_S),Windows)
NO_UNSETENV = YesPlease
NO_STRCASESTR = YesPlease
NO_STRLCPY = YesPlease
NO_STRTOK_R = YesPlease
NO_MEMMEM = YesPlease
# NEEDS_LIBICONV = YesPlease
NO_ICONV = YesPlease
@ -1089,6 +1098,7 @@ ifneq (,$(findstring MINGW,$(uname_S)))
NO_UNSETENV = YesPlease
NO_STRCASESTR = YesPlease
NO_STRLCPY = YesPlease
NO_STRTOK_R = YesPlease
NO_MEMMEM = YesPlease
NEEDS_LIBICONV = YesPlease
OLD_ICONV = YesPlease
@ -1319,6 +1329,10 @@ endif
ifdef NO_STRTOULL
COMPAT_CFLAGS += -DNO_STRTOULL
endif
ifdef NO_STRTOK_R
COMPAT_CFLAGS += -DNO_STRTOK_R
COMPAT_OBJS += compat/strtok_r.o
endif
ifdef NO_SETENV
COMPAT_CFLAGS += -DNO_SETENV
COMPAT_OBJS += compat/setenv.o
@ -1739,7 +1753,9 @@ ifndef NO_CURL
endif
XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
xdiff/xmerge.o xdiff/xpatience.o
OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS)
VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \
vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o
OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
dep_dirs := $(addsuffix .depend,$(sort $(dir $(OBJECTS))))
@ -1861,6 +1877,11 @@ http.o http-walker.o http-push.o http-fetch.o remote-curl.o: http.h
xdiff-interface.o $(XDIFF_OBJS): \
xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \
xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
$(VCSSVN_OBJS): \
vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \
vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \
vcs-svn/svndump.h
endif
exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \
@ -1909,6 +1930,8 @@ $(LIB_FILE): $(LIB_OBJS)
$(XDIFF_LIB): $(XDIFF_OBJS)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(XDIFF_OBJS)
$(VCSSVN_LIB): $(VCSSVN_OBJS)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(VCSSVN_OBJS)
doc:
$(MAKE) -C Documentation all
@ -2007,12 +2030,18 @@ test-date$X: date.o ctype.o
test-delta$X: diff-delta.o patch-delta.o
test-line-buffer$X: vcs-svn/lib.a
test-parse-options$X: parse-options.o
test-string-pool$X: vcs-svn/lib.a
test-svn-fe$X: vcs-svn/lib.a
.PRECIOUS: $(TEST_OBJS)
test-%$X: test-%.o $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS)
check-sha1:: test-sha1$X
./test-sha1.sh
@ -2187,8 +2216,8 @@ distclean: clean
$(RM) configure
clean:
$(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o \
builtin/*.o $(LIB_FILE) $(XDIFF_LIB)
$(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o vcs-svn/*.o \
builtin/*.o $(LIB_FILE) $(XDIFF_LIB) $(VCSSVN_LIB)
$(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X
$(RM) $(TEST_PROGRAMS)
$(RM) -r bin-wrappers

61
compat/strtok_r.c Normal file
View File

@ -0,0 +1,61 @@
/* Reentrant string tokenizer. Generic version.
Copyright (C) 1991,1996-1999,2001,2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include "../git-compat-util.h"
/* Parse S into tokens separated by characters in DELIM.
If S is NULL, the saved pointer in SAVE_PTR is used as
the next starting point. For example:
char s[] = "-abc-=-def";
char *sp;
x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def"
x = strtok_r(NULL, "-=", &sp); // x = "def", sp = "" (end of string)
x = strtok_r(NULL, "=", &sp); // x = NULL
// s = "-abc\0=-def\0"
*/
/*
 * Reentrant tokenizer: return the next token of S delimited by any
 * character in DELIM, or NULL when no token is left.  Pass S == NULL
 * to continue from the position recorded in *SAVE_PTR.  The token is
 * NUL-terminated in place; *SAVE_PTR is advanced past it (or left at
 * the terminating NUL of the string after the final token).
 */
char *
gitstrtok_r (char *s, const char *delim, char **save_ptr)
{
	char *start, *stop;

	start = s ? s : *save_ptr;

	/* Skip any run of delimiters in front of the token. */
	start += strspn(start, delim);
	if (!*start) {
		*save_ptr = start;
		return NULL;
	}

	/* The token extends up to the next delimiter or the end of string. */
	stop = start + strcspn(start, delim);
	if (*stop) {
		/* Cut the token off and resume just after the delimiter. */
		*stop = '\0';
		*save_ptr = stop + 1;
	} else {
		/* Last token: resume at the terminating NUL. */
		*save_ptr = stop;
	}
	return start;
}

View File

@ -46,6 +46,7 @@ NO_IPV6=@NO_IPV6@
NO_C99_FORMAT=@NO_C99_FORMAT@
NO_HSTRERROR=@NO_HSTRERROR@
NO_STRCASESTR=@NO_STRCASESTR@
NO_STRTOK_R=@NO_STRTOK_R@
NO_MEMMEM=@NO_MEMMEM@
NO_STRLCPY=@NO_STRLCPY@
NO_UINTMAX_T=@NO_UINTMAX_T@

View File

@ -783,6 +783,12 @@ GIT_CHECK_FUNC(strcasestr,
[NO_STRCASESTR=YesPlease])
AC_SUBST(NO_STRCASESTR)
#
# Define NO_STRTOK_R if you don't have strtok_r
GIT_CHECK_FUNC(strtok_r,
[NO_STRTOK_R=],
[NO_STRTOK_R=YesPlease])
AC_SUBST(NO_STRTOK_R)
#
# Define NO_MEMMEM if you don't have memmem.
GIT_CHECK_FUNC(memmem,
[NO_MEMMEM=],

View File

@ -10,6 +10,7 @@ int main(int argc, char **argv)
{
svndump_init(NULL);
svndump_read((argc > 1) ? argv[1] : NULL);
svndump_deinit();
svndump_reset();
return 0;
}

View File

@ -12,7 +12,7 @@ svnadmin dump --incremental REPO | svn-fe [url] | git fast-import
DESCRIPTION
-----------
Converts a Subversion dumpfile (version: 2) into input suitable for
Converts a Subversion dumpfile into input suitable for
git-fast-import(1) and similar importers. REPO is a path to a
Subversion repository mirrored on the local disk. Remote Subversion
repositories can be mirrored on local disk using the `svnsync`
@ -25,6 +25,9 @@ Subversion's repository dump format is documented in full in
Files in this format can be generated using the 'svnadmin dump' or
'svk admin dump' command.
Dumps produced with 'svnadmin dump --deltas' (dumpfile format v3)
are not supported.
OUTPUT FORMAT
-------------
The fast-import format is documented by the git-fast-import(1)
@ -43,11 +46,9 @@ user <user@UUID>
as committer, where 'user' is the value of the `svn:author` property
and 'UUID' the repository's identifier.
To support incremental imports, 'svn-fe' will put a `git-svn-id`
line at the end of each commit log message if passed an url on the
command line. This line has the form `git-svn-id: URL@REVNO UUID`.
Empty directories and unknown properties are silently discarded.
To support incremental imports, 'svn-fe' puts a `git-svn-id` line at
the end of each commit log message if passed a URL on the command
line. This line has the form `git-svn-id: URL@REVNO UUID`.
The resulting repository will generally require further processing
to put each project in its own repository and to separate the history
@ -56,9 +57,9 @@ may be useful for this purpose.
BUGS
----
Litters the current working directory with .bin files for
persistence. Will be fixed when the svn-fe infrastructure is aware of
a Git working directory.
Empty directories and unknown properties are silently discarded.
The exit status does not reflect whether an error was detected.
SEE ALSO
--------

View File

@ -312,6 +312,11 @@ extern size_t gitstrlcpy(char *, const char *, size_t);
extern uintmax_t gitstrtoumax(const char *, char **, int);
#endif
#ifdef NO_STRTOK_R
#define strtok_r gitstrtok_r
extern char *gitstrtok_r(char *s, const char *delim, char **save_ptr);
#endif
#ifdef NO_HSTRERROR
#define hstrerror githstrerror
extern const char *githstrerror(int herror);

171
t/t0080-vcs-svn.sh Executable file
View File

@ -0,0 +1,171 @@
#!/bin/sh
test_description='check infrastructure for svn importer'
. ./test-lib.sh
uint32_max=4294967295
# Allocate 16 ints in pool "one" (prints the offset of the first new
# slot, 0), store 1 in slot 13 with "set", and read it back with "test"
# (prints 1).  "commit"/"free"/"reset" and friends print nothing.
test_expect_success 'obj pool: store data' '
cat <<-\EOF >expected &&
0
1
EOF
test-obj-pool <<-\EOF >actual &&
alloc one 16
set one 13
test one 13
reset one
EOF
test_cmp expected actual
'
# The "null" command prints <pool>_offset(NULL), which the pool
# represents as ~0, i.e. the largest 32-bit unsigned value.
test_expect_success 'obj pool: NULL is offset ~0' '
echo "$uint32_max" >expected &&
echo null one | test-obj-pool >actual &&
test_cmp expected actual
'
# "offset <pool> N" prints the offset of the pointer for slot N, which
# is ~0 (NULL) whenever N is not below the current pool size:
#   - with 16 slots allocated, slot 20 is out of bounds in both pools;
#   - after allocating 5 more in "one" (21 slots) slot 20 is valid;
#   - after freeing 1 (20 slots) slot 20 is out of bounds again.
test_expect_success 'obj pool: out-of-bounds access' '
cat <<-EOF >expected &&
0
0
$uint32_max
$uint32_max
16
20
$uint32_max
EOF
test-obj-pool <<-\EOF >actual &&
alloc one 16
alloc two 16
offset one 20
offset two 20
alloc one 5
offset one 20
free one 1
offset one 20
reset one
reset two
EOF
test_cmp expected actual
'
# "commit" records the current pool size as the committed mark and
# "committed" prints that mark.  The mark starts at 0, becomes 20 after
# committing 20 slots, and stays 20 even after further alloc/free
# changes the pool size.
test_expect_success 'obj pool: high-water mark' '
cat <<-\EOF >expected &&
0
0
10
20
20
20
EOF
test-obj-pool <<-\EOF >actual &&
alloc one 10
committed one
alloc one 10
commit one
committed one
alloc one 10
free one 20
committed one
reset one
EOF
test_cmp expected actual
'
# test-line-buffer reads its input in rounds: a count line followed by
# that many bytes which it reads as a string and prints with a trailing
# newline (then skips one byte), then a second count line after which
# it copies count + 1 bytes straight to stdout.  The cases below cover
# a plain string, an empty string followed by a copy, and NUL bytes
# (written as Q and converted by q_to_nul) in both positions.
test_expect_success 'line buffer' '
echo HELLO >expected1 &&
printf "%s\n" "" HELLO >expected2 &&
echo >expected3 &&
printf "%s\n" "" Q | q_to_nul >expected4 &&
printf "%s\n" foo "" >expected5 &&
printf "%s\n" "" foo >expected6 &&
test-line-buffer <<-\EOF >actual1 &&
5
HELLO
EOF
test-line-buffer <<-\EOF >actual2 &&
0
5
HELLO
EOF
q_to_nul <<-\EOF |
1
Q
EOF
test-line-buffer >actual3 &&
q_to_nul <<-\EOF |
0
1
Q
EOF
test-line-buffer >actual4 &&
test-line-buffer <<-\EOF >actual5 &&
5
foo
EOF
test-line-buffer <<-\EOF >actual6 &&
0
5
foo
EOF
test_cmp expected1 actual1 &&
test_cmp expected2 actual2 &&
test_cmp expected3 actual3 &&
test_cmp expected4 actual4 &&
test_cmp expected5 actual5 &&
test_cmp expected6 actual6
'
# test-string-pool splits its argument on "," and "-", interns the
# pieces, and prints "<first> equals <second>" or
# "<first> does not equal <second>" depending on whether both pieces
# got the same pool id.  It must fail when given more or fewer than
# two pieces.
test_expect_success 'string pool' '
echo a does not equal b >expected.differ &&
echo a equals a >expected.match &&
echo equals equals equals >expected.matchmore &&
test-string-pool "a,--b" >actual.differ &&
test-string-pool "a,a" >actual.match &&
test-string-pool "equals-equals" >actual.matchmore &&
test_must_fail test-string-pool a,a,a &&
test_must_fail test-string-pool a &&
test_cmp expected.differ actual.differ &&
test_cmp expected.match actual.match &&
test_cmp expected.matchmore actual.matchmore
'
# Feed a list of integers containing duplicates to test-treap and
# check that it prints them in the same order as sort(1) does.
# (All values have two digits, so sort's default textual ordering
# agrees with numeric ordering here.)
test_expect_success 'treap sort' '
cat <<-\EOF >unsorted &&
68
12
13
13
68
13
13
21
10
11
12
13
13
EOF
sort unsorted >expected &&
test-treap <unsorted >actual &&
test_cmp expected actual
'
test_done

32
t/t9010-svn-fe.sh Executable file
View File

@ -0,0 +1,32 @@
#!/bin/sh
test_description='check svn dumpfile importer'
. ./lib-git-svn.sh
# test_dump <label> <dump>
#
# Register one test, titled after <dump>, that:
#   1. creates the Subversion repository "<label>-svn", loads
#      $TEST_DIRECTORY/<dump> into it and exports it to "<label>-svnco";
#   2. runs test-svn-fe on the same dump file and feeds the saved
#      stream ("<label>.fe") to git fast-import in "<label>-git";
#   3. turns the svn export into a git repository, fetches master from
#      "<label>-git" and requires "git diff --exit-code FETCH_HEAD" to
#      report no differences.
# <label> only serves to keep the scratch directories of different
# dumps apart.
test_dump() {
label=$1
dump=$2
test_expect_success "$dump" '
svnadmin create "$label-svn" &&
svnadmin load "$label-svn" < "$TEST_DIRECTORY/$dump" &&
svn_cmd export "file://$PWD/$label-svn" "$label-svnco" &&
git init "$label-git" &&
test-svn-fe "$TEST_DIRECTORY/$dump" >"$label.fe" &&
(
cd "$label-git" &&
git fast-import < ../"$label.fe"
) &&
(
cd "$label-svnco" &&
git init &&
git add . &&
git fetch "../$label-git" master &&
git diff --exit-code FETCH_HEAD
)
'
}
test_dump simple t9135/svn.dump
test_done

46
test-line-buffer.c Normal file
View File

@ -0,0 +1,46 @@
/*
* test-line-buffer.c: code to exercise the svn importer's input helper
*
* Input format:
* number NL
* (number bytes) NL
* number NL
* ...
*/
#include "git-compat-util.h"
#include "vcs-svn/line_buffer.h"
/*
 * Parse S as a decimal byte count that must fit in 32 bits.
 *
 * Dies with "invalid count" when S is empty, is followed by anything
 * other than the end of the string, or denotes a value that would be
 * changed by the conversion to uint32_t (previously such values were
 * silently truncated, e.g. 4294967301 was treated as 5).
 */
static uint32_t strtouint32(const char *s)
{
	char *end;
	uintmax_t n = strtoumax(s, &end, 10);

	/* n != (uint32_t) n is true exactly when n needs more than 32 bits. */
	if (*s == '\0' || *end != '\0' || n != (uint32_t) n)
		die("invalid count: %s", s);
	return (uint32_t) n;
}
/*
 * Exercise the line_buffer API on standard input.
 *
 * Each round reads a count line, reads that many bytes as a string
 * and prints them followed by a newline, skips one byte, then reads
 * a second count line and copies (count + 1) bytes to stdout.
 * The loop ends at end of input or on the first read error.
 */
int main(int argc, char *argv[])
{
	char *s;

	if (argc != 1)
		usage("test-line-buffer < input.txt");
	if (buffer_init(NULL))
		die_errno("open error");
	while ((s = buffer_read_line())) {
		s = buffer_read_string(strtouint32(s));
		/*
		 * buffer_read_string() returns NULL on a read error; do not
		 * hand that to fputs().  The error indicator stays set, so
		 * buffer_deinit() below reports it as "input error".
		 */
		if (!s)
			break;
		fputs(s, stdout);
		fputc('\n', stdout);
		buffer_skip_bytes(1);
		if (!(s = buffer_read_line()))
			break;
		buffer_copy_bytes(strtouint32(s) + 1);
	}
	if (buffer_deinit())
		die("input error");
	if (ferror(stdout))
		die("output error");
	/* Release the pool memory for the benefit of leak checkers. */
	buffer_reset();
	return 0;
}

116
test-obj-pool.c Normal file
View File

@ -0,0 +1,116 @@
/*
* test-obj-pool.c: code to exercise the svn importer's object pool
*/
#include "cache.h"
#include "vcs-svn/obj_pool.h"
/* Selects which of the two generated pools a command operates on. */
enum pool { POOL_ONE, POOL_TWO };
/*
 * Two pools of int that differ only in the initial capacity passed to
 * obj_pool_gen (1 versus 4096).
 */
obj_pool_gen(one, int, 1)
obj_pool_gen(two, int, 4096)
/*
 * Parse S as a decimal offset or count that must fit in 32 bits.
 *
 * The number may be followed by a newline or the end of the string.
 * Dies with "invalid offset" when S is empty, has other trailing
 * characters, or denotes a value that would be changed by the
 * conversion to uint32_t (previously silently truncated).
 */
static uint32_t strtouint32(const char *s)
{
	char *end;
	uintmax_t n = strtoumax(s, &end, 10);

	/* n != (uint32_t) n is true exactly when n needs more than 32 bits. */
	if (*s == '\0' || (*end != '\n' && *end != '\0') ||
	    n != (uint32_t) n)
		die("invalid offset: %s", s);
	return (uint32_t) n;
}
/*
 * Execute one script command against the chosen pool.
 *
 * COMMAND is the whole input line (it is matched by its leading
 * "<verb> " prefix), POOL selects pool "one" or "two", and ARG points
 * at the text after the pool name, which is parsed as a number by the
 * verbs that take one.  alloc, committed, null, offset and test print
 * one line of output; commit, free, reset and set print nothing.
 * Any other command is fatal.
 */
static void handle_command(const char *command, enum pool pool, const char *arg)
{
	const int use_one = (pool == POOL_ONE);

	if (!prefixcmp(command, "alloc ")) {
		uint32_t count = strtouint32(arg);
		uint32_t offset = use_one ? one_alloc(count) : two_alloc(count);

		printf("%"PRIu32"\n", offset);
	} else if (!prefixcmp(command, "commit ")) {
		if (use_one)
			one_commit();
		else
			two_commit();
	} else if (!prefixcmp(command, "committed ")) {
		printf("%"PRIu32"\n",
		       use_one ? one_pool.committed : two_pool.committed);
	} else if (!prefixcmp(command, "free ")) {
		uint32_t count = strtouint32(arg);

		if (use_one)
			one_free(count);
		else
			two_free(count);
	} else if (!prefixcmp(command, "null ")) {
		printf("%"PRIu32"\n",
		       use_one ? one_offset(NULL) : two_offset(NULL));
	} else if (!prefixcmp(command, "offset ")) {
		uint32_t slot = strtouint32(arg);

		printf("%"PRIu32"\n",
		       use_one ? one_offset(one_pointer(slot))
			       : two_offset(two_pointer(slot)));
	} else if (!prefixcmp(command, "reset ")) {
		if (use_one)
			one_reset();
		else
			two_reset();
	} else if (!prefixcmp(command, "set ")) {
		uint32_t slot = strtouint32(arg);
		int *target = use_one ? one_pointer(slot) : two_pointer(slot);

		*target = 1;
	} else if (!prefixcmp(command, "test ")) {
		uint32_t slot = strtouint32(arg);

		printf("%d\n",
		       use_one ? *one_pointer(slot) : *two_pointer(slot));
	} else {
		die("unrecognized command: %s", command);
	}
}
static void handle_line(const char *line)
{
const char *arg = strchr(line, ' ');
enum pool pool;
if (arg && !prefixcmp(arg + 1, "one"))
pool = POOL_ONE;
else if (arg && !prefixcmp(arg + 1, "two"))
pool = POOL_TWO;
else
die("no pool specified: %s", line);
handle_command(line, pool, arg + strlen("one "));
}
/*
 * Read pool commands from standard input, one per line, and run each
 * through handle_line().  Takes no command-line arguments.
 */
int main(int argc, char *argv[])
{
	struct strbuf sb = STRBUF_INIT;

	if (argc != 1)
		/* The usage text used to name a nonexistent "test-obj-str". */
		usage("test-obj-pool < script");
	while (strbuf_getline(&sb, stdin, '\n') != EOF)
		handle_line(sb.buf);
	strbuf_release(&sb);
	return 0;
}

31
test-string-pool.c Normal file
View File

@ -0,0 +1,31 @@
/*
* test-string-pool.c: code to exercise the svn importer's string pool
*/
#include "git-compat-util.h"
#include "vcs-svn/string_pool.h"
/*
 * Split argv[1] on ',' and '-' into interned strings and print
 * "<first> equals <second>" or "<first> does not equal <second>",
 * depending on whether both pieces received the same pool id.
 * Dies unless exactly two pieces are found.
 */
int main(int argc, char *argv[])
{
	const uint32_t unequal = pool_intern("does not equal");
	const uint32_t equal = pool_intern("equals");
	uint32_t seq[3];
	uint32_t count;

	if (argc != 2)
		usage("test-string-pool <string>,<string>");

	count = pool_tok_seq(3, seq, ",-", argv[1]);
	if (count >= 3)
		die("too many strings");
	if (count <= 1)
		die("too few strings");

	/* Move the second piece to the end and put the verdict between. */
	seq[2] = seq[1];
	if (seq[0] == seq[2])
		seq[1] = equal;
	else
		seq[1] = unequal;

	pool_print_seq(3, seq, ' ', stdout);
	fputc('\n', stdout);
	pool_reset();
	return 0;
}

17
test-svn-fe.c Normal file
View File

@ -0,0 +1,17 @@
/*
* test-svn-fe: Code to exercise the svn import lib
*/
#include "git-compat-util.h"
#include "vcs-svn/svndump.h"
/*
 * Convert the dump file named by the single argument: initialise the
 * importer on that file, read it, then shut the importer down and
 * release its memory.
 */
int main(int argc, char *argv[])
{
	const char *dumpfile;

	if (argc != 2)
		usage("test-svn-fe <file>");
	dumpfile = argv[1];

	svndump_init(dumpfile);
	svndump_read(NULL);
	svndump_deinit();
	svndump_reset();
	return 0;
}

65
test-treap.c Normal file
View File

@ -0,0 +1,65 @@
/*
* test-treap.c: code to exercise the svn importer's treap structure
*/
#include "cache.h"
#include "vcs-svn/obj_pool.h"
#include "vcs-svn/trp.h"
/*
 * Treap element carrying one integer.  `children` is the link field
 * named in the trp_gen() instantiation further down in this file.
 */
struct int_node {
uintmax_t n;
struct trp_node children;
};
/* Pool "node" of struct int_node, with an initial capacity of 3. */
obj_pool_gen(node, struct int_node, 3)
/* Three-way comparison of two nodes by their integer value: -1, 0 or 1. */
static int node_cmp(struct int_node *a, struct int_node *b)
{
	if (a->n < b->n)
		return -1;
	if (a->n > b->n)
		return 1;
	return 0;
}
trp_gen(static, treap_, struct int_node, children, node, node_cmp)
/*
 * Parse S as a decimal integer and store it in ITEM.  The number may
 * be followed by a newline or the end of the string; an empty string
 * or any other trailing character is fatal.
 */
static void strtonode(struct int_node *item, const char *s)
{
	char *tail;
	uintmax_t value = strtoumax(s, &tail, 10);

	item->n = value;
	if (!*s || !(*tail == '\n' || *tail == '\0'))
		die("invalid integer: %s", s);
}
/*
 * Read integers from standard input (one per line), insert each into
 * a treap, then drain the treap one node at a time, printing every
 * value as its node is removed.  Along the way, check that searching
 * for the removed value with treap_nsearch() lands on the expected
 * successor; die with "found X in place of Y" otherwise.
 */
int main(int argc, char *argv[])
{
struct strbuf sb = STRBUF_INIT;
/* NOTE(review): { ~0 } presumably denotes an empty treap -- confirm in trp.h. */
struct trp_root root = { ~0 };
uint32_t item;
if (argc != 1)
usage("test-treap < ints");
/* Phase 1: one pool node per input line, inserted into the treap. */
while (strbuf_getline(&sb, stdin, '\n') != EOF) {
item = node_alloc(1);
strtonode(node_pointer(item), sb.buf);
treap_insert(&root, node_pointer(item));
}
/* Phase 2: walk from the first node; node_offset(NULL) is ~0, which ends the loop. */
item = node_offset(treap_first(&root));
while (~item) {
uint32_t next;
/* Scratch node holding a copy of the current value, used as the search key. */
struct int_node *tmp = node_pointer(node_alloc(1));
tmp->n = node_pointer(item)->n;
/* Remember the successor before the current node is removed. */
next = node_offset(treap_next(&root, node_pointer(item)));
treap_remove(&root, node_pointer(item));
item = node_offset(treap_nsearch(&root, tmp));
/*
 * The search must yield either the remembered successor or some
 * other node with the same value (duplicates are allowed).
 */
if (item != next && (!~item || node_pointer(item)->n != tmp->n))
die("found %"PRIuMAX" in place of %"PRIuMAX"",
~item ? node_pointer(item)->n : ~(uintmax_t) 0,
~next ? node_pointer(next)->n : ~(uintmax_t) 0);
printf("%"PRIuMAX"\n", tmp->n);
}
node_reset();
return 0;
}

33
vcs-svn/LICENSE Normal file
View File

@ -0,0 +1,33 @@
Copyright (C) 2010 David Barr <david.barr@cordelta.com>.
All rights reserved.
Copyright (C) 2008 Jason Evans <jasone@canonware.com>.
All rights reserved.
Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH,
Frankfurt/Main, Germany
and others, see http://svn2cc.sarovar.org
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice(s), this list of conditions and the following disclaimer
unmodified other than the allowable addition of one or more
copyright notices.
2. Redistributions in binary form must reproduce the above copyright
notice(s), this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

75
vcs-svn/fast_export.c Normal file
View File

@ -0,0 +1,75 @@
/*
* Licensed under a two-clause BSD-style license.
* See LICENSE for details.
*/
#include "git-compat-util.h"
#include "fast_export.h"
#include "line_buffer.h"
#include "repo_tree.h"
#include "string_pool.h"
#define MAX_GITSVN_LINE_LEN 4096
static uint32_t first_commit_done;
/*
 * Write a fast-import "D <path>" line to stdout.  PATH holds DEPTH
 * string-pool ids that are printed joined by '/'.
 */
void fast_export_delete(uint32_t depth, uint32_t *path)
{
	fputs("D ", stdout);
	pool_print_seq(depth, path, '/', stdout);
	fputc('\n', stdout);
}
/*
 * Write a fast-import "M <mode> :<mark> <path>" line to stdout.
 *
 * MODE is printed as six octal digits and must be 100644, 100755,
 * 120000, or 160000.  MARK is the blob mark; PATH holds DEPTH
 * string-pool ids that are printed joined by '/'.
 */
void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
			uint32_t mark)
{
	/*
	 * mark is unsigned: "%d" printed marks above INT_MAX as negative
	 * numbers.  The cast makes the "%o" argument type exact as well.
	 */
	printf("M %06o :%"PRIu32" ", (unsigned int) mode, mark);
	pool_print_seq(depth, path, '/', stdout);
	putchar('\n');
}
static char gitsvnline[MAX_GITSVN_LINE_LEN];
/*
 * Write one fast-import "commit" command for REVISION to stdout.
 *
 * AUTHOR, UUID and URL are string-pool ids, with ~0 meaning "not
 * set": a missing author becomes "nobody" and a missing uuid becomes
 * "local" in the committer line.  When both UUID and URL are set, a
 * "git-svn-id: URL@REVISION UUID" trailer is appended to LOG (NULL is
 * treated as an empty message).  The first commit written by this
 * process gets a "from refs/heads/master^0" line if its revision is
 * greater than 1.  The tree changes are emitted by
 * repo_diff(revision - 1, revision), followed by a progress line.
 */
void fast_export_commit(uint32_t revision, uint32_t author, char *log,
			uint32_t uuid, uint32_t url,
			unsigned long timestamp)
{
	if (!log)
		log = "";
	if (~uuid && ~url) {
		/* revision is unsigned, so print it with PRIu32, not "%d". */
		snprintf(gitsvnline, MAX_GITSVN_LINE_LEN,
			 "\n\ngit-svn-id: %s@%"PRIu32" %s\n",
			 pool_fetch(url), revision, pool_fetch(uuid));
	} else {
		*gitsvnline = '\0';
	}
	printf("commit refs/heads/master\n");
	/* timestamp is an unsigned long: "%lu", not "%ld". */
	printf("committer %s <%s@%s> %lu +0000\n",
	       ~author ? pool_fetch(author) : "nobody",
	       ~author ? pool_fetch(author) : "nobody",
	       ~uuid ? pool_fetch(uuid) : "local", timestamp);
	/* The byte count covers the log text plus the optional trailer. */
	printf("data %"PRIu32"\n%s%s\n",
	       (uint32_t) (strlen(log) + strlen(gitsvnline)),
	       log, gitsvnline);
	if (!first_commit_done) {
		if (revision > 1)
			printf("from refs/heads/master^0\n");
		first_commit_done = 1;
	}
	repo_diff(revision - 1, revision);
	fputc('\n', stdout);

	printf("progress Imported commit %"PRIu32".\n\n", revision);
}
/*
 * Write a fast-import "blob" command to stdout, copying LEN bytes of
 * content from the line buffer's input.
 *
 * For symlinks (MODE == REPO_MODE_LNK) the leading "link " that svn
 * stores in the blob is dropped.  If such a blob is shorter than that
 * prefix, whatever is there is consumed and an empty blob is written;
 * previously LEN wrapped around to a huge unsigned value.
 */
void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len)
{
	if (mode == REPO_MODE_LNK) {
		/* svn symlink blobs start with "link " */
		const uint32_t prefix_len = 5;
		const uint32_t skip = len < prefix_len ? len : prefix_len;

		buffer_skip_bytes(skip);
		len -= skip;
	}
	/* mark and len are unsigned: "%d" misprints values above INT_MAX. */
	printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len);
	buffer_copy_bytes(len);
	fputc('\n', stdout);
}

11
vcs-svn/fast_export.h Normal file
View File

@ -0,0 +1,11 @@
#ifndef FAST_EXPORT_H_
#define FAST_EXPORT_H_
void fast_export_delete(uint32_t depth, uint32_t *path);
void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
uint32_t mark);
void fast_export_commit(uint32_t revision, uint32_t author, char *log,
uint32_t uuid, uint32_t url, unsigned long timestamp);
void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len);
#endif

97
vcs-svn/line_buffer.c Normal file
View File

@ -0,0 +1,97 @@
/*
* Licensed under a two-clause BSD-style license.
* See LICENSE for details.
*/
#include "git-compat-util.h"
#include "line_buffer.h"
#include "obj_pool.h"
#define LINE_BUFFER_LEN 10000
#define COPY_BUFFER_LEN 4096
/* Create memory pool for char sequence of known length */
obj_pool_gen(blob, char, 4096)
static char line_buffer[LINE_BUFFER_LEN];
static char byte_buffer[COPY_BUFFER_LEN];
static FILE *infile;
/*
 * Select the input to read from: the named file, or stdin when
 * FILENAME is NULL.  Returns 0 on success and -1 if the file could
 * not be opened.
 */
int buffer_init(const char *filename)
{
	if (filename)
		infile = fopen(filename, "r");
	else
		infile = stdin;
	return infile ? 0 : -1;
}
/*
 * Finish reading from the current input.  Returns nonzero if the
 * stream's error indicator was set or, for inputs other than stdin,
 * if closing the file failed.  stdin itself is left open.
 */
int buffer_deinit(void)
{
	int failed = ferror(infile);

	if (infile != stdin)
		failed |= fclose(infile);
	return failed;
}
/* Read a line without trailing newline. */
/*
 * Read one line into the static line buffer and strip its trailing
 * newline.
 *
 * Returns the buffer, or NULL on read error, at end of input, or when
 * the line cannot be represented: it did not fit in the buffer, or it
 * begins with a NUL byte so that the text fgets() stored looks empty
 * without having reached a newline or the end of file.  The returned
 * storage is overwritten by the next call.
 */
char *buffer_read_line(void)
{
	char *end;

	if (!fgets(line_buffer, sizeof(line_buffer), infile))
		/* Error or data exhausted. */
		return NULL;
	end = line_buffer + strlen(line_buffer);
	/*
	 * Guard against an empty string (possible when the data starts
	 * with a NUL byte): end[-1] would read before the buffer.
	 */
	if (end > line_buffer && end[-1] == '\n')
		end[-1] = '\0';
	else if (feof(infile))
		; /* No newline at end of file.  That's fine. */
	else
		/*
		 * Line was too long (or hidden behind a NUL byte).
		 * There is probably a saner way to deal with this,
		 * but for now let's return an error.
		 */
		return NULL;
	return line_buffer;
}
/*
 * Read up to LEN bytes from the input into a NUL-terminated string
 * held in the blob pool.  The pool is emptied first, so the string
 * returned by the previous call is no longer valid.  Returns NULL if
 * the stream's error indicator is set, otherwise whatever was read
 * (which is shorter than LEN at end of file).
 */
char *buffer_read_string(uint32_t len)
{
	char *text;
	size_t got;

	/* Drop everything currently in the pool before reusing it. */
	blob_free(blob_pool.size);
	text = blob_pointer(blob_alloc(len + 1));
	got = fread(text, 1, len, infile);
	text[got] = '\0';
	if (ferror(infile))
		return NULL;
	return text;
}
/*
 * Copy LEN bytes from the input to stdout in chunks of at most
 * COPY_BUFFER_LEN bytes.  Stops early at end of file or on an input
 * error.  If writing to stdout fails, the rest of the LEN bytes are
 * consumed from the input without being written.
 */
void buffer_copy_bytes(uint32_t len)
{
	uint32_t want, got;

	while (len > 0 && !feof(infile) && !ferror(infile)) {
		want = COPY_BUFFER_LEN;
		if (len < want)
			want = len;
		got = fread(byte_buffer, 1, want, infile);
		len -= got;
		fwrite(byte_buffer, 1, got, stdout);
		if (!ferror(stdout))
			continue;
		/* Output is broken: keep the input position consistent. */
		buffer_skip_bytes(len);
		return;
	}
}
/*
 * Read and discard LEN bytes of input in chunks of at most
 * COPY_BUFFER_LEN bytes, stopping early at end of file or on error.
 */
void buffer_skip_bytes(uint32_t len)
{
	uint32_t want, got;

	while (len > 0 && !feof(infile) && !ferror(infile)) {
		want = COPY_BUFFER_LEN;
		if (len < want)
			want = len;
		got = fread(byte_buffer, 1, want, infile);
		len -= got;
	}
}
/*
 * Release the blob pool that backs buffer_read_string().  Strings
 * previously returned by buffer_read_string() must not be used
 * afterwards.  The static line and copy buffers are unaffected.
 */
void buffer_reset(void)
{
blob_reset();
}

12
vcs-svn/line_buffer.h Normal file
View File

@ -0,0 +1,12 @@
#ifndef LINE_BUFFER_H_
#define LINE_BUFFER_H_
int buffer_init(const char *filename);
int buffer_deinit(void);
char *buffer_read_line(void);
char *buffer_read_string(uint32_t len);
void buffer_copy_bytes(uint32_t len);
void buffer_skip_bytes(uint32_t len);
void buffer_reset(void);
#endif

58
vcs-svn/line_buffer.txt Normal file
View File

@ -0,0 +1,58 @@
line_buffer API
===============
The line_buffer library provides a convenient interface for
mostly-line-oriented input.
Each line is not permitted to exceed 10000 bytes. The provided
functions are not thread-safe or async-signal-safe, and like
`fgets()`, they generally do not function correctly if interrupted
by a signal without SA_RESTART set.
Calling sequence
----------------
The calling program:
- specifies a file to read with `buffer_init`
- processes input with `buffer_read_line`, `buffer_read_string`,
`buffer_skip_bytes`, and `buffer_copy_bytes`
- closes the file with `buffer_deinit`, perhaps to start over and
read another file.
Before exiting, the caller can use `buffer_reset` to deallocate
resources for the benefit of profiling tools.
Functions
---------
`buffer_init`::
Open the named file for input. If filename is NULL,
start reading from stdin. On failure, returns -1 (with
errno indicating the nature of the failure).
`buffer_deinit`::
Stop reading from the current file (closing it unless
it was stdin). Returns nonzero if `fclose` fails or
the error indicator was set.
`buffer_read_line`::
Read a line and strip off the trailing newline.
On failure or end of file, returns NULL.
`buffer_read_string`::
Read `len` characters of input or up to the end of the
file, whichever comes first. Returns NULL on error.
Returns whatever characters were read (possibly "")
for end of file.
`buffer_copy_bytes`::
Read `len` bytes of input and dump them to the standard output
stream. Returns early for error or end of file.
`buffer_skip_bytes`::
Discards `len` bytes from the input stream (stopping early
if necessary because of an error or eof).
`buffer_reset`::
Deallocates non-static buffers.

61
vcs-svn/obj_pool.h Normal file
View File

@ -0,0 +1,61 @@
/*
* Licensed under a two-clause BSD-style license.
* See LICENSE for details.
*/
#ifndef OBJ_POOL_H_
#define OBJ_POOL_H_
#include "git-compat-util.h"
#define MAYBE_UNUSED __attribute__((__unused__))
/*
 * obj_pool_gen(pre, obj_t, initial_capacity)
 *
 * Generate a file-local growable array of obj_t named pre_pool plus
 * static helper functions.  Elements are identified by uint32_t
 * offsets (array indices) rather than pointers.
 *
 *   pre_alloc(count)   Append count elements, growing the array first
 *                      if needed (capacity starts at initial_capacity
 *                      and doubles until the request fits).  Returns
 *                      the offset of the first new element.  The new
 *                      elements are not initialised.
 *   pre_free(count)    Drop the last count elements by lowering the
 *                      size; no memory is released.
 *   pre_offset(obj)    Offset of obj within the array, or ~0 for NULL.
 *   pre_pointer(off)   Pointer to element off, or NULL when off is
 *                      not below the current size.
 *   pre_commit()       Record the current size in pre_pool.committed.
 *   pre_reset()        Free the array and zero all bookkeeping.
 *
 * Because pre_alloc() calls realloc(), pointers obtained from
 * pre_pointer() may become invalid after any later pre_alloc(); keep
 * offsets across allocations and convert back when needed.
 *
 * NOTE(review): the realloc() result is not checked, and
 * size + count is computed in uint32_t without an overflow check.
 * NOTE(review): initial_capacity is assumed to be nonzero; with 0 the
 * growth loop in pre_alloc() could not make progress.
 */
#define obj_pool_gen(pre, obj_t, initial_capacity) \
static struct { \
uint32_t committed; \
uint32_t size; \
uint32_t capacity; \
obj_t *base; \
} pre##_pool = {0, 0, 0, NULL}; \
static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \
{ \
uint32_t offset; \
if (pre##_pool.size + count > pre##_pool.capacity) { \
while (pre##_pool.size + count > pre##_pool.capacity) \
if (pre##_pool.capacity) \
pre##_pool.capacity *= 2; \
else \
pre##_pool.capacity = initial_capacity; \
pre##_pool.base = realloc(pre##_pool.base, \
pre##_pool.capacity * sizeof(obj_t)); \
} \
offset = pre##_pool.size; \
pre##_pool.size += count; \
return offset; \
} \
static MAYBE_UNUSED void pre##_free(uint32_t count) \
{ \
pre##_pool.size -= count; \
} \
static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \
{ \
return obj == NULL ? ~0 : obj - pre##_pool.base; \
} \
static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \
{ \
return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \
} \
static MAYBE_UNUSED void pre##_commit(void) \
{ \
pre##_pool.committed = pre##_pool.size; \
} \
static MAYBE_UNUSED void pre##_reset(void) \
{ \
free(pre##_pool.base); \
pre##_pool.base = NULL; \
pre##_pool.size = 0; \
pre##_pool.capacity = 0; \
pre##_pool.committed = 0; \
}
#endif

329
vcs-svn/repo_tree.c Normal file
View File

@ -0,0 +1,329 @@
/*
* Licensed under a two-clause BSD-style license.
* See LICENSE for details.
*/
#include "git-compat-util.h"
#include "string_pool.h"
#include "repo_tree.h"
#include "obj_pool.h"
#include "fast_export.h"
#include "trp.h"
/*
 * One entry of a directory.
 *
 * name_offset is the sort key within the directory's treap
 * (presumably a string_pool id of the entry name -- confirm against
 * the callers that build paths).  For directories (mode ==
 * REPO_MODE_DIR) content_offset is an offset into the dir pool; for
 * other entries it is the value callers pass as the blob mark.
 */
struct repo_dirent {
uint32_t name_offset;
struct trp_node children;
uint32_t mode;
uint32_t content_offset;
};
/* A directory: the root of a treap of struct repo_dirent. */
struct repo_dir {
struct trp_root entries;
};
/* A revision: the dir pool offset of its root directory. */
struct repo_commit {
uint32_t root_dir_offset;
};
/* Memory pools for commit, dir and dirent */
obj_pool_gen(commit, struct repo_commit, 4096)
obj_pool_gen(dir, struct repo_dir, 4096)
obj_pool_gen(dent, struct repo_dirent, 4096)
static uint32_t active_commit;
static uint32_t mark;
static int repo_dirent_name_cmp(const void *a, const void *b);
/* Treap for directory entries */
trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp);
/* Return the current value of the blob mark counter, then increment it. */
uint32_t next_blob_mark(void)
{
return mark++;
}
/*
 * Return the root directory of COMMIT, or NULL if its recorded offset
 * is outside the dir pool.  COMMIT itself must not be NULL.
 */
static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit)
{
return dir_pointer(commit->root_dir_offset);
}
/*
 * Return the first entry of DIR as reported by the treap
 * (presumably the one with the smallest name_offset, or NULL for an
 * empty directory -- confirm in trp.h).
 */
static struct repo_dirent *repo_first_dirent(struct repo_dir *dir)
{
return dent_first(&dir->entries);
}
/*
 * Treap ordering for directory entries: compare by name_offset and
 * return -1, 0 or 1.
 */
static int repo_dirent_name_cmp(const void *a, const void *b)
{
	const struct repo_dirent *lhs = a;
	const struct repo_dirent *rhs = b;

	if (lhs->name_offset < rhs->name_offset)
		return -1;
	return lhs->name_offset > rhs->name_offset;
}
/* Nonzero iff DENT is non-NULL and its mode marks it as a directory. */
static int repo_dirent_is_dir(struct repo_dirent *dent)
{
	if (!dent)
		return 0;
	return dent->mode == REPO_MODE_DIR;
}
/*
 * Return the directory that DENT refers to, or NULL when DENT is NULL
 * or is not a directory entry.
 */
static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dent)
{
	return repo_dirent_is_dir(dent) ?
		dir_pointer(dent->content_offset) : NULL;
}
/*
 * Return a directory that may be modified on behalf of the revision
 * being built.  A directory whose pool offset lies below the
 * committed mark is copied into a freshly allocated slot and the copy
 * is returned; any other directory is returned unchanged.
 */
static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir)
{
uint32_t orig_o, new_o;
orig_o = dir_offset(orig_dir);
/*
 * Not committed yet, or NULL (whose offset is ~0 and therefore
 * never below the mark): hand it back as is.
 */
if (orig_o >= dir_pool.committed)
return orig_dir;
new_o = dir_alloc(1);
/* dir_alloc() may have moved the pool, so look the original up again. */
orig_dir = dir_pointer(orig_o);
*dir_pointer(new_o) = *orig_dir;
return dir_pointer(new_o);
}
/*
 * Look up PATH in the tree of REVISION.  PATH is a sequence of name
 * offsets terminated by ~0.  Returns the entry found for the last
 * component examined, or NULL if a component is missing or PATH is
 * empty.
 *
 * NOTE(review): the walk stops at the first entry that is not a
 * directory and returns that entry even when path components remain
 * -- confirm that callers expect this.
 * NOTE(review): the result points into the dent pool, so it is
 * presumably only valid until the next dent_alloc() -- confirm.
 */
static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path)
{
uint32_t name = 0;
/* Scratch entry used only as the search key; released before returning. */
struct repo_dirent *key = dent_pointer(dent_alloc(1));
struct repo_dir *dir = NULL;
struct repo_dirent *dent = NULL;
dir = repo_commit_root_dir(commit_pointer(revision));
while (~(name = *path++)) {
key->name_offset = name;
dent = dent_search(&dir->entries, key);
if (dent == NULL || !repo_dirent_is_dir(dent))
break;
dir = repo_dir_from_dirent(dent);
}
dent_free(1);
return dent;
}
/*
 * Set (or, when DEL is nonzero, remove) the entry at PATH in the
 * active commit.  PATH is a sequence of name offsets terminated by
 * ~0; an empty PATH does nothing.
 *
 * Directories and entries whose pool offsets lie below the committed
 * mark are never modified in place: each one on the way down is
 * replaced by a fresh copy first.  Missing components are created as
 * directory entries.  After the walk, the entry for the last
 * component receives MODE and CONTENT_OFFSET and, if DEL is set, is
 * removed from its parent directory.
 */
static void repo_write_dirent(uint32_t *path, uint32_t mode,
uint32_t content_offset, uint32_t del)
{
uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0;
struct repo_dir *dir;
struct repo_dirent *key;
struct repo_dirent *dent = NULL;
revision = active_commit;
/* Make sure the root directory of the active commit is writable. */
dir = repo_commit_root_dir(commit_pointer(revision));
dir = repo_clone_dir(dir);
commit_pointer(revision)->root_dir_offset = dir_offset(dir);
while (~(name = *path++)) {
parent_dir_o = dir_offset(dir);
/* Allocate a candidate entry; it doubles as the search key. */
key = dent_pointer(dent_alloc(1));
key->name_offset = name;
dent = dent_search(&dir->entries, key);
if (dent == NULL)
dent = key;
else
dent_free(1);
if (dent == key) {
/*
 * Not found: keep the candidate as a new directory entry.
 * NOTE(review): content_offset 0 presumably refers to a
 * directory reserved at dir pool slot 0 -- confirm.
 */
dent->mode = REPO_MODE_DIR;
dent->content_offset = 0;
dent_insert(&dir->entries, dent);
}
if (dent_offset(dent) < dent_pool.committed) {
/*
 * The entry predates the committed mark: swap it for a fresh
 * directory entry that keeps the old directory offset (or ~0
 * if the old entry was not a directory).
 */
dir_o = repo_dirent_is_dir(dent) ?
dent->content_offset : ~0;
dent_remove(&dir->entries, dent);
dent = dent_pointer(dent_alloc(1));
dent->name_offset = name;
dent->mode = REPO_MODE_DIR;
dent->content_offset = dir_o;
dent_insert(&dir->entries, dent);
}
/* Descend, making the next directory writable as well. */
dir = repo_dir_from_dirent(dent);
dir = repo_clone_dir(dir);
dent->content_offset = dir_offset(dir);
}
if (dent == NULL)
return;
/* Overwrite the provisional directory values on the final entry. */
dent->mode = mode;
dent->content_offset = content_offset;
if (del && ~parent_dir_o)
dent_remove(&dir_pointer(parent_dir_o)->entries, dent);
}
/*
 * Copy the entry at SRC in REVISION to DST in the active commit.
 * Returns the mode of the source entry, or 0 (and writes nothing) if
 * SRC does not exist.
 */
uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst)
{
	struct repo_dirent *from = repo_read_dirent(revision, src);
	uint32_t mode, content;

	if (!from)
		return 0;
	/* Read both fields before writing; the write allocates entries. */
	mode = from->mode;
	content = from->content_offset;
	repo_write_dirent(dst, mode, content, 0);
	return mode;
}
/*
 * Record an entry at `path` in the active commit with the given mode and
 * blob mark as its content.
 */
void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark)
{
	const uint32_t is_delete = 0;

	repo_write_dirent(path, mode, blob_mark, is_delete);
}
/*
 * Give the existing entry at `path` in the active commit new content
 * (`blob_mark`) while keeping its mode.  Returns that mode, or 0 if no
 * entry was found (in which case nothing is written).
 */
uint32_t repo_replace(uint32_t *path, uint32_t blob_mark)
{
	struct repo_dirent *existing = repo_read_dirent(active_commit, path);
	uint32_t kept_mode;

	if (!existing)
		return 0;
	kept_mode = existing->mode;
	repo_write_dirent(path, kept_mode, blob_mark, 0);
	return kept_mode;
}
/*
 * Set the mode of the entry at `path` in the active commit.  A blob mark
 * of 0 means "keep the current content" when an entry is already there.
 */
void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark)
{
	struct repo_dirent *existing = repo_read_dirent(active_commit, path);
	uint32_t content = blob_mark;

	if (content == 0 && existing)
		content = existing->content_offset;
	repo_write_dirent(path, mode, content, 0);
}
/* Remove the entry at `path` from the active commit's tree. */
void repo_delete(uint32_t *path)
{
	const uint32_t is_delete = 1;

	repo_write_dirent(path, 0, 0, is_delete);
}
static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir);

/*
 * Emit `dent`, whose full path is the first `depth` keys of `path`:
 * a non-directory is passed to fast_export_modify(), a directory is
 * expanded recursively.
 */
static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dent)
{
	if (!repo_dirent_is_dir(dent)) {
		fast_export_modify(depth, path,
				   dent->mode, dent->content_offset);
		return;
	}
	repo_git_add_r(depth, path, repo_dir_from_dirent(dent));
}
/*
 * Emit every entry of `dir`, in treap order.  `path[0..depth)` names the
 * directory itself; slot `depth` is overwritten with each entry's name.
 */
static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir)
{
	struct repo_dirent *entry;

	for (entry = repo_first_dirent(dir); entry;
	     entry = dent_next(&dir->entries, entry)) {
		path[depth] = entry->name_offset;
		repo_git_add(depth + 1, path, entry);
	}
}
/*
 * Emit the changes that turn directory `dir1` into directory `dir2`.
 *
 * Both directories are walked in name order in lock step.  Names present
 * only in dir1 are deleted, names present only in dir2 are added, and
 * names present in both are compared: identical entries are skipped, two
 * directories are diffed recursively, and anything else is re-added
 * (preceded by a delete when exactly one side is a directory).
 * `path[0..depth)` names the directory being compared.
 */
static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1,
			struct repo_dir *dir2)
{
	struct repo_dirent *before = repo_first_dirent(dir1);
	struct repo_dirent *after = repo_first_dirent(dir2);

	while (before || after) {
		if (!after ||
		    (before && before->name_offset < after->name_offset)) {
			/* Only the old side has this name. */
			path[depth] = before->name_offset;
			fast_export_delete(depth + 1, path);
			before = dent_next(&dir1->entries, before);
		} else if (!before ||
			   before->name_offset > after->name_offset) {
			/* Only the new side has this name. */
			path[depth] = after->name_offset;
			repo_git_add(depth + 1, path, after);
			after = dent_next(&dir2->entries, after);
		} else {
			/* Same name on both sides. */
			path[depth] = before->name_offset;
			if (before->mode != after->mode ||
			    before->content_offset != after->content_offset) {
				if (repo_dirent_is_dir(before) &&
				    repo_dirent_is_dir(after)) {
					repo_diff_r(depth + 1, path,
						    repo_dir_from_dirent(before),
						    repo_dir_from_dirent(after));
				} else {
					/* File <-> directory needs an explicit delete first. */
					if (repo_dirent_is_dir(before) ||
					    repo_dirent_is_dir(after))
						fast_export_delete(depth + 1, path);
					repo_git_add(depth + 1, path, after);
				}
			}
			before = dent_next(&dir1->entries, before);
			after = dent_next(&dir2->entries, after);
		}
	}
}
/* Scratch path shared by the whole recursive diff. */
static uint32_t path_stack[REPO_MAX_PATH_DEPTH];

/* Emit the changes between the root trees of revisions `r1` and `r2`. */
void repo_diff(uint32_t r1, uint32_t r2)
{
	struct repo_dir *old_root = repo_commit_root_dir(commit_pointer(r1));
	struct repo_dir *new_root = repo_commit_root_dir(commit_pointer(r2));

	repo_diff_r(0, path_stack, old_root, new_root);
}
/*
 * Finish the active commit: export it, commit the dirent and dir pools,
 * then allocate the next active commit, which starts out sharing the root
 * directory of the one just finished.
 */
void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid,
		 uint32_t url, unsigned long timestamp)
{
	uint32_t inherited_root;

	fast_export_commit(revision, author, log, uuid, url, timestamp);
	dent_commit();
	dir_commit();

	active_commit = commit_alloc(1);
	inherited_root = commit_pointer(active_commit - 1)->root_dir_offset;
	commit_pointer(active_commit)->root_dir_offset = inherited_root;
}
static void mark_init(void)
{
uint32_t i;
mark = 0;
for (i = 0; i < dent_pool.size; i++)
if (!repo_dirent_is_dir(dent_pointer(i)) &&
dent_pointer(i)->content_offset > mark)
mark = dent_pointer(i)->content_offset;
mark++;
}
/*
 * Prepare the repository state: initialise the blob mark counter, make
 * sure commit 0 exists with an empty root directory, and allocate the
 * active commit so that changes can be recorded against it.
 */
void repo_init(void)
{
	uint32_t inherited_root;

	mark_init();
	if (!commit_pool.size) {
		/* First use: commit 0 gets an empty tree. */
		commit_alloc(1);
		commit_pointer(0)->root_dir_offset = dir_alloc(1);
		dir_pointer(0)->entries.trp_root = ~0;
		dir_commit();
	}

	/* The new active commit starts from its predecessor's root directory. */
	active_commit = commit_alloc(1);
	inherited_root = commit_pointer(active_commit - 1)->root_dir_offset;
	commit_pointer(active_commit)->root_dir_offset = inherited_root;
}
/*
 * Reset the string pool and then the commit, dir and dirent pools,
 * in that order.
 */
void repo_reset(void)
{
	/* Interned strings first ... */
	pool_reset();

	/* ... then the tree data that referred to them. */
	commit_reset();
	dir_reset();
	dent_reset();
}

26
vcs-svn/repo_tree.h Normal file
View File

@ -0,0 +1,26 @@
#ifndef REPO_TREE_H_
#define REPO_TREE_H_
#include "git-compat-util.h"
/*
 * Mode values stored in tree entries.  Going by the names: directory,
 * regular blob, executable blob and symbolic link
 * (NOTE(review): values look like git tree modes - confirm).
 */
#define REPO_MODE_DIR 0040000
#define REPO_MODE_BLB 0100644
#define REPO_MODE_EXE 0100755
#define REPO_MODE_LNK 0120000
/* NOTE(review): presumably the longest path, in bytes, that is supported - confirm. */
#define REPO_MAX_PATH_LEN 4096
/* Number of slots in the arrays that hold a path as a sequence of name keys. */
#define REPO_MAX_PATH_DEPTH 1000
/*
 * Paths below are ~0-terminated arrays of string-pool keys, one key per
 * path component.
 */
/* NOTE(review): presumably hands out the next unused blob mark - confirm. */
uint32_t next_blob_mark(void);
/*
 * Copy the entry at src in the given revision to dst in the active commit.
 * Returns the mode of the source entry, or 0 if it was not found.
 */
uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst);
/* Write an entry with the given mode and blob mark at path. */
void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark);
/*
 * Replace the content of the existing entry at path, keeping its mode.
 * Returns that mode, or 0 if there was no such entry.
 */
uint32_t repo_replace(uint32_t *path, uint32_t blob_mark);
/*
 * Write an entry with the given mode at path; a blob_mark of 0 keeps the
 * content of an entry that is already there.
 */
void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark);
/* Remove the entry at path from the active commit. */
void repo_delete(uint32_t *path);
/* Export the active commit and start a new one based on it. */
void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid,
uint32_t url, long unsigned timestamp);
/* Emit the differences between the trees of revisions r1 and r2. */
void repo_diff(uint32_t r1, uint32_t r2);
/* Set up the repository state; must precede the calls above. */
void repo_init(void);
/* Reset the string pool and the commit, dir and dirent pools. */
void repo_reset(void);
#endif

102
vcs-svn/string_pool.c Normal file
View File

@ -0,0 +1,102 @@
/*
* Licensed under a two-clause BSD-style license.
* See LICENSE for details.
*/
#include "git-compat-util.h"
#include "trp.h"
#include "obj_pool.h"
#include "string_pool.h"
/* Root of the treap that indexes all interned strings; ~0 means empty. */
static struct trp_root tree = { ~0 };
/*
 * One interned string: `offset` locates its first character in the
 * string pool, `children` links the node into the treap.
 */
struct node {
uint32_t offset;
struct trp_node children;
};
/* Two memory pools: one for struct node, and another for strings */
obj_pool_gen(node, struct node, 4096)
obj_pool_gen(string, char, 4096)
/* Return the string a node refers to, or NULL for a NULL node. */
static char *node_value(struct node *node)
{
	if (!node)
		return NULL;
	return string_pointer(node->offset);
}
/* Treap ordering: nodes compare like the strings they refer to. */
static int node_cmp(struct node *a, struct node *b)
{
	const char *lhs = node_value(a);
	const char *rhs = node_value(b);

	return strcmp(lhs, rhs);
}
/*
 * Build a Treap from the node structure (a trp_node w/ offset).
 * This generates the static tree_* functions (tree_search, tree_insert, ...)
 * operating on nodes from the `node` pool, ordered by node_cmp.
 *
 * No trailing semicolon: trp_gen() expands to function definitions, and a
 * stray ';' after them at file scope is not valid ISO C.
 */
trp_gen(static, tree_, struct node, children, node, node_cmp)
/*
 * Return the string interned under key `entry`, or NULL when
 * node_pointer() yields no node for that key.
 */
const char *pool_fetch(uint32_t entry)
{
	struct node *found = node_pointer(entry);

	return node_value(found);
}
/*
 * Intern `key` and return its integer key; a NULL string maps to ~0.
 *
 * The string is first copied into a tentative node so that it can be
 * compared against the treap.  If an equal string is already present the
 * tentative node and copy are released again and the existing key is
 * returned.
 */
uint32_t pool_intern(const char *key)
{
	struct node *candidate, *existing;
	uint32_t nbytes;

	if (!key)
		return ~0;

	nbytes = strlen(key) + 1;	/* include the terminating NUL */
	candidate = node_pointer(node_alloc(1));
	candidate->offset = string_alloc(nbytes);
	strcpy(node_value(candidate), key);

	existing = tree_search(&tree, candidate);
	if (existing) {
		/* Already interned: undo the tentative allocations. */
		node_free(1);
		string_free(nbytes);
		return node_offset(existing);
	}
	tree_insert(&tree, candidate);
	return node_offset(candidate);
}
/*
 * strtok_r() that returns the interned key of the next token instead of
 * the token itself, or ~0 when there are no more tokens.
 */
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
{
	char *piece = strtok_r(str, delim, saveptr);

	if (!piece)
		return ~0;
	return pool_intern(piece);
}
/*
 * Print the strings named by the keys in `seq` to `stream`, separated by
 * `delim`.  At most `len` keys are printed; a ~0 key ends the sequence
 * early.
 */
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
{
	uint32_t pos;

	for (pos = 0; pos < len; pos++) {
		if (seq[pos] == (uint32_t) ~0)
			break;
		/* A delimiter goes between items, never before the first. */
		if (pos > 0)
			fputc(delim, stream);
		fputs(pool_fetch(seq[pos]), stream);
	}
}
/*
 * Split `str` at any of the characters in `delim` and store the interned
 * keys of the pieces in `seq`, which has room for `sz` entries.
 *
 * Unless sz is 0 (result ~0, nothing stored) the array is always
 * ~0-terminated.  Returns the number of keys stored before the
 * terminator, or sz if the tokens did not all fit.  A NULL `str` gives an
 * empty sequence.
 */
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str)
{
	char *context = NULL;
	uint32_t count = 0;
	uint32_t tok;

	if (!sz)
		return ~0;

	tok = str ? pool_tok_r(str, delim, &context) : (uint32_t) ~0;
	while (count < sz) {
		seq[count] = tok;
		if (tok == (uint32_t) ~0)
			return count;
		count++;
		tok = pool_tok_r(NULL, delim, &context);
	}
	/* Out of room: overwrite the last slot with the terminator. */
	seq[sz - 1] = ~0;
	return sz;
}
void pool_reset(void)
{
node_reset();
string_reset();
}

11
vcs-svn/string_pool.h Normal file
View File

@ -0,0 +1,11 @@
#ifndef STRING_POOL_H_
#define STRING_POOL_H_
/*
 * This header declares no includes of its own; uint32_t and FILE must
 * already be declared by whoever includes it.
 */
/* Intern a string and return its key; a NULL string gives ~0. */
uint32_t pool_intern(const char *key);
/* Return the string stored under a key. */
const char *pool_fetch(uint32_t entry);
/* Like strtok_r(), but returns the key of the token (~0 when exhausted). */
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
/* Print up to len strings named by seq, separated by delim; ~0 ends early. */
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);
/*
 * Tokenize str into at most sz keys stored in seq, ~0-terminated.
 * Returns the number of tokens stored, sz if they did not all fit,
 * or ~0 if sz is 0.
 */
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);
/* Release the storage used by the string pool. */
void pool_reset(void);
#endif

43
vcs-svn/string_pool.txt Normal file
View File

@ -0,0 +1,43 @@
string_pool API
===============
The string_pool API provides facilities for replacing strings
with integer keys that can be more easily compared and stored.
The facilities are designed so that one could teach Git without
too much trouble to store the information needed for these keys to
remain valid over multiple executions.
Functions
---------
pool_intern::
Include a string in the string pool and get its key.
If that string is already in the pool, retrieves its
existing key.
pool_fetch::
Retrieve the string associated with a given key.
pool_tok_r::
Extract the key of the next token from a string.
Interface mimics strtok_r.
pool_print_seq::
Print a sequence of strings named by key to a file, using the
specified delimiter to separate them.
If NULL (key ~0) appears in the sequence, the sequence ends
early.
pool_tok_seq::
Split a string into tokens, storing the keys of segments
into a caller-provided array.
Unless sz is 0, the array will always be ~0-terminated.
If there is not enough room for all the tokens, the
array holds as many tokens as fit in the entries before
the terminating ~0. Return value is the index after the
last token, or sz if the tokens did not fit.
pool_reset::
Deallocate storage for the string pool.

302
vcs-svn/svndump.c Normal file
View File

@ -0,0 +1,302 @@
/*
* Parse and rearrange a svnadmin dump.
* Create the dump with:
* svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
*
* Licensed under a two-clause BSD-style license.
* See LICENSE for details.
*/
#include "cache.h"
#include "repo_tree.h"
#include "fast_export.h"
#include "line_buffer.h"
#include "obj_pool.h"
#include "string_pool.h"
/* Values of node_ctx.action, set from the Node-action header. */
#define NODEACT_REPLACE 4
#define NODEACT_DELETE 3
#define NODEACT_ADD 2
#define NODEACT_CHANGE 1
#define NODEACT_UNKNOWN 0
/* Parser state: which kind of record the headers being read belong to. */
#define DUMP_CTX 0
#define REV_CTX 1
#define NODE_CTX 2
/* Marks a property or text length for which no header has been seen. */
#define LENGTH_UNKNOWN (~0)
/* NOTE(review): not referenced by the code visible in this file - confirm it is still needed. */
#define DATE_RFC2822_LEN 31
/* Create memory pool for log messages */
obj_pool_gen(log, char, 4096)
/*
 * Replace the contents of the log pool with the first `length` bytes of
 * `log` (copied strncpy-style, i.e. zero padded after a NUL) and return
 * the pooled copy.
 */
static char *log_copy(uint32_t length, char *log)
{
	char *copy;

	/* Only one log message is kept at a time: drop the previous one. */
	log_free(log_pool.size);
	copy = log_pointer(log_alloc(length));
	strncpy(copy, log, length);
	return copy;
}
/*
 * State of the node record currently being read: its action (NODEACT_*),
 * announced property/text lengths (LENGTH_UNKNOWN if absent), copy source
 * revision and mode, blob mark, node type (a REPO_MODE_* value) and the
 * copy-source and destination paths as ~0-terminated key sequences.
 */
static struct {
uint32_t action, propLength, textLength, srcRev, srcMode, mark, type;
uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH];
} node_ctx;
/*
 * State of the revision currently being read.  `author` is a string-pool
 * key (~0 if unset); `log` points into the log pool (NULL if unset).
 */
static struct {
uint32_t revision, author;
unsigned long timestamp;
char *log;
} rev_ctx;
/* Per-dump state: string-pool keys of the repository UUID and URL. */
static struct {
uint32_t uuid, url;
} dump_ctx;
/* String-pool keys of the property names and header names the parser acts on. */
static struct {
uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid,
revision_number, node_path, node_kind, node_action,
node_copyfrom_path, node_copyfrom_rev, text_content_length,
prop_content_length, content_length;
} keys;
/*
 * Start a fresh node record whose destination path is `fname`
 * (split at '/'; NULL gives an empty path).  All other fields go back
 * to their "nothing seen yet" values.
 */
static void reset_node_ctx(char *fname)
{
	/* Paths: no copy source, destination taken from fname. */
	node_ctx.src[0] = ~0;
	pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname);

	/* Headers not seen yet. */
	node_ctx.action = NODEACT_UNKNOWN;
	node_ctx.propLength = LENGTH_UNKNOWN;
	node_ctx.textLength = LENGTH_UNKNOWN;

	/* Everything else is simply cleared. */
	node_ctx.type = 0;
	node_ctx.srcRev = 0;
	node_ctx.srcMode = 0;
	node_ctx.mark = 0;
}
/* Start a fresh revision record for `revision` with no metadata yet. */
static void reset_rev_ctx(uint32_t revision)
{
	rev_ctx.author = ~0;
	rev_ctx.log = NULL;
	rev_ctx.timestamp = 0;
	rev_ctx.revision = revision;
}
/* Start a fresh dump: remember the URL key and forget any UUID. */
static void reset_dump_ctx(uint32_t url)
{
	dump_ctx.uuid = ~0;
	dump_ctx.url = url;
}
/*
 * Intern the property and header names the parser acts on and remember
 * their keys, so that later comparisons are plain integer compares.
 */
static void init_keys(void)
{
	/* Interned in table order. */
	static const struct {
		uint32_t *slot;
		const char *name;
	} table[] = {
		{ &keys.svn_log, "svn:log" },
		{ &keys.svn_author, "svn:author" },
		{ &keys.svn_date, "svn:date" },
		{ &keys.svn_executable, "svn:executable" },
		{ &keys.svn_special, "svn:special" },
		{ &keys.uuid, "UUID" },
		{ &keys.revision_number, "Revision-number" },
		{ &keys.node_path, "Node-path" },
		{ &keys.node_kind, "Node-kind" },
		{ &keys.node_action, "Node-action" },
		{ &keys.node_copyfrom_path, "Node-copyfrom-path" },
		{ &keys.node_copyfrom_rev, "Node-copyfrom-rev" },
		{ &keys.text_content_length, "Text-content-length" },
		{ &keys.prop_content_length, "Prop-content-length" },
		{ &keys.content_length, "Content-length" },
	};
	size_t i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		*table[i].slot = pool_intern(table[i].name);
}
/*
 * Read a property block up to the "PROPS-END" line (or end of input).
 *
 * "K <len>" lines announce a property name and "V <len>" lines its value;
 * each is followed by that many bytes of data and one more line, which is
 * read and discarded.  Recognised properties update rev_ctx / node_ctx;
 * everything else is ignored.
 */
static void read_props(void)
{
	uint32_t key = ~0;
	char *line;

	for (;;) {
		uint32_t len;
		char *val;

		line = buffer_read_line();
		if (!line || !strcmp(line, "PROPS-END"))
			return;

		if (!strncmp(line, "K ", 2)) {
			len = atoi(&line[2]);
			key = pool_intern(buffer_read_string(len));
			buffer_read_line();
			continue;
		}
		if (strncmp(line, "V ", 2))
			continue;

		len = atoi(&line[2]);
		val = buffer_read_string(len);
		if (key == keys.svn_log) {
			/* Value length excludes terminating nul. */
			rev_ctx.log = log_copy(len + 1, val);
		} else if (key == keys.svn_author) {
			rev_ctx.author = pool_intern(val);
		} else if (key == keys.svn_date) {
			if (parse_date_basic(val, &rev_ctx.timestamp, NULL))
				fprintf(stderr, "Invalid timestamp: %s\n", val);
		} else if (key == keys.svn_executable) {
			node_ctx.type = REPO_MODE_EXE;
		} else if (key == keys.svn_special) {
			node_ctx.type = REPO_MODE_LNK;
		}
		/* A value consumes the pending key. */
		key = ~0;
		buffer_read_line();
	}
}
/*
 * Apply the node record described by node_ctx to the repository tree and
 * consume its text content, either exporting it as a blob or skipping it.
 */
static void handle_node(void)
{
/* A non-zero property length was announced: read the property block. */
if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength)
read_props();
/* Copy source given: copy it to the destination and remember its mode. */
if (node_ctx.srcRev)
node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst);
/* Text announced for something that is not a directory: it needs a blob mark. */
if (node_ctx.textLength != LENGTH_UNKNOWN &&
node_ctx.type != REPO_MODE_DIR)
node_ctx.mark = next_blob_mark();
if (node_ctx.action == NODEACT_DELETE) {
repo_delete(node_ctx.dst);
} else if (node_ctx.action == NODEACT_CHANGE ||
node_ctx.action == NODEACT_REPLACE) {
/*
 * Replaced directory: keep the existing mode.  Otherwise, with
 * properties the mode comes from node_ctx.type; with text only, the
 * existing mode is kept and remembered in srcMode.
 */
if (node_ctx.action == NODEACT_REPLACE &&
node_ctx.type == REPO_MODE_DIR)
repo_replace(node_ctx.dst, node_ctx.mark);
else if (node_ctx.propLength != LENGTH_UNKNOWN)
repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark);
else if (node_ctx.textLength != LENGTH_UNKNOWN)
node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
} else if (node_ctx.action == NODEACT_ADD) {
/*
 * Add with a copy source: the entry already exists from the copy above,
 * so it is modified or replaced.  Without one, a new entry is added for
 * a directory or for anything that comes with text.
 */
if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN)
repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark);
else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN)
node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) ||
node_ctx.textLength != LENGTH_UNKNOWN)
repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark);
}
/* No properties were given: fall back to the mode obtained above, if any. */
if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode)
node_ctx.type = node_ctx.srcMode;
/* Export the text under its mark, or skip over text that is not wanted. */
if (node_ctx.mark)
fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength);
else if (node_ctx.textLength != LENGTH_UNKNOWN)
buffer_skip_bytes(node_ctx.textLength);
}
/* Commit the revision described by rev_ctx; revision 0 is never committed. */
static void handle_revision(void)
{
	if (!rev_ctx.revision)
		return;
	repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log,
		    dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp);
}
/*
 * Parse the dump available through the line buffer and replay it into the
 * repository tree.  `url` is recorded as the URL of the dump.
 *
 * The input is read one "Key: value" header line at a time; lines without
 * ": " are ignored.  Headers either update the current dump, revision or
 * node context or, for the ones that start a new record, first flush the
 * record that was being built.  A Content-length header ends the headers
 * of a record: for a revision its properties are read, for a node the node
 * is handled right away.
 */
void svndump_read(const char *url)
{
	char *val;
	char *t;
	uint32_t active_ctx = DUMP_CTX;
	uint32_t len;
	uint32_t key;

	reset_dump_ctx(pool_intern(url));
	while ((t = buffer_read_line())) {
		val = strstr(t, ": ");
		if (!val)
			continue;
		/* Split in place: t becomes the header name, val its value. */
		*val++ = '\0';
		*val++ = '\0';
		key = pool_intern(t);

		if (key == keys.uuid) {
			dump_ctx.uuid = pool_intern(val);
		} else if (key == keys.revision_number) {
			/* A new revision starts: flush the pending node and revision. */
			if (active_ctx == NODE_CTX)
				handle_node();
			if (active_ctx != DUMP_CTX)
				handle_revision();
			active_ctx = REV_CTX;
			reset_rev_ctx(atoi(val));
		} else if (key == keys.node_path) {
			/* A new node starts: flush the pending node. */
			if (active_ctx == NODE_CTX)
				handle_node();
			active_ctx = NODE_CTX;
			reset_node_ctx(val);
		} else if (key == keys.node_kind) {
			if (!strcmp(val, "dir"))
				node_ctx.type = REPO_MODE_DIR;
			else if (!strcmp(val, "file"))
				node_ctx.type = REPO_MODE_BLB;
			else
				fprintf(stderr, "Unknown node-kind: %s\n", val);
		} else if (key == keys.node_action) {
			if (!strcmp(val, "delete")) {
				node_ctx.action = NODEACT_DELETE;
			} else if (!strcmp(val, "add")) {
				node_ctx.action = NODEACT_ADD;
			} else if (!strcmp(val, "change")) {
				node_ctx.action = NODEACT_CHANGE;
			} else if (!strcmp(val, "replace")) {
				node_ctx.action = NODEACT_REPLACE;
			} else {
				fprintf(stderr, "Unknown node-action: %s\n", val);
				node_ctx.action = NODEACT_UNKNOWN;
			}
		} else if (key == keys.node_copyfrom_path) {
			pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val);
		} else if (key == keys.node_copyfrom_rev) {
			node_ctx.srcRev = atoi(val);
		} else if (key == keys.text_content_length) {
			node_ctx.textLength = atoi(val);
		} else if (key == keys.prop_content_length) {
			node_ctx.propLength = atoi(val);
		} else if (key == keys.content_length) {
			len = atoi(val);
			/* Consume the line that follows the headers. */
			buffer_read_line();
			if (active_ctx == REV_CTX) {
				read_props();
			} else if (active_ctx == NODE_CTX) {
				handle_node();
				active_ctx = REV_CTX;
			} else {
				/* len is unsigned: print it as such, not with %d. */
				fprintf(stderr, "Unexpected content length header: %lu\n",
					(unsigned long) len);
				buffer_skip_bytes(len);
			}
		}
	}
	/* End of input: flush whatever record was still being built. */
	if (active_ctx == NODE_CTX)
		handle_node();
	if (active_ctx != DUMP_CTX)
		handle_revision();
}
/*
 * Prepare for reading a dump from `filename`: open the line buffer,
 * initialise the repository state, clear all parser contexts and intern
 * the header names.
 */
void svndump_init(const char *filename)
{
	buffer_init(filename);
	repo_init();

	/* All three contexts start out empty. */
	reset_node_ctx(NULL);
	reset_rev_ctx(0);
	reset_dump_ctx(~0);

	init_keys();
}
/*
 * Tear down the parser: reset the pools and contexts, close the line
 * buffer and report input or output errors on stderr.
 */
void svndump_deinit(void)
{
	log_reset();
	repo_reset();

	reset_node_ctx(NULL);
	reset_rev_ctx(0);
	reset_dump_ctx(~0);

	if (buffer_deinit() != 0)
		fprintf(stderr, "Input error\n");
	if (ferror(stdout) != 0)
		fprintf(stderr, "Output error\n");
}
/*
 * Reset the log pool, the line buffer, the repository state and all
 * parser contexts.
 */
void svndump_reset(void)
{
	log_reset();
	buffer_reset();
	repo_reset();

	reset_node_ctx(NULL);
	reset_rev_ctx(0);
	reset_dump_ctx(~0);
}

9
vcs-svn/svndump.h Normal file
View File

@ -0,0 +1,9 @@
#ifndef SVNDUMP_H_
#define SVNDUMP_H_
/* Open the input named by filename and set up the parser state. */
void svndump_init(const char *filename);
/* Parse the whole dump; url is recorded as the URL it came from. */
void svndump_read(const char *url);
/* Reset all state, close the input and report I/O errors on stderr. */
void svndump_deinit(void);
/* Reset the parser state, including the line buffer. */
void svndump_reset(void);
#endif

236
vcs-svn/trp.h Normal file
View File

@ -0,0 +1,236 @@
/*
* C macro implementation of treaps.
*
* Usage:
* #include <stdint.h>
* #include "trp.h"
* trp_gen(...)
*
* Licensed under a two-clause BSD-style license.
* See LICENSE for details.
*/
#ifndef TRP_H_
#define TRP_H_
/* Keeps the compiler quiet about generated functions a user never calls. */
#define MAYBE_UNUSED __attribute__((__unused__))
/* Node structure: offsets of the left and right child, ~0 for "none". */
struct trp_node {
uint32_t trpn_left;
uint32_t trpn_right;
};
/* Root structure: offset of the root node, ~0 for an empty treap. */
struct trp_root {
uint32_t trp_root;
};
/*
 * Pointer/Offset conversion.
 * Nodes are addressed by their offset in the object pool named a_base;
 * these forward to that pool's <a_base>_pointer() and <a_base>_offset().
 */
#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset))
#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer))
/*
 * Copy-on-write: if the node at a_offset lies below the pool's committed
 * mark, allocate a new node, copy the old one into it and change a_offset
 * (which must therefore be an lvalue) to refer to the copy.
 */
#define trpn_modify(a_base, a_offset) \
do { \
if ((a_offset) < a_base##_pool.committed) { \
uint32_t old_offset = (a_offset);\
(a_offset) = a_base##_alloc(1); \
*trpn_pointer(a_base, a_offset) = \
*trpn_pointer(a_base, old_offset); \
} \
} while (0)
/* Left accessors.  The setter goes through trpn_modify, so a_node may change. */
#define trp_left_get(a_base, a_field, a_node) \
(trpn_pointer(a_base, a_node)->a_field.trpn_left)
#define trp_left_set(a_base, a_field, a_node, a_left) \
do { \
trpn_modify(a_base, a_node); \
trp_left_get(a_base, a_field, a_node) = (a_left); \
} while (0)
/* Right accessors.  The setter goes through trpn_modify, so a_node may change. */
#define trp_right_get(a_base, a_field, a_node) \
(trpn_pointer(a_base, a_node)->a_field.trpn_right)
#define trp_right_set(a_base, a_field, a_node, a_right) \
do { \
trpn_modify(a_base, a_node); \
trp_right_get(a_base, a_field, a_node) = (a_right); \
} while (0)
/*
 * Fibonacci hash function.
 * The multiplier is the nearest prime to (2^32 times (sqrt(5) - 1)/2).
 * See Knuth §6.4: volume 3, 3rd ed, p518.
 */
#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node))
/* Priority accessors: a node's priority is the hash of its offset. */
#define trp_prio_get(a_node) trpn_hash(a_node)
/* Node initializer: a new node has no children. */
#define trp_node_new(a_base, a_field, a_node) \
do { \
trp_left_set(a_base, a_field, (a_node), ~0); \
trp_right_set(a_base, a_field, (a_node), ~0); \
} while (0)
/* Internal utility macros. */
/*
 * Store in r_node the leftmost node of the subtree rooted at a_root.
 * Beware: for an empty subtree this executes `return NULL` in the
 * function that uses the macro.
 */
#define trpn_first(a_base, a_field, a_root, r_node) \
do { \
(r_node) = (a_root); \
if ((r_node) == ~0) \
return NULL; \
while (~trp_left_get(a_base, a_field, (r_node))) \
(r_node) = trp_left_get(a_base, a_field, (r_node)); \
} while (0)
/* Rotate a_node down to the left; r_node receives the new subtree root. */
#define trpn_rotate_left(a_base, a_field, a_node, r_node) \
do { \
(r_node) = trp_right_get(a_base, a_field, (a_node)); \
trp_right_set(a_base, a_field, (a_node), \
trp_left_get(a_base, a_field, (r_node))); \
trp_left_set(a_base, a_field, (r_node), (a_node)); \
} while (0)
/* Rotate a_node down to the right; r_node receives the new subtree root. */
#define trpn_rotate_right(a_base, a_field, a_node, r_node) \
do { \
(r_node) = trp_left_get(a_base, a_field, (a_node)); \
trp_left_set(a_base, a_field, (a_node), \
trp_right_get(a_base, a_field, (r_node))); \
trp_right_set(a_base, a_field, (r_node), (a_node)); \
} while (0)
/*
 * Generate a type-specific treap implementation.
 *
 *   a_attr   storage class of the generated functions (e.g. static)
 *   a_pre    prefix of the generated function names
 *   a_type   node type; must contain a struct trp_node member
 *   a_field  name of that member
 *   a_base   name of the object pool all nodes live in
 *   a_cmp    int cmp(a_type *a, a_type *b), ordering the nodes
 *
 * Generated: <a_pre>first, next, search, nsearch, insert and remove, plus
 * the insert_recurse/remove_recurse helpers working on node offsets.
 *
 * The definition ends on the closing brace of the last function, without
 * a line continuation, so whatever follows the macro in this file (such
 * as the closing #endif of the include guard) is never spliced into it.
 */
#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \
/* First node in sorted order, or NULL for an empty treap. */ \
a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \
{ \
	uint32_t ret; \
	trpn_first(a_base, a_field, treap->trp_root, ret); \
	return trpn_pointer(a_base, ret); \
} \
/* Successor of node in sorted order; node must be in the treap. */ \
a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \
{ \
	uint32_t ret; \
	uint32_t offset = trpn_offset(a_base, node); \
	if (~trp_right_get(a_base, a_field, offset)) { \
		/* Leftmost node of the right subtree. */ \
		trpn_first(a_base, a_field, \
			trp_right_get(a_base, a_field, offset), ret); \
	} else { \
		/* Otherwise: the last ancestor we turn left at on the way down. */ \
		uint32_t tnode = treap->trp_root; \
		ret = ~0; \
		while (1) { \
			int cmp = (a_cmp)(trpn_pointer(a_base, offset), \
				trpn_pointer(a_base, tnode)); \
			if (cmp < 0) { \
				ret = tnode; \
				tnode = trp_left_get(a_base, a_field, tnode); \
			} else if (cmp > 0) { \
				tnode = trp_right_get(a_base, a_field, tnode); \
			} else { \
				break; \
			} \
		} \
	} \
	return trpn_pointer(a_base, ret); \
} \
/* Node comparing equal to key, or the result of looking up ~0 if none. */ \
a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \
{ \
	int cmp; \
	uint32_t ret = treap->trp_root; \
	while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \
		if (cmp < 0) { \
			ret = trp_left_get(a_base, a_field, ret); \
		} else { \
			ret = trp_right_get(a_base, a_field, ret); \
		} \
	} \
	return trpn_pointer(a_base, ret); \
} \
/* Like search, but the walk stops at a node that has no left child */ \
/* instead of stepping off the tree to the left. */ \
a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \
{ \
	int cmp; \
	uint32_t ret = treap->trp_root; \
	while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \
		if (cmp < 0) { \
			if (!~trp_left_get(a_base, a_field, ret)) \
				break; \
			ret = trp_left_get(a_base, a_field, ret); \
		} else { \
			ret = trp_right_get(a_base, a_field, ret); \
		} \
	} \
	return trpn_pointer(a_base, ret); \
} \
/* Insert ins_node below cur_node; returns the new root of that subtree. */ \
a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \
{ \
	if (cur_node == ~0) { \
		return ins_node; \
	} else { \
		uint32_t ret; \
		int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \
			trpn_pointer(a_base, cur_node)); \
		if (cmp < 0) { \
			uint32_t left = a_pre##insert_recurse( \
				trp_left_get(a_base, a_field, cur_node), ins_node); \
			trp_left_set(a_base, a_field, cur_node, left); \
			/* Rotate when the child has the smaller priority value. */ \
			if (trp_prio_get(left) < trp_prio_get(cur_node)) \
				trpn_rotate_right(a_base, a_field, cur_node, ret); \
			else \
				ret = cur_node; \
		} else { \
			uint32_t right = a_pre##insert_recurse( \
				trp_right_get(a_base, a_field, cur_node), ins_node); \
			trp_right_set(a_base, a_field, cur_node, right); \
			if (trp_prio_get(right) < trp_prio_get(cur_node)) \
				trpn_rotate_left(a_base, a_field, cur_node, ret); \
			else \
				ret = cur_node; \
		} \
		return ret; \
	} \
} \
/* Insert node into the treap; its child links are reset first. */ \
a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \
{ \
	uint32_t offset = trpn_offset(a_base, node); \
	trp_node_new(a_base, a_field, offset); \
	treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \
} \
/* Remove rem_node from the subtree at cur_node; returns the new subtree */ \
/* root.  The node is rotated downwards until it is a leaf, then dropped. */ \
a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \
{ \
	int cmp = a_cmp(trpn_pointer(a_base, rem_node), \
		trpn_pointer(a_base, cur_node)); \
	if (cmp == 0) { \
		uint32_t ret; \
		uint32_t left = trp_left_get(a_base, a_field, cur_node); \
		uint32_t right = trp_right_get(a_base, a_field, cur_node); \
		if (left == ~0) { \
			if (right == ~0) \
				return ~0; \
		} else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \
			trpn_rotate_right(a_base, a_field, cur_node, ret); \
			right = a_pre##remove_recurse(cur_node, rem_node); \
			trp_right_set(a_base, a_field, ret, right); \
			return ret; \
		} \
		trpn_rotate_left(a_base, a_field, cur_node, ret); \
		left = a_pre##remove_recurse(cur_node, rem_node); \
		trp_left_set(a_base, a_field, ret, left); \
		return ret; \
	} else if (cmp < 0) { \
		uint32_t left = a_pre##remove_recurse( \
			trp_left_get(a_base, a_field, cur_node), rem_node); \
		trp_left_set(a_base, a_field, cur_node, left); \
		return cur_node; \
	} else { \
		uint32_t right = a_pre##remove_recurse( \
			trp_right_get(a_base, a_field, cur_node), rem_node); \
		trp_right_set(a_base, a_field, cur_node, right); \
		return cur_node; \
	} \
} \
/* Remove node from the treap. */ \
a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \
{ \
	treap->trp_root = a_pre##remove_recurse(treap->trp_root, \
		trpn_offset(a_base, node)); \
}
#endif

103
vcs-svn/trp.txt Normal file
View File

@ -0,0 +1,103 @@
Motivation
==========
Treaps provide a memory-efficient binary search tree structure.
Insertion/deletion/search are about as fast in the average
case as red-black trees and the chances of worst-case behavior are
vanishingly small, thanks to (pseudo-)randomness. The bad worst-case
behavior is a small price to pay, given that treaps are much simpler
to implement.
API
===
The trp API generates a data structure and functions to handle a
large growing set of objects stored in a pool.
The caller:
. Specifies parameters for the generated functions with the
trp_gen(static, foo_, ...) macro.
. Allocates a `struct trp_root` variable and sets it to {~0}.
. Adds new nodes to the set using `foo_insert`.
. Can find a specific item in the set using `foo_search`.
. Can iterate over items in the set using `foo_first` and `foo_next`.
. Can remove an item from the set using `foo_remove`.
Example:
----
struct ex_node {
const char *s;
struct trp_node ex_link;
};
static struct trp_root ex_base = {~0};
obj_pool_gen(ex, struct ex_node, 4096);
trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp)
struct ex_node *item;
item = ex_pointer(ex_alloc(1));
item->s = "hello";
ex_insert(&ex_base, item);
item = ex_pointer(ex_alloc(1));
item->s = "goodbye";
ex_insert(&ex_base, item);
for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item))
printf("%s\n", item->s);
----
Functions
---------
trp_gen(attr, foo_, node_type, link_field, pool, cmp)::
Generate a type-specific treap implementation.
+
. The storage class for generated functions will be 'attr' (e.g., `static`).
. Generated function names are prefixed with 'foo_' (e.g., `treap_`).
. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`).
This type must be a struct with at least one `struct trp_node` field
to point to its children.
. The field used to access child nodes will be 'link_field'.
. All treap nodes must lie in the 'pool' object pool.
. Treap nodes must be totally ordered by the 'cmp' relation, with the
following prototype:
+
int (*cmp)(node_type \*a, node_type \*b)
+
and returning a value less than, equal to, or greater than zero
according to the result of comparison.
void foo_insert(struct trp_root *treap, node_type \*node)::
Insert node into treap. If inserted multiple times,
a node will appear in the treap multiple times.
void foo_remove(struct trp_root *treap, node_type \*node)::
Remove node from treap. Caller must ensure node is
present in treap before using this function.
node_type *foo_search(struct trp_root \*treap, node_type \*key)::
Search for a node that matches key. If no match is found,
result is NULL.
node_type *foo_nsearch(struct trp_root \*treap, node_type \*key)::
Like `foo_search`, but if the key is missing return what
would be key's successor, were key in treap (NULL if no
successor).
node_type *foo_first(struct trp_root \*treap)::
Find the first item from the treap, in sorted order.
node_type *foo_next(struct trp_root \*treap, node_type \*node)::
Find the next item.