From e6812cfa9aba69a8c9d83b0710291b27bff0f7a3 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Sun, 5 May 2013 17:38:53 -0500 Subject: [PATCH 1/3] fast-export: do not parse non-commit objects while reading marks file We read from the marks file and keep only marked commits, but in order to find the type of each object we parse the whole thing, which is slow, especially in big repositories with lots of big files. There's no need for that; we can query the object information with sha1_object_info(). Before this patch, loading the objects of a fresh emacs import with 260598 blobs took 14 minutes; after this patch, it takes 3 seconds. This is the way fast-import does it. Also die if the object is not found (like fast-import). Signed-off-by: Felipe Contreras Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index d60d675f6f..dd561e5031 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -613,6 +613,7 @@ static void import_marks(char *input_file) char *line_end, *mark_end; unsigned char sha1[20]; struct object *object; + enum object_type type; line_end = strchr(line, '\n'); if (line[0] != ':' || !line_end) @@ -627,17 +628,19 @@ static void import_marks(char *input_file) if (last_idnum < mark) last_idnum = mark; - object = parse_object(sha1); - if (!object) + type = sha1_object_info(sha1, NULL); + if (type < 0) + die("object not found: %s", sha1_to_hex(sha1)); + + if (type != OBJ_COMMIT) + /* only commits */ continue; + object = parse_object(sha1); + if (object->flags & SHOWN) error("Object %s already has a mark", sha1_to_hex(sha1)); - if (object->type != OBJ_COMMIT) - /* only commits */ - continue; - mark_object(object, mark); object->flags |= SHOWN; From 47bd9bf82daeac71b28a5a697ecc44e70b205e18 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Sun, 5 May 2013 17:38:54 -0500 Subject: [PATCH 2/3] fast-export: don't parse 
commits while reading marks file We don't need the parsed objects at this point, merely the information that they have marks. This seems to be three times faster in my setup with lots of objects. Signed-off-by: Felipe Contreras Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index dd561e5031..18fdfb31af 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -613,6 +613,7 @@ static void import_marks(char *input_file) char *line_end, *mark_end; unsigned char sha1[20]; struct object *object; + struct commit *commit; enum object_type type; line_end = strchr(line, '\n'); @@ -636,7 +637,11 @@ static void import_marks(char *input_file) /* only commits */ continue; - object = parse_object(sha1); + commit = lookup_commit(sha1); + if (!commit) + die("not a commit? can't happen: %s", sha1_to_hex(sha1)); + + object = &commit->object; if (object->flags & SHOWN) error("Object %s already has a mark", sha1_to_hex(sha1)); From 45c5d4a56bc3ef3b5088a07bdab12cef8163e61d Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Sun, 5 May 2013 17:38:52 -0500 Subject: [PATCH 3/3] fast-{import,export}: use get_sha1_hex() to read from marks file It's wrong to call get_sha1() when the entries should be plain SHA-1s, and it is also inefficient. 
Signed-off-by: Felipe Contreras Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 2 +- fast-import.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 18fdfb31af..d1d68e9fc6 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -623,7 +623,7 @@ static void import_marks(char *input_file) mark = strtoumax(line + 1, &mark_end, 10); if (!mark || mark_end == line + 1 - || *mark_end != ' ' || get_sha1(mark_end + 1, sha1)) + || *mark_end != ' ' || get_sha1_hex(mark_end + 1, sha1)) die("corrupt mark line: %s", line); if (last_idnum < mark) diff --git a/fast-import.c b/fast-import.c index 5f539d7d8f..3f3214935f 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1822,7 +1822,7 @@ static void read_marks(void) *end = 0; mark = strtoumax(line + 1, &end, 10); if (!mark || end == line + 1 - || *end != ' ' || get_sha1(end + 1, sha1)) + || *end != ' ' || get_sha1_hex(end + 1, sha1)) die("corrupt mark line: %s", line); e = find_object(sha1); if (!e) {