From 0db71e0fa94c1857f98890928098e8f4c8ac6f26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 1 Apr 2012 00:10:11 +0200 Subject: [PATCH 1/4] add mergesort() for linked lists This adds a generic bottom-up mergesort implementation for singly linked lists. It was inspired by Simon Tatham's webpage on the topic[1], but not so much by his implementation -- for no good reason, really, just a case of NIH. [1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- .gitignore | 1 + Makefile | 3 ++ mergesort.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ mergesort.h | 9 ++++++ test-mergesort.c | 52 ++++++++++++++++++++++++++++++++++ 5 files changed, 138 insertions(+) create mode 100644 mergesort.c create mode 100644 mergesort.h create mode 100644 test-mergesort.c diff --git a/.gitignore b/.gitignore index 3b7680ea1e..1787c8185d 100644 --- a/.gitignore +++ b/.gitignore @@ -180,6 +180,7 @@ /test-index-version /test-line-buffer /test-match-trees +/test-mergesort /test-mktemp /test-obj-pool /test-parse-options diff --git a/Makefile b/Makefile index a782409306..330a7d5ae9 100644 --- a/Makefile +++ b/Makefile @@ -465,6 +465,7 @@ TEST_PROGRAMS_NEED_X += test-genrandom TEST_PROGRAMS_NEED_X += test-index-version TEST_PROGRAMS_NEED_X += test-line-buffer TEST_PROGRAMS_NEED_X += test-match-trees +TEST_PROGRAMS_NEED_X += test-mergesort TEST_PROGRAMS_NEED_X += test-mktemp TEST_PROGRAMS_NEED_X += test-obj-pool TEST_PROGRAMS_NEED_X += test-parse-options @@ -578,6 +579,7 @@ LIB_H += log-tree.h LIB_H += mailmap.h LIB_H += merge-file.h LIB_H += merge-recursive.h +LIB_H += mergesort.h LIB_H += notes.h LIB_H += notes-cache.h LIB_H += notes-merge.h @@ -681,6 +683,7 @@ LIB_OBJS += mailmap.o LIB_OBJS += match-trees.o LIB_OBJS += merge-file.o LIB_OBJS += merge-recursive.o +LIB_OBJS += mergesort.o LIB_OBJS += name-hash.o LIB_OBJS += notes.o LIB_OBJS += notes-cache.o diff --git a/mergesort.c 
b/mergesort.c new file mode 100644 index 0000000000..d084c602d5 --- /dev/null +++ b/mergesort.c @@ -0,0 +1,73 @@ +#include "cache.h" +#include "mergesort.h" + +struct mergesort_sublist { + void *ptr; + unsigned long len; +}; + +static void *get_nth_next(void *list, unsigned long n, + void *(*get_next_fn)(const void *)) +{ + while (n-- && list) + list = get_next_fn(list); + return list; +} + +static void *pop_item(struct mergesort_sublist *l, + void *(*get_next_fn)(const void *)) +{ + void *p = l->ptr; + l->ptr = get_next_fn(l->ptr); + l->len = l->ptr ? (l->len - 1) : 0; + return p; +} + +void *mergesort(void *list, + void *(*get_next_fn)(const void *), + void (*set_next_fn)(void *, void *), + int (*compare_fn)(const void *, const void *)) +{ + unsigned long l; + + if (!list) + return NULL; + for (l = 1; ; l *= 2) { + void *curr; + struct mergesort_sublist p, q; + + p.ptr = list; + q.ptr = get_nth_next(p.ptr, l, get_next_fn); + if (!q.ptr) + break; + p.len = q.len = l; + + if (compare_fn(p.ptr, q.ptr) > 0) + list = curr = pop_item(&q, get_next_fn); + else + list = curr = pop_item(&p, get_next_fn); + + while (p.ptr) { + while (p.len || q.len) { + void *prev = curr; + + if (!p.len) + curr = pop_item(&q, get_next_fn); + else if (!q.len) + curr = pop_item(&p, get_next_fn); + else if (compare_fn(p.ptr, q.ptr) > 0) + curr = pop_item(&q, get_next_fn); + else + curr = pop_item(&p, get_next_fn); + set_next_fn(prev, curr); + } + p.ptr = q.ptr; + p.len = l; + q.ptr = get_nth_next(p.ptr, l, get_next_fn); + q.len = q.ptr ? 
l : 0; + + } + set_next_fn(curr, NULL); + } + return list; +} diff --git a/mergesort.h b/mergesort.h new file mode 100644 index 0000000000..d6e5f4a732 --- /dev/null +++ b/mergesort.h @@ -0,0 +1,9 @@ +#ifndef MERGESORT_H +#define MERGESORT_H + +void *mergesort(void *list, + void *(*get_next_fn)(const void *), + void (*set_next_fn)(void *, void *), + int (*compare_fn)(const void *, const void *)); + +#endif diff --git a/test-mergesort.c b/test-mergesort.c new file mode 100644 index 0000000000..1dd82fd67f --- /dev/null +++ b/test-mergesort.c @@ -0,0 +1,52 @@ +#include "cache.h" +#include "mergesort.h" + +struct line { + char *text; + struct line *next; +}; + +static void *get_next(const void *a) +{ + return ((const struct line *)a)->next; +} + +static void set_next(void *a, void *b) +{ + ((struct line *)a)->next = b; +} + +static int compare_strings(const void *a, const void *b) +{ + const struct line *x = a, *y = b; + return strcmp(x->text, y->text); +} + +int main(int argc, const char **argv) +{ + struct line *line, *p = NULL, *lines = NULL; + struct strbuf sb = STRBUF_INIT; + + for (;;) { + if (strbuf_getwholeline(&sb, stdin, '\n')) + break; + line = xmalloc(sizeof(struct line)); + line->text = strbuf_detach(&sb, NULL); + if (p) { + line->next = p->next; + p->next = line; + } else { + line->next = NULL; + lines = line; + } + p = line; + } + + lines = mergesort(lines, get_next, set_next, compare_strings); + + while (lines) { + printf("%s", lines->text); + lines = lines->next; + } + return 0; +} From 46905893b20ac2a044c06a0eecc12425a8405e69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 1 Apr 2012 00:10:39 +0200 Subject: [PATCH 2/4] commit: use mergesort() in commit_list_sort_by_date() Replace the insertion sort in commit_list_sort_by_date() with a call to the generic mergesort function. This sets the stage for using commit_list_sort_by_date() for larger lists, as shown in the next patch. 
Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- commit.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/commit.c b/commit.c index 35af4988f0..b9ce569442 100644 --- a/commit.c +++ b/commit.c @@ -7,6 +7,7 @@ #include "revision.h" #include "notes.h" #include "gpg-interface.h" +#include "mergesort.h" int save_commit_buffer = 1; @@ -390,15 +391,31 @@ struct commit_list * commit_list_insert_by_date(struct commit *item, struct comm return commit_list_insert(item, pp); } +static int commit_list_compare_by_date(const void *a, const void *b) +{ + unsigned long a_date = ((const struct commit_list *)a)->item->date; + unsigned long b_date = ((const struct commit_list *)b)->item->date; + if (a_date < b_date) + return 1; + if (a_date > b_date) + return -1; + return 0; +} + +static void *commit_list_get_next(const void *a) +{ + return ((const struct commit_list *)a)->next; +} + +static void commit_list_set_next(void *a, void *next) +{ + ((struct commit_list *)a)->next = next; +} void commit_list_sort_by_date(struct commit_list **list) { - struct commit_list *ret = NULL; - while (*list) { - commit_list_insert_by_date((*list)->item, &ret); - *list = (*list)->next; - } - *list = ret; + *list = mergesort(*list, commit_list_get_next, commit_list_set_next, + commit_list_compare_by_date); } struct commit *pop_most_recent_commit(struct commit_list **list, From fbc08ea177f8284d10c62ad39de51edb21af88b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 1 Apr 2012 00:11:01 +0200 Subject: [PATCH 3/4] revision: insert unsorted, then sort in prepare_revision_walk() Speed up prepare_revision_walk() by adding commits without sorting to the commit_list and at the end sort the list in one go. Thanks to mergesort() working behind the scenes, this is a lot faster for large numbers of commits than the current insert sort. 
Also introduce and use commit_list_reverse(), to keep the ordering of commits sharing the same commit date unchanged. That's because commit_list_insert_by_date() sorts commits with descending date, but adds later entries with the same date entries last, while commit_list_insert() always inserts entries at the top. The following commit_list_sort_by_date() keeps the order of entries sharing the same date. Jeff's test case, in a repo with lots of refs, was to run: # make a new commit on top of HEAD, but not yet referenced sha1=`git commit-tree HEAD^{tree} -p HEAD Signed-off-by: Junio C Hamano --- commit.c | 15 +++++++++++++++ commit.h | 1 + revision.c | 4 +++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/commit.c b/commit.c index b9ce569442..0759b2ca65 100644 --- a/commit.c +++ b/commit.c @@ -361,6 +361,21 @@ struct commit_list *commit_list_insert(struct commit *item, struct commit_list * return new_list; } +void commit_list_reverse(struct commit_list **list_p) +{ + struct commit_list *prev = NULL, *curr, *next; + + if (!list_p) + return; + /* Only dereference list_p after the NULL check above. */ + for (curr = *list_p; curr; curr = next) { + next = curr->next; + curr->next = prev; + prev = curr; + } + *list_p = prev; +} + unsigned commit_list_count(const struct commit_list *l) { unsigned c = 0; diff --git a/commit.h b/commit.h index 154c0e34ff..f8d250d6f6 100644 --- a/commit.h +++ b/commit.h @@ -57,6 +57,7 @@ unsigned commit_list_count(const struct commit_list *l); struct commit_list *commit_list_insert_by_date(struct commit *item, struct commit_list **list); void commit_list_sort_by_date(struct commit_list **list); +void commit_list_reverse(struct commit_list **list_p); void free_commit_list(struct commit_list *list); diff --git a/revision.c b/revision.c index 064e351084..a75a1d7201 100644 --- a/revision.c +++ b/revision.c @@ -2054,11 +2054,13 @@ int prepare_revision_walk(struct rev_info *revs) if (commit) { if (!(commit->object.flags & SEEN)) { commit->object.flags |= SEEN; -
commit_list_insert_by_date(commit, &revs->commits); + commit_list_insert(commit, &revs->commits); } } e++; } + commit_list_reverse(&revs->commits); + commit_list_sort_by_date(&revs->commits); if (!revs->leak_pending) free(list); From 7365c95d2d67cbbb74c2040918d2ecde06231d93 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 17 Apr 2012 11:07:01 -0700 Subject: [PATCH 4/4] mergesort: rename it to llist_mergesort() Even though the function is generic enough, <anything>sort() inherits connotations from the standard function qsort() that sorts an array. Rename it to llist_mergesort() and describe the external interface in its header file. This incidentally avoids name clashes with mergesort() some platforms declare in, and contaminate user namespace with, their <stdlib.h>. Reported-by: Brian Gernhardt Signed-off-by: Junio C Hamano --- commit.c | 4 ++-- mergesort.c | 8 ++++---- mergesort.h | 16 ++++++++++++---- test-mergesort.c | 2 +- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/commit.c b/commit.c index 0759b2ca65..84304c00ff 100644 --- a/commit.c +++ b/commit.c @@ -429,8 +429,8 @@ static void commit_list_set_next(void *a, void *next) void commit_list_sort_by_date(struct commit_list **list) { - *list = mergesort(*list, commit_list_get_next, commit_list_set_next, - commit_list_compare_by_date); + *list = llist_mergesort(*list, commit_list_get_next, commit_list_set_next, + commit_list_compare_by_date); } struct commit *pop_most_recent_commit(struct commit_list **list, diff --git a/mergesort.c b/mergesort.c index d084c602d5..e5fdf2ee4a 100644 --- a/mergesort.c +++ b/mergesort.c @@ -23,10 +23,10 @@ static void *pop_item(struct mergesort_sublist *l, return p; } -void *mergesort(void *list, - void *(*get_next_fn)(const void *), - void (*set_next_fn)(void *, void *), - int (*compare_fn)(const void *, const void *)) +void *llist_mergesort(void *list, + void *(*get_next_fn)(const void *), + void (*set_next_fn)(void *, void *), + int (*compare_fn)(const void *, const void
*)) { unsigned long l; diff --git a/mergesort.h b/mergesort.h index d6e5f4a732..644cff1f96 100644 --- a/mergesort.h +++ b/mergesort.h @@ -1,9 +1,17 @@ #ifndef MERGESORT_H #define MERGESORT_H -void *mergesort(void *list, - void *(*get_next_fn)(const void *), - void (*set_next_fn)(void *, void *), - int (*compare_fn)(const void *, const void *)); +/* + * Sort linked list in place. + * - get_next_fn() returns the next element given an element of a linked list. + * - set_next_fn() takes two elements A and B, and makes B the "next" element + * of A on the list. + * - compare_fn() takes two elements A and B, and returns negative, 0, positive + * as the same sign as "subtracting" B from A. + */ +void *llist_mergesort(void *list, + void *(*get_next_fn)(const void *), + void (*set_next_fn)(void *, void *), + int (*compare_fn)(const void *, const void *)); #endif diff --git a/test-mergesort.c b/test-mergesort.c index 1dd82fd67f..3f388b4ce0 100644 --- a/test-mergesort.c +++ b/test-mergesort.c @@ -42,7 +42,7 @@ int main(int argc, const char **argv) p = line; } - lines = mergesort(lines, get_next, set_next, compare_strings); + lines = llist_mergesort(lines, get_next, set_next, compare_strings); while (lines) { printf("%s", lines->text);