Merge branch 'rs/mergesort'
The mergesort implementation used to sort linked list has been optimized. * rs/mergesort: test-mergesort: use repeatable random numbers mergesort: use ranks stack p0071: test performance of llist_mergesort() p0071: measure sorting of already sorted and reversed files test-mergesort: add unriffle_skewed mode test-mergesort: add unriffle mode test-mergesort: add generate subcommand test-mergesort: add test subcommand test-mergesort: add sort subcommand test-mergesort: use strbuf_getline()
This commit is contained in:
commit
0ef08090d2
123
mergesort.c
123
mergesort.c
@ -1,73 +1,84 @@
|
||||
#include "cache.h"
|
||||
#include "mergesort.h"
|
||||
|
||||
struct mergesort_sublist {
|
||||
void *ptr;
|
||||
unsigned long len;
|
||||
};
|
||||
|
||||
static void *get_nth_next(void *list, unsigned long n,
|
||||
void *(*get_next_fn)(const void *))
|
||||
/* Combine two sorted lists. Take from `list` on equality. */
|
||||
static void *llist_merge(void *list, void *other,
|
||||
void *(*get_next_fn)(const void *),
|
||||
void (*set_next_fn)(void *, void *),
|
||||
int (*compare_fn)(const void *, const void *))
|
||||
{
|
||||
while (n-- && list)
|
||||
list = get_next_fn(list);
|
||||
return list;
|
||||
}
|
||||
|
||||
static void *pop_item(struct mergesort_sublist *l,
|
||||
void *(*get_next_fn)(const void *))
|
||||
{
|
||||
void *p = l->ptr;
|
||||
l->ptr = get_next_fn(l->ptr);
|
||||
l->len = l->ptr ? (l->len - 1) : 0;
|
||||
return p;
|
||||
void *result = list, *tail;
|
||||
|
||||
if (compare_fn(list, other) > 0) {
|
||||
result = other;
|
||||
goto other;
|
||||
}
|
||||
for (;;) {
|
||||
do {
|
||||
tail = list;
|
||||
list = get_next_fn(list);
|
||||
if (!list) {
|
||||
set_next_fn(tail, other);
|
||||
return result;
|
||||
}
|
||||
} while (compare_fn(list, other) <= 0);
|
||||
set_next_fn(tail, other);
|
||||
other:
|
||||
do {
|
||||
tail = other;
|
||||
other = get_next_fn(other);
|
||||
if (!other) {
|
||||
set_next_fn(tail, list);
|
||||
return result;
|
||||
}
|
||||
} while (compare_fn(list, other) > 0);
|
||||
set_next_fn(tail, list);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform an iterative mergesort using an array of sublists.
|
||||
*
|
||||
* n is the number of items.
|
||||
* ranks[i] is undefined if n & 2^i == 0, and assumed empty.
|
||||
* ranks[i] contains a sublist of length 2^i otherwise.
|
||||
*
|
||||
* The number of bits in a void pointer limits the number of objects
|
||||
* that can be created, and thus the number of array elements necessary
|
||||
* to be able to sort any valid list.
|
||||
*
|
||||
* Adding an item to this array is like incrementing a binary number;
|
||||
* positional values for set bits correspond to sublist lengths.
|
||||
*/
|
||||
void *llist_mergesort(void *list,
|
||||
void *(*get_next_fn)(const void *),
|
||||
void (*set_next_fn)(void *, void *),
|
||||
int (*compare_fn)(const void *, const void *))
|
||||
{
|
||||
unsigned long l;
|
||||
void *ranks[bitsizeof(void *)];
|
||||
size_t n = 0;
|
||||
int i;
|
||||
|
||||
if (!list)
|
||||
return NULL;
|
||||
for (l = 1; ; l *= 2) {
|
||||
void *curr;
|
||||
struct mergesort_sublist p, q;
|
||||
while (list) {
|
||||
void *next = get_next_fn(list);
|
||||
if (next)
|
||||
set_next_fn(list, NULL);
|
||||
for (i = 0; n & (1 << i); i++)
|
||||
list = llist_merge(ranks[i], list, get_next_fn,
|
||||
set_next_fn, compare_fn);
|
||||
n++;
|
||||
ranks[i] = list;
|
||||
list = next;
|
||||
}
|
||||
|
||||
p.ptr = list;
|
||||
q.ptr = get_nth_next(p.ptr, l, get_next_fn);
|
||||
if (!q.ptr)
|
||||
break;
|
||||
p.len = q.len = l;
|
||||
|
||||
if (compare_fn(p.ptr, q.ptr) > 0)
|
||||
list = curr = pop_item(&q, get_next_fn);
|
||||
for (i = 0; n; i++, n >>= 1) {
|
||||
if (!(n & 1))
|
||||
continue;
|
||||
if (list)
|
||||
list = llist_merge(ranks[i], list, get_next_fn,
|
||||
set_next_fn, compare_fn);
|
||||
else
|
||||
list = curr = pop_item(&p, get_next_fn);
|
||||
|
||||
while (p.ptr) {
|
||||
while (p.len || q.len) {
|
||||
void *prev = curr;
|
||||
|
||||
if (!p.len)
|
||||
curr = pop_item(&q, get_next_fn);
|
||||
else if (!q.len)
|
||||
curr = pop_item(&p, get_next_fn);
|
||||
else if (compare_fn(p.ptr, q.ptr) > 0)
|
||||
curr = pop_item(&q, get_next_fn);
|
||||
else
|
||||
curr = pop_item(&p, get_next_fn);
|
||||
set_next_fn(prev, curr);
|
||||
}
|
||||
p.ptr = q.ptr;
|
||||
p.len = l;
|
||||
q.ptr = get_nth_next(p.ptr, l, get_next_fn);
|
||||
q.len = q.ptr ? l : 0;
|
||||
|
||||
}
|
||||
set_next_fn(curr, NULL);
|
||||
list = ranks[i];
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
@ -2,6 +2,12 @@
|
||||
#include "cache.h"
|
||||
#include "mergesort.h"
|
||||
|
||||
static uint32_t minstd_rand(uint32_t *state)
|
||||
{
|
||||
*state = (uint64_t)*state * 48271 % 2147483647;
|
||||
return *state;
|
||||
}
|
||||
|
||||
struct line {
|
||||
char *text;
|
||||
struct line *next;
|
||||
@ -23,14 +29,12 @@ static int compare_strings(const void *a, const void *b)
|
||||
return strcmp(x->text, y->text);
|
||||
}
|
||||
|
||||
int cmd__mergesort(int argc, const char **argv)
|
||||
static int sort_stdin(void)
|
||||
{
|
||||
struct line *line, *p = NULL, *lines = NULL;
|
||||
struct strbuf sb = STRBUF_INIT;
|
||||
|
||||
for (;;) {
|
||||
if (strbuf_getwholeline(&sb, stdin, '\n'))
|
||||
break;
|
||||
while (!strbuf_getline(&sb, stdin)) {
|
||||
line = xmalloc(sizeof(struct line));
|
||||
line->text = strbuf_detach(&sb, NULL);
|
||||
if (p) {
|
||||
@ -46,8 +50,362 @@ int cmd__mergesort(int argc, const char **argv)
|
||||
lines = llist_mergesort(lines, get_next, set_next, compare_strings);
|
||||
|
||||
while (lines) {
|
||||
printf("%s", lines->text);
|
||||
puts(lines->text);
|
||||
lines = lines->next;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dist_sawtooth(int *arr, int n, int m)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < n; i++)
|
||||
arr[i] = i % m;
|
||||
}
|
||||
|
||||
static void dist_rand(int *arr, int n, int m)
|
||||
{
|
||||
int i;
|
||||
uint32_t seed = 1;
|
||||
for (i = 0; i < n; i++)
|
||||
arr[i] = minstd_rand(&seed) % m;
|
||||
}
|
||||
|
||||
static void dist_stagger(int *arr, int n, int m)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < n; i++)
|
||||
arr[i] = (i * m + i) % n;
|
||||
}
|
||||
|
||||
static void dist_plateau(int *arr, int n, int m)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < n; i++)
|
||||
arr[i] = (i < m) ? i : m;
|
||||
}
|
||||
|
||||
static void dist_shuffle(int *arr, int n, int m)
|
||||
{
|
||||
int i, j, k;
|
||||
uint32_t seed = 1;
|
||||
for (i = j = 0, k = 1; i < n; i++)
|
||||
arr[i] = minstd_rand(&seed) % m ? (j += 2) : (k += 2);
|
||||
}
|
||||
|
||||
#define DIST(name) { #name, dist_##name }
|
||||
|
||||
static struct dist {
|
||||
const char *name;
|
||||
void (*fn)(int *arr, int n, int m);
|
||||
} dist[] = {
|
||||
DIST(sawtooth),
|
||||
DIST(rand),
|
||||
DIST(stagger),
|
||||
DIST(plateau),
|
||||
DIST(shuffle),
|
||||
};
|
||||
|
||||
static const struct dist *get_dist_by_name(const char *name)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < ARRAY_SIZE(dist); i++) {
|
||||
if (!strcmp(dist[i].name, name))
|
||||
return &dist[i];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void mode_copy(int *arr, int n)
|
||||
{
|
||||
/* nothing */
|
||||
}
|
||||
|
||||
static void mode_reverse(int *arr, int n)
|
||||
{
|
||||
int i, j;
|
||||
for (i = 0, j = n - 1; i < j; i++, j--)
|
||||
SWAP(arr[i], arr[j]);
|
||||
}
|
||||
|
||||
static void mode_reverse_1st_half(int *arr, int n)
|
||||
{
|
||||
mode_reverse(arr, n / 2);
|
||||
}
|
||||
|
||||
static void mode_reverse_2nd_half(int *arr, int n)
|
||||
{
|
||||
int half = n / 2;
|
||||
mode_reverse(arr + half, n - half);
|
||||
}
|
||||
|
||||
static int compare_ints(const void *av, const void *bv)
|
||||
{
|
||||
const int *ap = av, *bp = bv;
|
||||
int a = *ap, b = *bp;
|
||||
return (a > b) - (a < b);
|
||||
}
|
||||
|
||||
static void mode_sort(int *arr, int n)
|
||||
{
|
||||
QSORT(arr, n, compare_ints);
|
||||
}
|
||||
|
||||
static void mode_dither(int *arr, int n)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < n; i++)
|
||||
arr[i] += i % 5;
|
||||
}
|
||||
|
||||
static void unriffle(int *arr, int n, int *tmp)
|
||||
{
|
||||
int i, j;
|
||||
COPY_ARRAY(tmp, arr, n);
|
||||
for (i = j = 0; i < n; i += 2)
|
||||
arr[j++] = tmp[i];
|
||||
for (i = 1; i < n; i += 2)
|
||||
arr[j++] = tmp[i];
|
||||
}
|
||||
|
||||
static void unriffle_recursively(int *arr, int n, int *tmp)
|
||||
{
|
||||
if (n > 1) {
|
||||
int half = n / 2;
|
||||
unriffle(arr, n, tmp);
|
||||
unriffle_recursively(arr, half, tmp);
|
||||
unriffle_recursively(arr + half, n - half, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
static void mode_unriffle(int *arr, int n)
|
||||
{
|
||||
int *tmp;
|
||||
ALLOC_ARRAY(tmp, n);
|
||||
unriffle_recursively(arr, n, tmp);
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
static unsigned int prev_pow2(unsigned int n)
|
||||
{
|
||||
unsigned int pow2 = 1;
|
||||
while (pow2 * 2 < n)
|
||||
pow2 *= 2;
|
||||
return pow2;
|
||||
}
|
||||
|
||||
static void unriffle_recursively_skewed(int *arr, int n, int *tmp)
|
||||
{
|
||||
if (n > 1) {
|
||||
int pow2 = prev_pow2(n);
|
||||
int rest = n - pow2;
|
||||
unriffle(arr + pow2 - rest, rest * 2, tmp);
|
||||
unriffle_recursively_skewed(arr, pow2, tmp);
|
||||
unriffle_recursively_skewed(arr + pow2, rest, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
static void mode_unriffle_skewed(int *arr, int n)
|
||||
{
|
||||
int *tmp;
|
||||
ALLOC_ARRAY(tmp, n);
|
||||
unriffle_recursively_skewed(arr, n, tmp);
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
#define MODE(name) { #name, mode_##name }
|
||||
|
||||
static struct mode {
|
||||
const char *name;
|
||||
void (*fn)(int *arr, int n);
|
||||
} mode[] = {
|
||||
MODE(copy),
|
||||
MODE(reverse),
|
||||
MODE(reverse_1st_half),
|
||||
MODE(reverse_2nd_half),
|
||||
MODE(sort),
|
||||
MODE(dither),
|
||||
MODE(unriffle),
|
||||
MODE(unriffle_skewed),
|
||||
};
|
||||
|
||||
static const struct mode *get_mode_by_name(const char *name)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < ARRAY_SIZE(mode); i++) {
|
||||
if (!strcmp(mode[i].name, name))
|
||||
return &mode[i];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int generate(int argc, const char **argv)
|
||||
{
|
||||
const struct dist *dist = NULL;
|
||||
const struct mode *mode = NULL;
|
||||
int i, n, m, *arr;
|
||||
|
||||
if (argc != 4)
|
||||
return 1;
|
||||
|
||||
dist = get_dist_by_name(argv[0]);
|
||||
mode = get_mode_by_name(argv[1]);
|
||||
n = strtol(argv[2], NULL, 10);
|
||||
m = strtol(argv[3], NULL, 10);
|
||||
if (!dist || !mode)
|
||||
return 1;
|
||||
|
||||
ALLOC_ARRAY(arr, n);
|
||||
dist->fn(arr, n, m);
|
||||
mode->fn(arr, n);
|
||||
for (i = 0; i < n; i++)
|
||||
printf("%08x\n", arr[i]);
|
||||
free(arr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct stats {
|
||||
int get_next, set_next, compare;
|
||||
} stats;
|
||||
|
||||
struct number {
|
||||
int value, rank;
|
||||
struct number *next;
|
||||
};
|
||||
|
||||
static void *get_next_number(const void *a)
|
||||
{
|
||||
stats.get_next++;
|
||||
return ((const struct number *)a)->next;
|
||||
}
|
||||
|
||||
static void set_next_number(void *a, void *b)
|
||||
{
|
||||
stats.set_next++;
|
||||
((struct number *)a)->next = b;
|
||||
}
|
||||
|
||||
static int compare_numbers(const void *av, const void *bv)
|
||||
{
|
||||
const struct number *an = av, *bn = bv;
|
||||
int a = an->value, b = bn->value;
|
||||
stats.compare++;
|
||||
return (a > b) - (a < b);
|
||||
}
|
||||
|
||||
static void clear_numbers(struct number *list)
|
||||
{
|
||||
while (list) {
|
||||
struct number *next = list->next;
|
||||
free(list);
|
||||
list = next;
|
||||
}
|
||||
}
|
||||
|
||||
static int test(const struct dist *dist, const struct mode *mode, int n, int m)
|
||||
{
|
||||
int *arr;
|
||||
size_t i;
|
||||
struct number *curr, *list, **tail;
|
||||
int is_sorted = 1;
|
||||
int is_stable = 1;
|
||||
const char *verdict;
|
||||
int result = -1;
|
||||
|
||||
ALLOC_ARRAY(arr, n);
|
||||
dist->fn(arr, n, m);
|
||||
mode->fn(arr, n);
|
||||
for (i = 0, tail = &list; i < n; i++) {
|
||||
curr = xmalloc(sizeof(*curr));
|
||||
curr->value = arr[i];
|
||||
curr->rank = i;
|
||||
*tail = curr;
|
||||
tail = &curr->next;
|
||||
}
|
||||
*tail = NULL;
|
||||
|
||||
stats.get_next = stats.set_next = stats.compare = 0;
|
||||
list = llist_mergesort(list, get_next_number, set_next_number,
|
||||
compare_numbers);
|
||||
|
||||
QSORT(arr, n, compare_ints);
|
||||
for (i = 0, curr = list; i < n && curr; i++, curr = curr->next) {
|
||||
if (arr[i] != curr->value)
|
||||
is_sorted = 0;
|
||||
if (curr->next && curr->value == curr->next->value &&
|
||||
curr->rank >= curr->next->rank)
|
||||
is_stable = 0;
|
||||
}
|
||||
if (i < n) {
|
||||
verdict = "too short";
|
||||
} else if (curr) {
|
||||
verdict = "too long";
|
||||
} else if (!is_sorted) {
|
||||
verdict = "not sorted";
|
||||
} else if (!is_stable) {
|
||||
verdict = "unstable";
|
||||
} else {
|
||||
verdict = "OK";
|
||||
result = 0;
|
||||
}
|
||||
|
||||
printf("%-9s %-16s %8d %8d %8d %8d %8d %s\n",
|
||||
dist->name, mode->name, n, m, stats.get_next, stats.set_next,
|
||||
stats.compare, verdict);
|
||||
|
||||
clear_numbers(list);
|
||||
free(arr);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* A version of the qsort certification program from "Engineering a Sort
|
||||
* Function" by Bentley and McIlroy, Software—Practice and Experience,
|
||||
* Volume 23, Issue 11, 1249–1265 (November 1993).
|
||||
*/
|
||||
static int run_tests(int argc, const char **argv)
|
||||
{
|
||||
const char *argv_default[] = { "100", "1023", "1024", "1025" };
|
||||
if (!argc)
|
||||
return run_tests(ARRAY_SIZE(argv_default), argv_default);
|
||||
printf("%-9s %-16s %8s %8s %8s %8s %8s %s\n",
|
||||
"distribut", "mode", "n", "m", "get_next", "set_next",
|
||||
"compare", "verdict");
|
||||
while (argc--) {
|
||||
int i, j, m, n = strtol(*argv++, NULL, 10);
|
||||
for (i = 0; i < ARRAY_SIZE(dist); i++) {
|
||||
for (j = 0; j < ARRAY_SIZE(mode); j++) {
|
||||
for (m = 1; m < 2 * n; m *= 2) {
|
||||
if (test(&dist[i], &mode[j], n, m))
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmd__mergesort(int argc, const char **argv)
|
||||
{
|
||||
int i;
|
||||
const char *sep;
|
||||
|
||||
if (argc == 6 && !strcmp(argv[1], "generate"))
|
||||
return generate(argc - 2, argv + 2);
|
||||
if (argc == 2 && !strcmp(argv[1], "sort"))
|
||||
return sort_stdin();
|
||||
if (argc > 1 && !strcmp(argv[1], "test"))
|
||||
return run_tests(argc - 2, argv + 2);
|
||||
fprintf(stderr, "usage: test-tool mergesort generate <distribution> <mode> <n> <m>\n");
|
||||
fprintf(stderr, " or: test-tool mergesort sort\n");
|
||||
fprintf(stderr, " or: test-tool mergesort test [<n>...]\n");
|
||||
fprintf(stderr, "\n");
|
||||
for (i = 0, sep = "distributions: "; i < ARRAY_SIZE(dist); i++, sep = ", ")
|
||||
fprintf(stderr, "%s%s", sep, dist[i].name);
|
||||
fprintf(stderr, "\n");
|
||||
for (i = 0, sep = "modes: "; i < ARRAY_SIZE(mode); i++, sep = ", ")
|
||||
fprintf(stderr, "%s%s", sep, mode[i].name);
|
||||
fprintf(stderr, "\n");
|
||||
return 129;
|
||||
}
|
||||
|
@ -11,16 +11,42 @@ test_expect_success 'setup' '
|
||||
git cat-file --batch >unsorted
|
||||
'
|
||||
|
||||
test_perf 'sort(1)' '
|
||||
sort <unsorted >expect
|
||||
test_perf 'sort(1) unsorted' '
|
||||
sort <unsorted >sorted
|
||||
'
|
||||
|
||||
test_perf 'string_list_sort()' '
|
||||
test-tool string-list sort <unsorted >actual
|
||||
test_expect_success 'reverse' '
|
||||
sort -r <unsorted >reversed
|
||||
'
|
||||
|
||||
test_expect_success 'string_list_sort() sorts like sort(1)' '
|
||||
test_cmp_bin expect actual
|
||||
'
|
||||
for file in sorted reversed
|
||||
do
|
||||
test_perf "sort(1) $file" "
|
||||
sort <$file >actual
|
||||
"
|
||||
done
|
||||
|
||||
for file in unsorted sorted reversed
|
||||
do
|
||||
|
||||
test_perf "string_list_sort() $file" "
|
||||
test-tool string-list sort <$file >actual
|
||||
"
|
||||
|
||||
test_expect_success "string_list_sort() $file sorts like sort(1)" "
|
||||
test_cmp_bin sorted actual
|
||||
"
|
||||
done
|
||||
|
||||
for file in unsorted sorted reversed
|
||||
do
|
||||
test_perf "llist_mergesort() $file" "
|
||||
test-tool mergesort sort <$file >actual
|
||||
"
|
||||
|
||||
test_expect_success "llist_mergesort() $file sorts like sort(1)" "
|
||||
test_cmp_bin sorted actual
|
||||
"
|
||||
done
|
||||
|
||||
test_done
|
||||
|
11
t/t0071-sort.sh
Executable file
11
t/t0071-sort.sh
Executable file
@ -0,0 +1,11 @@
|
||||
#!/bin/sh
|
||||
|
||||
test_description='verify sort functions'
|
||||
|
||||
. ./test-lib.sh
|
||||
|
||||
test_expect_success 'llist_mergesort()' '
|
||||
test-tool mergesort test
|
||||
'
|
||||
|
||||
test_done
|
Loading…
Reference in New Issue
Block a user