Try using Geert similarity code in pack-objects.
It appears the fingerprinting itself is too expensive to be worth doing for this purpose. A failed experiment.

Signed-off-by: Junio C Hamano <junkio@cox.net>
This commit is contained in:
parent
9a305b67f8
commit
ca9de6cadf
2
Makefile
2
Makefile
@ -204,7 +204,7 @@ DIFF_OBJS = \
|
|||||||
diffcore-delta.o log-tree.o
|
diffcore-delta.o log-tree.o
|
||||||
|
|
||||||
LIB_OBJS = \
|
LIB_OBJS = \
|
||||||
blob.o commit.o connect.o csum-file.o \
|
blob.o commit.o connect.o csum-file.o gsimm.o rabinpoly.o \
|
||||||
date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \
|
date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \
|
||||||
object.o pack-check.o patch-delta.o path.o pkt-line.o \
|
object.o pack-check.o patch-delta.o path.o pkt-line.o \
|
||||||
quote.o read-cache.o refs.o run-command.o \
|
quote.o read-cache.o refs.o run-command.o \
|
||||||
|
@ -8,6 +8,8 @@
|
|||||||
#include "pack.h"
|
#include "pack.h"
|
||||||
#include "csum-file.h"
|
#include "csum-file.h"
|
||||||
#include "tree-walk.h"
|
#include "tree-walk.h"
|
||||||
|
#include "rabinpoly.h"
|
||||||
|
#include "gsimm.h"
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
|
|
||||||
@ -993,6 +995,7 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr
|
|||||||
|
|
||||||
struct unpacked {
|
struct unpacked {
|
||||||
struct object_entry *entry;
|
struct object_entry *entry;
|
||||||
|
unsigned char fingerprint[MD_LENGTH];
|
||||||
void *data;
|
void *data;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1041,6 +1044,9 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
|
|||||||
if (old_entry->depth >= max_depth)
|
if (old_entry->depth >= max_depth)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
if (gb_simm_score(cur->fingerprint, old->fingerprint) < 0.4)
|
||||||
|
return 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NOTE!
|
* NOTE!
|
||||||
*
|
*
|
||||||
@ -1077,6 +1083,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
|
|||||||
unsigned processed = 0;
|
unsigned processed = 0;
|
||||||
unsigned last_percent = 999;
|
unsigned last_percent = 999;
|
||||||
|
|
||||||
|
rabin_reset ();
|
||||||
memset(array, 0, array_size);
|
memset(array, 0, array_size);
|
||||||
i = nr_objects;
|
i = nr_objects;
|
||||||
idx = 0;
|
idx = 0;
|
||||||
@ -1115,6 +1122,8 @@ static void find_deltas(struct object_entry **list, int window, int depth)
|
|||||||
if (size != entry->size)
|
if (size != entry->size)
|
||||||
die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
|
die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
|
||||||
|
|
||||||
|
gb_simm_process(n->data, size, n->fingerprint);
|
||||||
|
|
||||||
j = window;
|
j = window;
|
||||||
while (--j > 0) {
|
while (--j > 0) {
|
||||||
unsigned int other_idx = idx + j;
|
unsigned int other_idx = idx + j;
|
||||||
@ -1124,6 +1133,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
|
|||||||
m = array + other_idx;
|
m = array + other_idx;
|
||||||
if (!m->entry)
|
if (!m->entry)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (try_delta(n, m, depth) < 0)
|
if (try_delta(n, m, depth) < 0)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user