Merge branch 'jk/prune-optim'

"git prune" has been taught to take advantage of reachability
bitmaps when available.

* jk/prune-optim:
  t5304: rename "sha1" variables to "oid"
  prune: check SEEN flag for reachability
  prune: use bitmaps for reachability traversal
  prune: lazily perform reachability traversal
This commit is contained in:
Junio C Hamano 2019-03-07 09:59:56 +09:00
commit f7213a3d33
4 changed files with 129 additions and 20 deletions

View File

@ -31,16 +31,39 @@ static int prune_tmp_file(const char *fullpath)
return 0;
}
/*
 * Run the reachability traversal, but only the first time this function
 * is called; every later call returns immediately.
 *
 * The traversal is delegated to mark_reachable_objects(), with reflog
 * marking enabled (1) and the file-level "expire" value passed as the
 * "recent" cutoff.  A delayed progress meter is shown only when the
 * file-level "show_progress" setting is non-zero.
 */
static void perform_reachability_traversal(struct rev_info *revs)
{
	static int already_done;
	struct progress *meter;

	if (already_done)
		return;

	meter = show_progress
		? start_delayed_progress(_("Checking connectivity"), 0)
		: NULL;

	mark_reachable_objects(revs, 1, expire, meter);
	stop_progress(&meter);

	/* Remember that the walk has happened so we never repeat it. */
	already_done = 1;
}
/*
 * Report whether the object named by "oid" was marked during the
 * reachability traversal.
 *
 * The traversal is triggered here on demand (it only runs once), after
 * which the object is looked up in the_repository.  Returns 1 when the
 * object exists in memory and carries the SEEN flag, 0 otherwise.
 */
static int is_object_reachable(const struct object_id *oid,
			       struct rev_info *revs)
{
	struct object *found;

	/* The SEEN flags are only meaningful once the walk has been done. */
	perform_reachability_traversal(revs);

	found = lookup_object(the_repository, oid->hash);
	if (!found)
		return 0;

	return (found->flags & SEEN) != 0;
}
static int prune_object(const struct object_id *oid, const char *fullpath,
void *data)
{
struct rev_info *revs = data;
struct stat st;
/*
* Do we know about this object?
* It must have been reachable
*/
if (lookup_object(the_repository, oid->hash))
if (is_object_reachable(oid, revs))
return 0;
if (lstat(fullpath, &st)) {
@ -102,7 +125,6 @@ static void remove_temporary_files(const char *path)
int cmd_prune(int argc, const char **argv, const char *prefix)
{
struct rev_info revs;
struct progress *progress = NULL;
int exclude_promisor_objects = 0;
const struct option options[] = {
OPT__DRY_RUN(&show_only, N_("do not remove, show only")),
@ -142,17 +164,13 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
if (show_progress == -1)
show_progress = isatty(2);
if (show_progress)
progress = start_delayed_progress(_("Checking connectivity"), 0);
if (exclude_promisor_objects) {
fetch_if_missing = 0;
revs.exclude_promisor_objects = 1;
}
mark_reachable_objects(&revs, 1, expire, progress);
stop_progress(&progress);
for_each_loose_file_in_objdir(get_object_directory(), prune_object,
prune_cruft, prune_subdir, NULL);
prune_cruft, prune_subdir, &revs);
prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0);
remove_temporary_files(get_object_directory());
@ -160,8 +178,10 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
remove_temporary_files(s);
free(s);
if (is_repository_shallow(the_repository))
if (is_repository_shallow(the_repository)) {
perform_reachability_traversal(&revs);
prune_shallow(show_only ? PRUNE_SHOW_ONLY : 0);
}
return 0;
}

View File

@ -12,6 +12,7 @@
#include "packfile.h"
#include "worktree.h"
#include "object-store.h"
#include "pack-bitmap.h"
struct connectivity_progress {
struct progress *progress;
@ -158,10 +159,44 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
FOR_EACH_OBJECT_LOCAL_ONLY);
}
/*
 * Look up "oid" in repository "r" using the lookup function that matches
 * "type" (commit, tree, tag or blob) and return whatever that function
 * returns.  Any other type is treated as a programming error and
 * terminates the program via die().
 */
static void *lookup_object_by_type(struct repository *r,
				   const struct object_id *oid,
				   enum object_type type)
{
	void *result = NULL;

	if (type == OBJ_COMMIT)
		result = lookup_commit(r, oid);
	else if (type == OBJ_TREE)
		result = lookup_tree(r, oid);
	else if (type == OBJ_TAG)
		result = lookup_tag(r, oid);
	else if (type == OBJ_BLOB)
		result = lookup_blob(r, oid);
	else
		die("BUG: unknown object type %d", type);

	return result;
}
/*
 * Per-object callback handed to traverse_bitmap_commit_list(): flag the
 * object identified by "oid"/"type" as SEEN.
 *
 * Only "oid" and "type" are used; the remaining parameters are ignored
 * and are presumably present only to satisfy the callback signature.
 * Always returns 0; dies if the typed lookup yields no object.
 */
static int mark_object_seen(const struct object_id *oid,
			    enum object_type type,
			    int exclude,
			    uint32_t name_hash,
			    struct packed_git *found_pack,
			    off_t found_offset)
{
	struct object *entry;

	entry = lookup_object_by_type(the_repository, oid, type);
	if (!entry)
		die("unable to create object '%s'", oid_to_hex(oid));

	entry->flags |= SEEN;

	return 0;
}
void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
timestamp_t mark_recent, struct progress *progress)
{
struct connectivity_progress cp;
struct bitmap_index *bitmap_git;
/*
* Set up revision parsing, and mark us as being interested
@ -188,6 +223,13 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
cp.progress = progress;
cp.count = 0;
bitmap_git = prepare_bitmap_walk(revs);
if (bitmap_git) {
traverse_bitmap_commit_list(bitmap_git, mark_object_seen);
free_bitmap_index(bitmap_git);
return;
}
/*
* Set up the revision walk - this will move all commits
* from the pending list to the commit walking list.

35
t/perf/p5304-prune.sh Executable file
View File

@ -0,0 +1,35 @@
#!/bin/sh
#
# Perf script for "git prune".  It measures two scenarios:
#   1. prune when the repository has no loose objects at all, and
#   2. prune after the repository has been repacked with bitmaps.
# The steps below depend on each other and must run in this order.
test_description='performance tests of prune'
. ./perf-lib.sh
test_perf_default_repo
# Setup: repack so that reachable objects no longer exist in loose form.
test_expect_success 'remove reachable loose objects' '
	git repack -ad
'
# Setup: prune whatever unreachable loose objects are left over.
test_expect_success 'remove unreachable loose objects' '
	git prune
'
# Sanity check: the measurement below is only meaningful if the loose
# object count reported by count-objects really starts with 0.
test_expect_success 'confirm there are no loose objects' '
	git count-objects | grep ^0
'
# Measurement 1: prune with nothing loose to look at.
test_perf 'prune with no objects' '
	git prune
'
# Setup for measurement 2: repack again, this time with -b (bitmaps).
test_expect_success 'repack with bitmaps' '
	git repack -adb
'
# We have to create the object in each trial run, since otherwise
# runs after the first see no object and just skip the traversal entirely!
test_perf 'prune with bitmaps' '
	echo "probably not present in repo" | git hash-object -w --stdin &&
	git prune
'
test_done

View File

@ -118,10 +118,10 @@ test_expect_success 'prune: do not prune detached HEAD with no reflog' '
test_expect_success 'prune: prune former HEAD after checking out branch' '
head_sha1=$(git rev-parse HEAD) &&
head_oid=$(git rev-parse HEAD) &&
git checkout --quiet master &&
git prune -v >prune_actual &&
grep "$head_sha1" prune_actual
grep "$head_oid" prune_actual
'
@ -265,15 +265,27 @@ EOF
'
test_expect_success 'prune .git/shallow' '
SHA1=$(echo hi|git commit-tree HEAD^{tree}) &&
echo $SHA1 >.git/shallow &&
oid=$(echo hi|git commit-tree HEAD^{tree}) &&
echo $oid >.git/shallow &&
git prune --dry-run >out &&
grep $SHA1 .git/shallow &&
grep $SHA1 out &&
grep $oid .git/shallow &&
grep $oid out &&
git prune &&
test_path_is_missing .git/shallow
'
# Case where prune has no loose objects to examine: the commit recorded in
# .git/shallow is kept reachable through refs/heads/shallow-tip and everything
# is packed, so after "git prune" the .git/shallow file must still list it.
test_expect_success 'prune .git/shallow when there are no loose objects' '
oid=$(echo hi|git commit-tree HEAD^{tree}) &&
echo $oid >.git/shallow &&
git update-ref refs/heads/shallow-tip $oid &&
git repack -ad &&
# verify assumption that all loose objects are gone
git count-objects | grep ^0 &&
git prune &&
echo $oid >expect &&
test_cmp expect .git/shallow
'
test_expect_success 'prune: handle alternate object database' '
test_create_repo A &&
git -C A commit --allow-empty -m "initial commit" &&
@ -314,8 +326,8 @@ test_expect_success 'prune: handle HEAD reflog in multiple worktrees' '
git reset --hard HEAD^
) &&
git prune --expire=now &&
SHA1=`git hash-object expected` &&
git -C third-worktree show "$SHA1" >actual &&
oid=`git hash-object expected` &&
git -C third-worktree show "$oid" >actual &&
test_cmp expected actual
'