Merge branch 'jc/autogc' into js/rebase-i

* jc/autogc:
  git-gc --auto: run "repack -A -d -l" as necessary.
  git-gc --auto: restructure the way "repack" command line is built.
  git-gc --auto: protect ourselves from accumulated cruft
  git-gc --auto: add documentation.
  git-gc --auto: move threshold check to need_to_gc() function.
  repack -A -d: use --keep-unreachable when repacking
  pack-objects --keep-unreachable
  Export matches_pack_name() and fix its return value
  Invoke "git gc --auto" from commit, merge, am and rebase.
  Implement git gc --auto

Conflicts:

	builtin-pack-objects.c
This commit is contained in:
Junio C Hamano 2007-09-26 00:42:12 -07:00
commit 61ab92df40
11 changed files with 276 additions and 14 deletions

View File

@ -439,6 +439,19 @@ gc.aggressiveWindow::
algorithm used by 'git gc --aggressive'. This defaults
to 10.
gc.auto::
When there are approximately more than this many loose
objects in the repository, `git gc --auto` will pack them.
Some Porcelain commands use this command to perform a
light-weight garbage collection from time to time. Setting
this to 0 disables it.
gc.autopacklimit::
When there are more than this many packs that are not
marked with a `*.keep` file in the repository, `git gc
--auto` consolidates them into one larger pack. Setting
this to 0 disables it.
gc.packrefs::
`git gc` does not run `git pack-refs` in a bare repository by
default so that older dumb-transport clients can still fetch

View File

@ -8,7 +8,7 @@ git-gc - Cleanup unnecessary files and optimize the local repository
SYNOPSIS
--------
'git-gc' [--prune] [--aggressive]
'git-gc' [--prune] [--aggressive] [--auto]
DESCRIPTION
-----------
@ -43,6 +43,20 @@ OPTIONS
persistent, so this option only needs to be used occasionally; every
few hundred changesets or so.
--auto::
With this option, `git gc` checks if there are too many
loose objects in the repository and runs
gitlink:git-repack[1] with `-d -l` option to pack them.
The threshold for loose objects is set with `gc.auto` configuration
variable, and can be disabled by setting it to 0. Some
Porcelain commands use this automatically after they perform
operations that could create many loose objects.
Additionally, when too many packs are present,
they are consolidated into one larger pack by running
the `git-repack` command with `-A` option. The
threshold for number of packs is set with
`gc.autopacklimit` configuration variable.
Configuration
-------------

View File

@ -20,6 +20,8 @@ static const char builtin_gc_usage[] = "git-gc [--prune] [--aggressive]";
static int pack_refs = 1;
static int aggressive_window = -1;
static int gc_auto_threshold = 6700;
static int gc_auto_pack_limit = 20;
#define MAX_ADD 10
static const char *argv_pack_refs[] = {"pack-refs", "--all", "--prune", NULL};
@ -41,6 +43,14 @@ static int gc_config(const char *var, const char *value)
aggressive_window = git_config_int(var, value);
return 0;
}
if (!strcmp(var, "gc.auto")) {
gc_auto_threshold = git_config_int(var, value);
return 0;
}
if (!strcmp(var, "gc.autopacklimit")) {
gc_auto_pack_limit = git_config_int(var, value);
return 0;
}
return git_default_config(var, value);
}
@ -57,10 +67,113 @@ static void append_option(const char **cmd, const char *opt, int max_length)
cmd[i] = NULL;
}
static int too_many_loose_objects(void)
{
/*
* Quickly check if a "gc" is needed, by estimating how
* many loose objects there are. Because SHA-1 is evenly
* distributed, we can check only one and get a reasonable
* estimate.
*/
char path[PATH_MAX];
const char *objdir = get_object_directory();
DIR *dir;
struct dirent *ent;
int auto_threshold;
int num_loose = 0;
int needed = 0;
if (gc_auto_threshold <= 0)
return 0;
if (sizeof(path) <= snprintf(path, sizeof(path), "%s/17", objdir)) {
warning("insanely long object directory %.*s", 50, objdir);
return 0;
}
dir = opendir(path);
if (!dir)
return 0;
auto_threshold = (gc_auto_threshold + 255) / 256;
while ((ent = readdir(dir)) != NULL) {
if (strspn(ent->d_name, "0123456789abcdef") != 38 ||
ent->d_name[38] != '\0')
continue;
if (++num_loose > auto_threshold) {
needed = 1;
break;
}
}
closedir(dir);
return needed;
}
/*
 * Decide whether there are enough local packs to warrant consolidating
 * them into one.
 *
 * Walks the packed_git list after prepare_packed_git() and counts the
 * packs that are local and do not have a companion ".keep" file next
 * to them.  Returns 1 when that count reaches gc_auto_pack_limit,
 * 0 otherwise -- including when the check is disabled (limit <= 0).
 */
static int too_many_packs(void)
{
	struct packed_git *p;
	int cnt = 0;

	if (gc_auto_pack_limit <= 0)
		return 0;

	prepare_packed_git();
	for (p = packed_git; p; p = p->next) {
		char path[PATH_MAX];
		size_t len;

		if (!p->pack_local)
			continue;
		len = strlen(p->pack_name);
		/*
		 * The name is rewritten by replacing its last five
		 * characters (presumably ".pack") with ".keep"; a name
		 * shorter than that, or one that does not fit in the
		 * buffer, cannot be handled.
		 */
		if (len < 5 || PATH_MAX <= len + 1)
			continue; /* oops, give up */
		memcpy(path, p->pack_name, len - 5);
		memcpy(path + len - 5, ".keep", 6); /* includes the NUL */

		/*
		 * A pack is "kept" when its .keep file exists.  Test the
		 * .keep path just built, not the pack file itself.
		 */
		if (!access(path, F_OK))
			continue;
		/*
		 * Perhaps check the size of the pack and count only
		 * very small ones here?
		 */
		cnt++;
	}
	return gc_auto_pack_limit <= cnt;
}
/*
 * Decide whether an automatic gc should run and, if so, fill in
 * argv_repack with the repack command line to use.
 *
 *  - too many packs:                  "repack -A -d -l"
 *  - otherwise, too many loose objs:  "repack -d -l"
 *  - neither:                         return 0 (nothing to do)
 *
 * Returns 1 when a repack is needed, 0 otherwise.  Both checks are
 * disabled when gc.auto and gc.autopacklimit are zero or negative.
 */
static int need_to_gc(void)
{
	int pos = 0;

	if (gc_auto_threshold <= 0 && gc_auto_pack_limit <= 0)
		return 0;

	argv_repack[pos] = "repack";
	pos++;

	if (!too_many_packs()) {
		/* the loose object check is only consulted in this case */
		if (!too_many_loose_objects())
			return 0;
	} else {
		argv_repack[pos] = "-A";
		pos++;
	}

	argv_repack[pos++] = "-d";
	argv_repack[pos++] = "-l";
	argv_repack[pos] = NULL;
	return 1;
}
int cmd_gc(int argc, const char **argv, const char *prefix)
{
int i;
int prune = 0;
int auto_gc = 0;
char buf[80];
git_config(gc_config);
@ -82,12 +195,24 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
}
continue;
}
/* perhaps other parameters later... */
if (!strcmp(arg, "--auto")) {
auto_gc = 1;
continue;
}
break;
}
if (i != argc)
usage(builtin_gc_usage);
if (auto_gc) {
/*
* Auto-gc should be as unintrusive as possible.
*/
prune = 0;
if (!need_to_gc())
return 0;
}
if (pack_refs && run_command_v_opt(argv_pack_refs, RUN_GIT_CMD))
return error(FAILED_RUN, argv_pack_refs[0]);
@ -103,5 +228,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
if (run_command_v_opt(argv_rerere, RUN_GIT_CMD))
return error(FAILED_RUN, argv_rerere[0]);
if (auto_gc && too_many_loose_objects())
warning("There are too many unreachable loose objects; "
"run 'git prune' to remove them.");
return 0;
}

View File

@ -25,7 +25,7 @@ git-pack-objects [{ -q | --progress | --all-progress }] \n\
[--window=N] [--window-memory=N] [--depth=N] \n\
[--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\
[--threads=N] [--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\
[--stdout | base-name] [<ref-list | <object-list]";
[--stdout | base-name] [--keep-unreachable] [<ref-list | <object-list]";
struct object_entry {
struct pack_idx_entry idx;
@ -61,7 +61,7 @@ static struct object_entry **written_list;
static uint32_t nr_objects, nr_alloc, nr_result, nr_written;
static int non_empty;
static int no_reuse_delta, no_reuse_object;
static int no_reuse_delta, no_reuse_object, keep_unreachable;
static int local;
static int incremental;
static int allow_ofs_delta;
@ -1807,15 +1807,19 @@ static void read_object_list_from_stdin(void)
}
}
#define OBJECT_ADDED (1u<<20)
static void show_commit(struct commit *commit)
{
add_object_entry(commit->object.sha1, OBJ_COMMIT, NULL, 0);
commit->object.flags |= OBJECT_ADDED;
}
static void show_object(struct object_array_entry *p)
{
add_preferred_base_object(p->name);
add_object_entry(p->item->sha1, p->item->type, p->name, 0);
p->item->flags |= OBJECT_ADDED;
}
static void show_edge(struct commit *commit)
@ -1823,6 +1827,86 @@ static void show_edge(struct commit *commit)
add_preferred_base(commit->object.sha1);
}
/*
 * One object found in an existing pack, paired with the value that
 * find_pack_entry_one() returned for it in that pack.
 */
struct in_pack_object {
/* result of find_pack_entry_one() for this object; used as the sort key */
off_t offset;
/* the in-core object this entry refers to */
struct object *object;
};
/*
 * Growable array of in_pack_object entries, grown with ALLOC_GROW()
 * and filled by mark_in_pack_object().
 */
struct in_pack {
/* allocation size bookkeeping handed to ALLOC_GROW() */
int alloc;
/* number of entries of 'array' currently in use */
int nr;
/* the entries themselves */
struct in_pack_object *array;
};
static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
{
in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->sha1, p);
in_pack->array[in_pack->nr].object = object;
in_pack->nr++;
}
/*
* Compare the objects in the offset order, in order to emulate the
* "git-rev-list --objects" output that produced the pack originally.
*/
static int ofscmp(const void *a_, const void *b_)
{
struct in_pack_object *a = (struct in_pack_object *)a_;
struct in_pack_object *b = (struct in_pack_object *)b_;
if (a->offset < b->offset)
return -1;
else if (a->offset > b->offset)
return 1;
else
return hashcmp(a->object->sha1, b->object->sha1);
}
static void add_objects_in_unpacked_packs(struct rev_info *revs)
{
struct packed_git *p;
struct in_pack in_pack;
uint32_t i;
memset(&in_pack, 0, sizeof(in_pack));
for (p = packed_git; p; p = p->next) {
const unsigned char *sha1;
struct object *o;
for (i = 0; i < revs->num_ignore_packed; i++) {
if (matches_pack_name(p, revs->ignore_packed[i]))
break;
}
if (revs->num_ignore_packed <= i)
continue;
if (open_pack_index(p))
die("cannot open pack index");
ALLOC_GROW(in_pack.array,
in_pack.nr + p->num_objects,
in_pack.alloc);
for (i = 0; i < p->num_objects; i++) {
sha1 = nth_packed_object_sha1(p, i);
o = lookup_unknown_object(sha1);
if (!(o->flags & OBJECT_ADDED))
mark_in_pack_object(o, p, &in_pack);
o->flags |= OBJECT_ADDED;
}
}
if (in_pack.nr) {
qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]),
ofscmp);
for (i = 0; i < in_pack.nr; i++) {
struct object *o = in_pack.array[i].object;
add_object_entry(o->sha1, o->type, "", 0);
}
}
free(in_pack.array);
}
static void get_object_list(int ac, const char **av)
{
struct rev_info revs;
@ -1854,6 +1938,9 @@ static void get_object_list(int ac, const char **av)
prepare_revision_walk(&revs);
mark_edges_uninteresting(revs.commits, &revs, show_edge);
traverse_commit_list(&revs, show_commit, show_object);
if (keep_unreachable)
add_objects_in_unpacked_packs(&revs);
}
static int adjust_perm(const char *path, mode_t mode)
@ -1983,6 +2070,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
use_internal_rev_list = 1;
continue;
}
if (!strcmp("--keep-unreachable", arg)) {
keep_unreachable = 1;
continue;
}
if (!strcmp("--unpacked", arg) ||
!prefixcmp(arg, "--unpacked=") ||
!strcmp("--reflog", arg) ||

View File

@ -530,6 +530,7 @@ extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsign
extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
extern int matches_pack_name(struct packed_git *p, const char *name);
/* Dumb servers support */
extern int update_server_info(int);

View File

@ -464,6 +464,8 @@ do
"$GIT_DIR"/hooks/post-applypatch
fi
git gc --auto
go_next
done

View File

@ -611,6 +611,7 @@ git rerere
if test "$ret" = 0
then
git gc --auto
if test -x "$GIT_DIR"/hooks/post-commit
then
"$GIT_DIR"/hooks/post-commit

View File

@ -82,6 +82,7 @@ finish () {
;;
*)
git update-ref -m "$rlogm" HEAD "$1" "$head" || exit 1
git gc --auto
;;
esac
;;

View File

@ -326,6 +326,8 @@ do_next () {
rm -rf "$DOTEST" &&
warn "Successfully rebased and updated $HEADNAME."
git gc --auto
exit
}

View File

@ -3,17 +3,19 @@
# Copyright (c) 2005 Linus Torvalds
#
USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--window-memory=N] [--depth=N]'
USAGE='[-a|-A] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--window-memory=N] [--depth=N]'
SUBDIRECTORY_OK='Yes'
. git-sh-setup
no_update_info= all_into_one= remove_redundant=
no_update_info= all_into_one= remove_redundant= keep_unreachable=
local= quiet= no_reuse= extra=
while test $# != 0
do
case "$1" in
-n) no_update_info=t ;;
-a) all_into_one=t ;;
-A) all_into_one=t
keep_unreachable=--keep-unreachable ;;
-d) remove_redundant=t ;;
-q) quiet=-q ;;
-f) no_reuse=--no-reuse-object ;;
@ -59,7 +61,13 @@ case ",$all_into_one," in
fi
done
fi
[ -z "$args" ] && args='--unpacked --incremental'
if test -z "$args"
then
args='--unpacked --incremental'
elif test -n "$keep_unreachable"
then
args="$args $keep_unreachable"
fi
;;
esac

View File

@ -1684,22 +1684,22 @@ off_t find_pack_entry_one(const unsigned char *sha1,
return 0;
}
static int matches_pack_name(struct packed_git *p, const char *ig)
int matches_pack_name(struct packed_git *p, const char *name)
{
const char *last_c, *c;
if (!strcmp(p->pack_name, ig))
return 0;
if (!strcmp(p->pack_name, name))
return 1;
for (c = p->pack_name, last_c = c; *c;)
if (*c == '/')
last_c = ++c;
else
++c;
if (!strcmp(last_c, ig))
return 0;
if (!strcmp(last_c, name))
return 1;
return 1;
return 0;
}
static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, const char **ignore_packed)
@ -1717,7 +1717,7 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, cons
if (ignore_packed) {
const char **ig;
for (ig = ignore_packed; *ig; ig++)
if (!matches_pack_name(p, *ig))
if (matches_pack_name(p, *ig))
break;
if (*ig)
goto next;