From 6a7bcb54714c55234a292047ea6bb657bd5ab1b5 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 13 Jun 2016 00:33:54 -0400 Subject: [PATCH 1/3] repack: document --unpack-unreachable option This was added back in 7e52f56 (gc: do not explode objects which will be immediately pruned, 2012-04-07), but not documented at the time, since it was an internal detail between git-gc and git-repack. However, as people with complicated setups may want to effectively reimplement the steps of git-gc themselves, it is nice for us to document these interfaces. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/git-repack.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index b9c02ce481..cde7b4441d 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -128,6 +128,12 @@ other objects in that pack they already have locally. with `-b` or `repack.writeBitmaps`, as it ensures that the bitmapped packfile has the necessary objects. +--unpack-unreachable=:: + When loosening unreachable objects, do not bother loosening any + objects older than ``. This can be used to optimize out + the write of any objects that would be immediately pruned by + a follow-up `git prune`. + Configuration ------------- From 905f27b86ac1f50c6870a064c3b5b9d82c97a145 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 13 Jun 2016 00:36:28 -0400 Subject: [PATCH 2/3] repack: add --keep-unreachable option The usual way to do a full repack (and what is done by git-gc) is to run "repack -Ad --unpack-unreachable=", which will loosen any unreachable objects newer than "", and drop any older ones. This is a safer alternative to "repack -ad", because "" becomes a grace period during which we will not drop any new objects that are about to be referenced. However, it isn't perfectly safe. It's always possible that a process is about to reference an old object. Even if that process were to take care to update the timestamp on the object, there is no atomicity with a simultaneously running "repack" process. So while unlikely, there is a small race wherein we may drop an object that is in the process of being referenced. If you do automated repacking on a large number of active repositories, you may hit it eventually, and the result is a corrupted repository. It would be nice to fix that race in the long run, but it's complicated. In the meantime, there is a much simpler strategy for automated repository maintenance: do not drop objects at all. We already have a "--keep-unreachable" option in pack-objects; we just need to plumb it through from git-repack. Note that this _isn't_ plumbed through from git-gc, so at this point it's strictly a tool for people doing their own advanced repository maintenance strategy. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/git-repack.txt | 6 ++++++ builtin/repack.c | 9 +++++++++ t/t7701-repack-unpack-unreachable.sh | 15 +++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index cde7b4441d..68702eab19 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -134,6 +134,12 @@ other objects in that pack they already have locally. the write of any objects that would be immediately pruned by a follow-up `git prune`. +-k:: +--keep-unreachable:: + When used with `-ad`, any unreachable objects from existing + packs will be appended to the end of the packfile instead of + being removed. + Configuration ------------- diff --git a/builtin/repack.c b/builtin/repack.c index 858db38f52..573e66c1b4 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -146,6 +146,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) int pack_everything = 0; int delete_redundant = 0; const char *unpack_unreachable = NULL; + int keep_unreachable = 0; const char *window = NULL, *window_memory = NULL; const char *depth = NULL; const char *max_pack_size = NULL; @@ -175,6 +176,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) N_("write bitmap index")), OPT_STRING(0, "unpack-unreachable", &unpack_unreachable, N_("approxidate"), N_("with -A, do not loosen objects older than this")), + OPT_BOOL('k', "keep-unreachable", &keep_unreachable, + N_("with -a, repack unreachable objects")), OPT_STRING(0, "window", &window, N_("n"), N_("size of the window used for delta compression")), OPT_STRING(0, "window-memory", &window_memory, N_("bytes"), @@ -196,6 +199,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (delete_redundant && repository_format_precious_objects) die(_("cannot delete packs in a precious-objects repo")); + if (keep_unreachable && + (unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE))) + die(_("--keep-unreachable and -A are incompatible")); + if (pack_kept_objects < 0) pack_kept_objects = write_bitmaps; @@ -239,6 +246,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) } else if (pack_everything & LOOSEN_UNREACHABLE) { argv_array_push(&cmd.args, "--unpack-unreachable"); + } else if (keep_unreachable) { + argv_array_push(&cmd.args, "--keep-unreachable"); } else { argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1"); } diff --git a/t/t7701-repack-unpack-unreachable.sh b/t/t7701-repack-unpack-unreachable.sh index b66e383866..f13df43299 100755 --- a/t/t7701-repack-unpack-unreachable.sh +++ b/t/t7701-repack-unpack-unreachable.sh @@ -122,4 +122,19 @@ test_expect_success 'keep packed objects found only in index' ' git cat-file blob :file ' +test_expect_success 'repack -k keeps unreachable packed objects' ' + # create packed-but-unreachable object + sha1=$(echo unreachable-packed | git hash-object -w --stdin) && + pack=$(echo $sha1 | git pack-objects .git/objects/pack/pack) && + git prune-packed && + + # -k should keep it + git repack -adk && + git cat-file -p $sha1 && + + # and double check that without -k it would have been removed + git repack -ad && + test_must_fail git cat-file -p $sha1 +' + test_done From e26a8c4721ceaf4c59e33bbd4e60f777b7ea9b62 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 13 Jun 2016 00:38:04 -0400 Subject: [PATCH 3/3] repack: extend --keep-unreachable to loose objects If you use "repack -adk" currently, we will pack all objects that are already packed into the new pack, and then drop the old packs. However, loose unreachable objects will be left as-is. In theory these are meant to expire eventually with "git prune". But if you are using "repack -k", you probably want to keep things forever and therefore do not run "git prune" at all. Meaning those loose objects may build up over time and end up fooling any object-count heuristics (such as the one done by "gc --auto", though since git-gc does not support "repack -k", this really applies to whatever custom scripts people might have driving "repack -k"). With this patch, we instead stuff any loose unreachable objects into the pack along with the already-packed unreachable objects. This may seem wasteful, but it is really no more so than using "repack -k" in the first place. We are at a slight disadvantage, in that we have no useful ordering for the result, or names to hand to the delta code. However, this is again no worse than what "repack -k" is already doing for the packed objects. The packing of these objects doesn't matter much because they should not be accessed frequently (unless they actually _do_ become referenced, but then they would get moved to a different part of the packfile during the next repack). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/git-repack.txt | 3 ++- builtin/pack-objects.c | 31 ++++++++++++++++++++++++++++ builtin/repack.c | 1 + t/t7701-repack-unpack-unreachable.sh | 13 ++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index 68702eab19..b58b6b5972 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -138,7 +138,8 @@ other objects in that pack they already have locally. --keep-unreachable:: When used with `-ad`, any unreachable objects from existing packs will be appended to the end of the packfile instead of - being removed. + being removed. In addition, any unreachable loose objects will + be packed (and their loose counterparts removed). Configuration ------------- diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 8f5e358e22..a2f8cfdec0 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -44,6 +44,7 @@ static int non_empty; static int reuse_delta = 1, reuse_object = 1; static int keep_unreachable, unpack_unreachable, include_tag; static unsigned long unpack_unreachable_expiration; +static int pack_loose_unreachable; static int local; static int incremental; static int ignore_packed_keep; @@ -2378,6 +2379,32 @@ static void add_objects_in_unpacked_packs(struct rev_info *revs) free(in_pack.array); } +static int add_loose_object(const unsigned char *sha1, const char *path, + void *data) +{ + enum object_type type = sha1_object_info(sha1, NULL); + + if (type < 0) { + warning("loose object at %s could not be examined", path); + return 0; + } + + add_object_entry(sha1, type, "", 0); + return 0; +} + +/* + * We actually don't even have to worry about reachability here. + * add_object_entry will weed out duplicates, so we just add every + * loose object we find. + */ +static void add_unreachable_loose_objects(void) +{ + for_each_loose_file_in_objdir(get_object_directory(), + add_loose_object, + NULL, NULL, NULL); +} + static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1) { static struct packed_git *last_found = (void *)1; @@ -2547,6 +2574,8 @@ static void get_object_list(int ac, const char **av) if (keep_unreachable) add_objects_in_unpacked_packs(&revs); + if (pack_loose_unreachable) + add_unreachable_loose_objects(); if (unpack_unreachable) loosen_unused_packed_objects(&revs); @@ -2647,6 +2676,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) N_("include tag objects that refer to objects to be packed")), OPT_BOOL(0, "keep-unreachable", &keep_unreachable, N_("keep unreachable objects")), + OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable, + N_("pack loose unreachable objects")), { OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"), N_("unpack unreachable objects newer than