git-commit-vandalism/git-repack.sh

146 lines
3.4 KiB
Bash
Raw Normal View History

#!/bin/sh
#
# Copyright (c) 2005 Linus Torvalds
#
OPTIONS_KEEPDASHDASH=
OPTIONS_SPEC="\
git-repack [options]
--
a pack everything in a single pack
let pack-objects do the writing of unreachable objects as loose objects Commit ccc1297226b184c40459e9d373cc9eebfb7bd898 changed the behavior of 'git repack -A' so unreachable objects are stored as loose objects. However it did so in a naive and inn efficient way by making packs about to be deleted inaccessible and feeding their content through 'git unpack-objects'. While this works, there are major flaws with this approach: - It is unacceptably sloooooooooooooow. In the Linux kernel repository with no actual unreachable objects, doing 'git repack -A -d' before: real 2m33.220s user 2m21.675s sys 0m3.510s And with this change: real 0m36.849s user 0m24.365s sys 0m1.950s For reference, here's the timing for 'git repack -a -d': real 0m35.816s user 0m22.571s sys 0m2.011s This is explained by the fact that 'git unpack-objects' was used to unpack _every_ objects even if (almost) 100% of them were thrown away. - There is a black out period. Between the removal of the .idx file for the redundant pack and the completion of its unpacking, the unreachable objects become completely unaccessible. This is not a big issue as we're talking about unreachable objects, but some consistency is always good. - There is no way to easily set a sensible mtime for the newly created unreachable loose objects. So, while having a command called "pack-objects" to perform object unpacking looks really odd, this is probably the best compromize to be able to solve the above issues in an efficient way. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-05-14 07:33:53 +02:00
A same as -a, and turn unreachable objects loose
d remove redundant packs, and run git-prune-packed
f pass --no-reuse-delta to git-pack-objects
n do not run git-update-server-info
q,quiet be quiet
l pass --local to git-pack-objects
Packing constraints
window= size of the window used for delta compression
window-memory= same as the above, but limit memory size instead of entries count
depth= limits the maximum delta depth
max-pack-size= maximum size of each packfile
"
SUBDIRECTORY_OK='Yes'
. git-sh-setup
let pack-objects do the writing of unreachable objects as loose objects Commit ccc1297226b184c40459e9d373cc9eebfb7bd898 changed the behavior of 'git repack -A' so unreachable objects are stored as loose objects. However it did so in a naive and inn efficient way by making packs about to be deleted inaccessible and feeding their content through 'git unpack-objects'. While this works, there are major flaws with this approach: - It is unacceptably sloooooooooooooow. In the Linux kernel repository with no actual unreachable objects, doing 'git repack -A -d' before: real 2m33.220s user 2m21.675s sys 0m3.510s And with this change: real 0m36.849s user 0m24.365s sys 0m1.950s For reference, here's the timing for 'git repack -a -d': real 0m35.816s user 0m22.571s sys 0m2.011s This is explained by the fact that 'git unpack-objects' was used to unpack _every_ objects even if (almost) 100% of them were thrown away. - There is a black out period. Between the removal of the .idx file for the redundant pack and the completion of its unpacking, the unreachable objects become completely unaccessible. This is not a big issue as we're talking about unreachable objects, but some consistency is always good. - There is no way to easily set a sensible mtime for the newly created unreachable loose objects. So, while having a command called "pack-objects" to perform object unpacking looks really odd, this is probably the best compromize to be able to solve the above issues in an efficient way. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-05-14 07:33:53 +02:00
no_update_info= all_into_one= remove_redundant= unpack_unreachable=
local= quiet= no_reuse= extra=
while test $# != 0
do
case "$1" in
-n) no_update_info=t ;;
-a) all_into_one=t ;;
-A) all_into_one=t
let pack-objects do the writing of unreachable objects as loose objects Commit ccc1297226b184c40459e9d373cc9eebfb7bd898 changed the behavior of 'git repack -A' so unreachable objects are stored as loose objects. However it did so in a naive and inn efficient way by making packs about to be deleted inaccessible and feeding their content through 'git unpack-objects'. While this works, there are major flaws with this approach: - It is unacceptably sloooooooooooooow. In the Linux kernel repository with no actual unreachable objects, doing 'git repack -A -d' before: real 2m33.220s user 2m21.675s sys 0m3.510s And with this change: real 0m36.849s user 0m24.365s sys 0m1.950s For reference, here's the timing for 'git repack -a -d': real 0m35.816s user 0m22.571s sys 0m2.011s This is explained by the fact that 'git unpack-objects' was used to unpack _every_ objects even if (almost) 100% of them were thrown away. - There is a black out period. Between the removal of the .idx file for the redundant pack and the completion of its unpacking, the unreachable objects become completely unaccessible. This is not a big issue as we're talking about unreachable objects, but some consistency is always good. - There is no way to easily set a sensible mtime for the newly created unreachable loose objects. So, while having a command called "pack-objects" to perform object unpacking looks really odd, this is probably the best compromize to be able to solve the above issues in an efficient way. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-05-14 07:33:53 +02:00
unpack_unreachable=--unpack-unreachable ;;
-d) remove_redundant=t ;;
-q) quiet=-q ;;
-f) no_reuse=--no-reuse-object ;;
-l) local=--local ;;
--max-pack-size|--window|--window-memory|--depth)
extra="$extra $1=$2"; shift ;;
--) shift; break;;
*) usage ;;
esac
shift
done
# Later we will default repack.UseDeltaBaseOffset to true
default_dbo=false
case "`git config --bool repack.usedeltabaseoffset ||
echo $default_dbo`" in
true)
extra="$extra --delta-base-offset" ;;
esac
PACKDIR="$GIT_OBJECT_DIRECTORY/pack"
PACKTMP="$GIT_OBJECT_DIRECTORY/.tmp-$$-pack"
rm -f "$PACKTMP"-*
trap 'rm -f "$PACKTMP"-*' 0 1 2 3 15
# There will be more repacking strategies to come...
case ",$all_into_one," in
,,)
args='--unpacked --incremental'
;;
,t,)
if [ -d "$PACKDIR" ]; then
for e in `cd "$PACKDIR" && find . -type f -name '*.pack' \
| sed -e 's/^\.\///' -e 's/\.pack$//'`
do
if [ -e "$PACKDIR/$e.keep" ]; then
: keep
else
args="$args --unpacked=$e.pack"
existing="$existing $e"
fi
done
fi
if test -z "$args"
then
args='--unpacked --incremental'
let pack-objects do the writing of unreachable objects as loose objects Commit ccc1297226b184c40459e9d373cc9eebfb7bd898 changed the behavior of 'git repack -A' so unreachable objects are stored as loose objects. However it did so in a naive and inn efficient way by making packs about to be deleted inaccessible and feeding their content through 'git unpack-objects'. While this works, there are major flaws with this approach: - It is unacceptably sloooooooooooooow. In the Linux kernel repository with no actual unreachable objects, doing 'git repack -A -d' before: real 2m33.220s user 2m21.675s sys 0m3.510s And with this change: real 0m36.849s user 0m24.365s sys 0m1.950s For reference, here's the timing for 'git repack -a -d': real 0m35.816s user 0m22.571s sys 0m2.011s This is explained by the fact that 'git unpack-objects' was used to unpack _every_ objects even if (almost) 100% of them were thrown away. - There is a black out period. Between the removal of the .idx file for the redundant pack and the completion of its unpacking, the unreachable objects become completely unaccessible. This is not a big issue as we're talking about unreachable objects, but some consistency is always good. - There is no way to easily set a sensible mtime for the newly created unreachable loose objects. So, while having a command called "pack-objects" to perform object unpacking looks really odd, this is probably the best compromize to be able to solve the above issues in an efficient way. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-05-14 07:33:53 +02:00
elif test -n "$unpack_unreachable"
then
let pack-objects do the writing of unreachable objects as loose objects Commit ccc1297226b184c40459e9d373cc9eebfb7bd898 changed the behavior of 'git repack -A' so unreachable objects are stored as loose objects. However it did so in a naive and inn efficient way by making packs about to be deleted inaccessible and feeding their content through 'git unpack-objects'. While this works, there are major flaws with this approach: - It is unacceptably sloooooooooooooow. In the Linux kernel repository with no actual unreachable objects, doing 'git repack -A -d' before: real 2m33.220s user 2m21.675s sys 0m3.510s And with this change: real 0m36.849s user 0m24.365s sys 0m1.950s For reference, here's the timing for 'git repack -a -d': real 0m35.816s user 0m22.571s sys 0m2.011s This is explained by the fact that 'git unpack-objects' was used to unpack _every_ objects even if (almost) 100% of them were thrown away. - There is a black out period. Between the removal of the .idx file for the redundant pack and the completion of its unpacking, the unreachable objects become completely unaccessible. This is not a big issue as we're talking about unreachable objects, but some consistency is always good. - There is no way to easily set a sensible mtime for the newly created unreachable loose objects. So, while having a command called "pack-objects" to perform object unpacking looks really odd, this is probably the best compromize to be able to solve the above issues in an efficient way. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-05-14 07:33:53 +02:00
args="$args $unpack_unreachable"
fi
;;
esac
args="$args $local $quiet $no_reuse$extra"
names=$(git pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") ||
exit 1
if [ -z "$names" ]; then
if test -z "$quiet"; then
echo Nothing new to pack.
fi
fi
for name in $names ; do
fullbases="$fullbases pack-$name"
chmod a-w "$PACKTMP-$name.pack"
chmod a-w "$PACKTMP-$name.idx"
mkdir -p "$PACKDIR" || exit
for sfx in pack idx
do
if test -f "$PACKDIR/pack-$name.$sfx"
then
mv -f "$PACKDIR/pack-$name.$sfx" \
"$PACKDIR/old-pack-$name.$sfx"
fi
done &&
mv -f "$PACKTMP-$name.pack" "$PACKDIR/pack-$name.pack" &&
mv -f "$PACKTMP-$name.idx" "$PACKDIR/pack-$name.idx" &&
test -f "$PACKDIR/pack-$name.pack" &&
test -f "$PACKDIR/pack-$name.idx" || {
echo >&2 "Couldn't replace the existing pack with updated one."
echo >&2 "The original set of packs have been saved as"
echo >&2 "old-pack-$name.{pack,idx} in $PACKDIR."
exit 1
}
rm -f "$PACKDIR/old-pack-$name.pack" "$PACKDIR/old-pack-$name.idx"
done
if test "$remove_redundant" = t
then
# We know $existing are all redundant.
if [ -n "$existing" ]
then
sync
( cd "$PACKDIR" &&
for e in $existing
do
case " $fullbases " in
*" $e "*) ;;
*) rm -f "$e.pack" "$e.idx" "$e.keep" ;;
esac
done
)
fi
git prune-packed $quiet
fi
case "$no_update_info" in
t) : ;;
*) git-update-server-info ;;
esac