#include "builtin.h"
#include "cache.h"
#include "progress.h"

static const char prune_packed_usage[] =
"git-prune-packed [-n] [-q]";

#define DRY_RUN 01
#define VERBOSE 02

static struct progress *progress;

static void prune_dir(int i, DIR *dir, char *pathname, int len, int opts)
{
	struct dirent *de;
	char hex[40];

	sprintf(hex, "%02x", i);
	while ((de = readdir(dir)) != NULL) {
		unsigned char sha1[20];
		if (strlen(de->d_name) != 38)
			continue;
		memcpy(hex+2, de->d_name, 38);
		if (get_sha1_hex(hex, sha1))
			continue;
		if (!has_sha1_pack(sha1, NULL))
			continue;
		memcpy(pathname + len, de->d_name, 38);
		if (opts & DRY_RUN)
			printf("rm -f %s\n", pathname);
		else if (unlink(pathname) < 0)
			error("unable to unlink %s", pathname);
		display_progress(progress, i + 1);
	}
	/*
	 * Object fan-out directories are re-created on demand when loose
	 * objects are written, so once this one is empty it can simply be
	 * rmdir()'d here; keeping 256 empty directories around would only
	 * waste disk space (roughly 1MB on many filesystems).
	 */
	pathname[len] = 0;
	rmdir(pathname);
}

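/*
 * Illustrative sketch, not part of this file: the "re-created on demand"
 * behaviour referred to above amounts to the object writer making sure
 * the two-hex-digit fan-out directory exists before writing a loose
 * object into it.  The helper name and permission bits below are
 * assumptions for the example, not git's actual API.
 */
#include <sys/stat.h>
#include <errno.h>

static int ensure_fanout_dir(const char *path)
{
	/* Creating a directory that already exists is not an error here. */
	if (mkdir(path, 0777) < 0 && errno != EEXIST)
		return -1;
	return 0;
}
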
void prune_packed_objects(int opts)
{
	int i;
	static char pathname[PATH_MAX];
	const char *dir = get_object_directory();
	int len = strlen(dir);

	if (opts == VERBOSE)
		progress = start_progress_delay("Removing duplicate objects",
						256, 95, 2);

	if (len > PATH_MAX - 42)
		die("impossible object directory");
	memcpy(pathname, dir, len);
	if (len && pathname[len-1] != '/')
		pathname[len++] = '/';
	for (i = 0; i < 256; i++) {
		DIR *d;

		sprintf(pathname + len, "%02x/", i);
		d = opendir(pathname);
		if (!d)
			continue;
		prune_dir(i, d, pathname, len + 3, opts);
		closedir(d);
	}
	stop_progress(&progress);
}

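/*
 * Illustrative sketch, not part of this file: prune_packed_objects()
 * walks the 256 fan-out directories "00" through "ff" because a loose
 * object's 40-hex-digit name is stored as a 2-digit directory name plus
 * a 38-character file name.  The helper below is hypothetical and only
 * shows how such a path is assembled.
 */
#include <stdio.h>

static void loose_object_path(char *buf, size_t n,
			      const char *objdir, const char *hex40)
{
	/* e.g. ".git/objects" + "/ab/" + the remaining 38 hex digits */
	snprintf(buf, n, "%s/%.2s/%s", objdir, hex40, hex40 + 2);
}
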
int cmd_prune_packed(int argc, const char **argv, const char *prefix)
{
	int i;
	int opts = VERBOSE;

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg == '-') {
			if (!strcmp(arg, "-n"))
				opts |= DRY_RUN;
			else if (!strcmp(arg, "-q"))
				opts &= ~VERBOSE;
			else
				usage(prune_packed_usage);
			continue;
		}
		/* Handle arguments here .. */
		usage(prune_packed_usage);
	}
	/*
	 * Unlike most git operations, prune-packed actually deletes
	 * objects that it believes exist elsewhere (in a pack), so a
	 * crash before those packed copies have been flushed to disk
	 * could lose data.  A blunt sync() before removal closes that
	 * window; this is a rarely-run maintenance command, so the cost
	 * is acceptable.
	 */
	sync();
	prune_packed_objects(opts);
	return 0;
}
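/*
 * Illustrative sketch, not part of this file: the global sync() above is
 * deliberately blunt.  A finer-grained approach would be to fsync() just
 * the files whose contents must survive a crash (for instance the pack
 * that now holds the duplicated objects) before unlinking the loose
 * copies.  The helper below is an assumption for the example, not git's
 * API.
 */
#include <fcntl.h>
#include <unistd.h>

static int flush_file_to_disk(const char *path)
{
	int fd = open(path, O_RDONLY);
	int ret = 0;

	if (fd < 0)
		return -1;
	if (fsync(fd) < 0)
		ret = -1;
	if (close(fd) < 0)
		ret = -1;
	return ret;
}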