Merge branch 'jk/rev-list-disk-usage'
"git rev-list" command learned "--disk-usage" option. * jk/rev-list-disk-usage: docs/rev-list: add some examples of --disk-usage docs/rev-list: add an examples section rev-list: add --disk-usage option for calculating disk usage t: add --no-tag option to test_commit
This commit is contained in:
commit
6fe12b5215
@ -31,6 +31,99 @@ include::rev-list-options.txt[]
|
||||
|
||||
include::pretty-formats.txt[]
|
||||
|
||||
EXAMPLES
|
||||
--------
|
||||
|
||||
* Print the list of commits reachable from the current branch.
|
||||
+
|
||||
----------
|
||||
git rev-list HEAD
|
||||
----------
|
||||
|
||||
* Print the list of commits on this branch, but not present in the
|
||||
upstream branch.
|
||||
+
|
||||
----------
|
||||
git rev-list @{upstream}..HEAD
|
||||
----------
|
||||
|
||||
* Format commits with their author and commit message (see also the
|
||||
porcelain linkgit:git-log[1]).
|
||||
+
|
||||
----------
|
||||
git rev-list --format=medium HEAD
|
||||
----------
|
||||
|
||||
* Format commits along with their diffs (see also the porcelain
|
||||
linkgit:git-log[1], which can do this in a single process).
|
||||
+
|
||||
----------
|
||||
git rev-list HEAD |
|
||||
git diff-tree --stdin --format=medium -p
|
||||
----------
|
||||
|
||||
* Print the list of commits on the current branch that touched any
|
||||
file in the `Documentation` directory.
|
||||
+
|
||||
----------
|
||||
git rev-list HEAD -- Documentation/
|
||||
----------
|
||||
|
||||
* Print the list of commits authored by you in the past year, on
|
||||
any branch, tag, or other ref.
|
||||
+
|
||||
----------
|
||||
git rev-list --author=you@example.com --since=1.year.ago --all
|
||||
----------
|
||||
|
||||
* Print the list of objects reachable from the current branch (i.e., all
|
||||
commits and the blobs and trees they contain).
|
||||
+
|
||||
----------
|
||||
git rev-list --objects HEAD
|
||||
----------
|
||||
|
||||
* Compare the disk size of all reachable objects, versus those
|
||||
reachable from reflogs, versus the total packed size. This can tell
|
||||
you whether running `git repack -ad` might reduce the repository size
|
||||
(by dropping unreachable objects), and whether expiring reflogs might
|
||||
help.
|
||||
+
|
||||
----------
|
||||
# reachable objects
|
||||
git rev-list --disk-usage --objects --all
|
||||
# plus reflogs
|
||||
git rev-list --disk-usage --objects --all --reflog
|
||||
# total disk size used
|
||||
du -c .git/objects/pack/*.pack .git/objects/??/*
|
||||
# alternative to du: add up "size" and "size-pack" fields
|
||||
git count-objects -v
|
||||
----------
|
||||
|
||||
* Report the disk size of each branch, not including objects used by the
|
||||
current branch. This can find outliers that are contributing to a
|
||||
bloated repository size (e.g., because somebody accidentally committed
|
||||
large build artifacts).
|
||||
+
|
||||
----------
|
||||
git for-each-ref --format='%(refname)' |
|
||||
while read branch
|
||||
do
|
||||
size=$(git rev-list --disk-usage --objects HEAD..$branch)
|
||||
echo "$size $branch"
|
||||
done |
|
||||
sort -n
|
||||
----------
|
||||
|
||||
* Compare the on-disk size of branches in one group of refs, excluding
|
||||
another. If you co-mingle objects from multiple remotes in a single
|
||||
repository, this can show which remotes are contributing to the
|
||||
repository size (taking the size of `origin` as a baseline).
|
||||
+
|
||||
----------
|
||||
git rev-list --disk-usage --objects --remotes=$suspect --not --remotes=origin
|
||||
----------
|
||||
|
||||
GIT
|
||||
---
|
||||
Part of the linkgit:git[1] suite
|
||||
|
@ -227,6 +227,15 @@ ifdef::git-rev-list[]
|
||||
test the exit status to see if a range of objects is fully
|
||||
connected (or not). It is faster than redirecting stdout
|
||||
to `/dev/null` as the output does not have to be formatted.
|
||||
|
||||
--disk-usage::
|
||||
Suppress normal output; instead, print the sum of the bytes used
|
||||
for on-disk storage by the selected commits or objects. This is
|
||||
equivalent to piping the output into `git cat-file
|
||||
--batch-check='%(objectsize:disk)'`, except that it runs much
|
||||
faster (especially with `--use-bitmap-index`). See the `CAVEATS`
|
||||
section in linkgit:git-cat-file[1] for the limitations of what
|
||||
"on-disk storage" means.
|
||||
endif::git-rev-list[]
|
||||
|
||||
--cherry-mark::
|
||||
|
@ -80,6 +80,19 @@ static int arg_show_object_names = 1;
|
||||
|
||||
#define DEFAULT_OIDSET_SIZE (16*1024)
|
||||
|
||||
static int show_disk_usage;
|
||||
/*
 * Running sum of get_object_disk_usage() results for the commits and
 * objects shown by the regular (non-bitmap) traversal; printed at the end
 * when show_disk_usage is set.
 */
static off_t total_disk_usage;
|
||||
|
||||
static off_t get_object_disk_usage(struct object *obj)
|
||||
{
|
||||
off_t size;
|
||||
struct object_info oi = OBJECT_INFO_INIT;
|
||||
oi.disk_sizep = &size;
|
||||
if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
|
||||
die(_("unable to get disk usage of %s"), oid_to_hex(&obj->oid));
|
||||
return size;
|
||||
}
|
||||
|
||||
static void finish_commit(struct commit *commit);
|
||||
static void show_commit(struct commit *commit, void *data)
|
||||
{
|
||||
@ -88,6 +101,9 @@ static void show_commit(struct commit *commit, void *data)
|
||||
|
||||
display_progress(progress, ++progress_counter);
|
||||
|
||||
if (show_disk_usage)
|
||||
total_disk_usage += get_object_disk_usage(&commit->object);
|
||||
|
||||
if (info->flags & REV_LIST_QUIET) {
|
||||
finish_commit(commit);
|
||||
return;
|
||||
@ -258,6 +274,8 @@ static void show_object(struct object *obj, const char *name, void *cb_data)
|
||||
if (finish_object(obj, name, cb_data))
|
||||
return;
|
||||
display_progress(progress, ++progress_counter);
|
||||
if (show_disk_usage)
|
||||
total_disk_usage += get_object_disk_usage(obj);
|
||||
if (info->flags & REV_LIST_QUIET)
|
||||
return;
|
||||
|
||||
@ -452,6 +470,23 @@ static int try_bitmap_traversal(struct rev_info *revs,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int try_bitmap_disk_usage(struct rev_info *revs,
|
||||
struct list_objects_filter_options *filter)
|
||||
{
|
||||
struct bitmap_index *bitmap_git;
|
||||
|
||||
if (!show_disk_usage)
|
||||
return -1;
|
||||
|
||||
bitmap_git = prepare_bitmap_walk(revs, filter);
|
||||
if (!bitmap_git)
|
||||
return -1;
|
||||
|
||||
printf("%"PRIuMAX"\n",
|
||||
(uintmax_t)get_disk_usage_from_bitmap(bitmap_git, revs));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmd_rev_list(int argc, const char **argv, const char *prefix)
|
||||
{
|
||||
struct rev_info revs;
|
||||
@ -584,6 +619,12 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(arg, "--disk-usage")) {
|
||||
show_disk_usage = 1;
|
||||
info.flags |= REV_LIST_QUIET;
|
||||
continue;
|
||||
}
|
||||
|
||||
usage(rev_list_usage);
|
||||
|
||||
}
|
||||
@ -626,6 +667,8 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
|
||||
if (use_bitmap_index) {
|
||||
if (!try_bitmap_count(&revs, &filter_options))
|
||||
return 0;
|
||||
if (!try_bitmap_disk_usage(&revs, &filter_options))
|
||||
return 0;
|
||||
if (!try_bitmap_traversal(&revs, &filter_options))
|
||||
return 0;
|
||||
}
|
||||
@ -690,5 +733,8 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
|
||||
printf("%d\n", revs.count_left + revs.count_right);
|
||||
}
|
||||
|
||||
if (show_disk_usage)
|
||||
printf("%"PRIuMAX"\n", (uintmax_t)total_disk_usage);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1430,3 +1430,84 @@ int bitmap_has_oid_in_uninteresting(struct bitmap_index *bitmap_git,
|
||||
return bitmap_git &&
|
||||
bitmap_walk_contains(bitmap_git, bitmap_git->haves, oid);
|
||||
}
|
||||
|
||||
static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git,
|
||||
enum object_type object_type)
|
||||
{
|
||||
struct bitmap *result = bitmap_git->result;
|
||||
struct packed_git *pack = bitmap_git->pack;
|
||||
off_t total = 0;
|
||||
struct ewah_iterator it;
|
||||
eword_t filter;
|
||||
size_t i;
|
||||
|
||||
init_type_iterator(&it, bitmap_git, object_type);
|
||||
for (i = 0; i < result->word_alloc &&
|
||||
ewah_iterator_next(&filter, &it); i++) {
|
||||
eword_t word = result->words[i] & filter;
|
||||
size_t base = (i * BITS_IN_EWORD);
|
||||
unsigned offset;
|
||||
|
||||
if (!word)
|
||||
continue;
|
||||
|
||||
for (offset = 0; offset < BITS_IN_EWORD; offset++) {
|
||||
size_t pos;
|
||||
|
||||
if ((word >> offset) == 0)
|
||||
break;
|
||||
|
||||
offset += ewah_bit_ctz64(word >> offset);
|
||||
pos = base + offset;
|
||||
total += pack_pos_to_offset(pack, pos + 1) -
|
||||
pack_pos_to_offset(pack, pos);
|
||||
}
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
|
||||
{
|
||||
struct bitmap *result = bitmap_git->result;
|
||||
struct packed_git *pack = bitmap_git->pack;
|
||||
struct eindex *eindex = &bitmap_git->ext_index;
|
||||
off_t total = 0;
|
||||
struct object_info oi = OBJECT_INFO_INIT;
|
||||
off_t object_size;
|
||||
size_t i;
|
||||
|
||||
oi.disk_sizep = &object_size;
|
||||
|
||||
for (i = 0; i < eindex->count; i++) {
|
||||
struct object *obj = eindex->objects[i];
|
||||
|
||||
if (!bitmap_get(result, pack->num_objects + i))
|
||||
continue;
|
||||
|
||||
if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
|
||||
die(_("unable to get disk usage of %s"),
|
||||
oid_to_hex(&obj->oid));
|
||||
|
||||
total += object_size;
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
off_t get_disk_usage_from_bitmap(struct bitmap_index *bitmap_git,
|
||||
struct rev_info *revs)
|
||||
{
|
||||
off_t total = 0;
|
||||
|
||||
total += get_disk_usage_for_type(bitmap_git, OBJ_COMMIT);
|
||||
if (revs->tree_objects)
|
||||
total += get_disk_usage_for_type(bitmap_git, OBJ_TREE);
|
||||
if (revs->blob_objects)
|
||||
total += get_disk_usage_for_type(bitmap_git, OBJ_BLOB);
|
||||
if (revs->tag_objects)
|
||||
total += get_disk_usage_for_type(bitmap_git, OBJ_TAG);
|
||||
|
||||
total += get_disk_usage_for_extended(bitmap_git);
|
||||
|
||||
return total;
|
||||
}
|
||||
|
@ -68,6 +68,8 @@ int bitmap_walk_contains(struct bitmap_index *,
|
||||
*/
|
||||
int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_id *oid);
|
||||
|
||||
/*
 * Return the total on-disk size of the objects selected by the bitmap
 * walk: commits always, plus trees, blobs and tags when the corresponding
 * tree_objects / blob_objects / tag_objects flag of the rev_info is set,
 * plus selected objects from the bitmap's extended index.
 */
off_t get_disk_usage_from_bitmap(struct bitmap_index *, struct rev_info *);
|
||||
|
||||
void bitmap_writer_show_progress(int show);
|
||||
void bitmap_writer_set_checksum(unsigned char *sha1);
|
||||
void bitmap_writer_build_type_index(struct packing_data *to_pack,
|
||||
|
@ -31,13 +31,8 @@ test_expect_success '"git log :/a -- " should not be ambiguous' '
|
||||
test_expect_success '"git log :/detached -- " should find a commit only in HEAD' '
|
||||
test_when_finished "git checkout main" &&
|
||||
git checkout --detach &&
|
||||
# Must manually call `test_tick` instead of using `test_commit`,
|
||||
# because the latter additionally creates a tag, which would make
|
||||
# the commit reachable not only via HEAD.
|
||||
test_tick &&
|
||||
git commit --allow-empty -m detached &&
|
||||
test_tick &&
|
||||
git commit --allow-empty -m something-else &&
|
||||
test_commit --no-tag detached &&
|
||||
test_commit --no-tag something-else &&
|
||||
git log :/detached --
|
||||
'
|
||||
|
||||
|
51
t/t6115-rev-list-du.sh
Executable file
51
t/t6115-rev-list-du.sh
Executable file
@ -0,0 +1,51 @@
|
||||
#!/bin/sh
|
||||
|
||||
test_description='basic tests of rev-list --disk-usage'
|
||||
. ./test-lib.sh
|
||||
|
||||
# we want a mix of reachable and unreachable, as well as
|
||||
# objects in the bitmapped pack and some outside of it
|
||||
test_expect_success 'set up repository' '
|
||||
test_commit --no-tag one &&
|
||||
test_commit --no-tag two &&
|
||||
git repack -adb &&
|
||||
git reset --hard HEAD^ &&
|
||||
test_commit --no-tag three &&
|
||||
test_commit --no-tag four &&
|
||||
git reset --hard HEAD^
|
||||
'
|
||||
|
||||
# We don't want to hardcode sizes, because they depend on the exact details of
|
||||
# packing, zlib, etc. We'll assume that the regular rev-list and cat-file
|
||||
# machinery works and compare the --disk-usage output to that.
|
||||
disk_usage_slow () {
	# Reference implementation: list the objects with rev-list, ask
	# cat-file for each one's on-disk size, and sum the sizes with perl.
	git rev-list --no-object-names "$@" |
		git cat-file --batch-check="%(objectsize:disk)" |
		perl -lne '$total += $_; END { print $total}'
}
|
||||
|
||||
# check behavior with given rev-list options; note that
|
||||
# whitespace is not preserved in args
|
||||
check_du () {
	# The arguments are flattened into one string and expanded into the
	# double-quoted test bodies below when the tests are defined.
	args="$*"

	# Expected value, computed by the slow rev-list | cat-file pipeline.
	test_expect_success "generate expected size ($args)" "
		disk_usage_slow $args >expect
	"

	# Regular traversal.
	test_expect_success "rev-list --disk-usage without bitmaps ($args)" "
		git rev-list --disk-usage $args >actual &&
		test_cmp expect actual
	"

	# Bitmap-assisted traversal must report the same number.
	test_expect_success "rev-list --disk-usage with bitmaps ($args)" "
		git rev-list --disk-usage --use-bitmap-index $args >actual &&
		test_cmp expect actual
	"
}
|
||||
|
||||
check_du HEAD
|
||||
check_du --objects HEAD
|
||||
check_du --objects HEAD^..HEAD
|
||||
|
||||
test_done
|
@ -190,6 +190,7 @@ test_commit () {
|
||||
author= &&
|
||||
signoff= &&
|
||||
indir= &&
|
||||
no_tag= &&
|
||||
while test $# != 0
|
||||
do
|
||||
case "$1" in
|
||||
@ -216,6 +217,9 @@ test_commit () {
|
||||
indir="$2"
|
||||
shift
|
||||
;;
|
||||
--no-tag)
|
||||
no_tag=yes
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
@ -238,7 +242,10 @@ test_commit () {
|
||||
git ${indir:+ -C "$indir"} commit \
|
||||
${author:+ --author "$author"} \
|
||||
$signoff -m "$1" &&
|
||||
git ${indir:+ -C "$indir"} tag "${4:-$1}"
|
||||
if test -z "$no_tag"
|
||||
then
|
||||
git ${indir:+ -C "$indir"} tag "${4:-$1}"
|
||||
fi
|
||||
}
|
||||
|
||||
# Call test_merge with the arguments "<message> <commit>", where <commit>
|
||||
|
Loading…
Reference in New Issue
Block a user