p0006-read-tree-checkout: perf test to time read-tree
Created t/perf/repos/many-files.sh to generate large, but artificial repositories. Created t/perf/repos/inflate-repo.sh to alter an EXISTING repo to have a set of large commits. This can be used to create a branch with 1M+ files in repositories like git.git or linux.git, but with more realistic content. It does this by making multiple copies of the entire worktree in a series of sub-directories. The branch name and ballast structure created by both scripts match, so either script can be used to generate very large test repositories for the following perf test. Created t/perf/p0006-read-tree-checkout.sh to measure performance on various read-tree, checkout, and update-index operations. This test can run using either normal repos or ones from the above scripts. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
a6db3fbb6e
commit
350d870143
67
t/perf/p0006-read-tree-checkout.sh
Executable file
67
t/perf/p0006-read-tree-checkout.sh
Executable file
@ -0,0 +1,67 @@
|
||||
#!/bin/sh
#
# This test measures the performance of various read-tree
# and checkout operations.  It is primarily interested in
# the algorithmic costs of index operations and recursive
# tree traversal -- and NOT disk I/O on thousands of files.

test_description="Tests performance of read-tree"

. ./perf-lib.sh

test_perf_default_repo

# If the test repo was generated by ./repos/many-files.sh
# then we know something about the data shape and branches,
# so we can isolate testing to the ballast-related commits
# and setup sparse-checkout so we don't have to populate
# the ballast files and directories.
#
# Otherwise, we make some general assumptions about the
# repo and consider the entire history of the current
# branch to be the ballast.
#
# Every step inside both branches is chained with "&&" so that a
# failing step fails the setup instead of being silently ignored.
# The sparse-checkout patterns are written with printf so the file
# content does not depend on tab-indented here-document stripping.

test_expect_success "setup repo" '
	if git rev-parse --verify refs/heads/p0006-ballast^{commit}
	then
		echo Assuming synthetic repo from many-files.sh &&
		git branch br_base master &&
		git branch br_ballast p0006-ballast^ &&
		git branch br_ballast_alias p0006-ballast^ &&
		git branch br_ballast_plus_1 p0006-ballast &&
		git config --local core.sparsecheckout 1 &&
		printf "%s\n" "/*" "!ballast/*" >.git/info/sparse-checkout
	else
		echo Assuming non-synthetic repo... &&
		git branch br_base $(git rev-list HEAD | tail -n 1) &&
		{
			git branch br_ballast HEAD^ ||
			error "no ancestor commit from current head"
		} &&
		git branch br_ballast_alias HEAD^ &&
		git branch br_ballast_plus_1 HEAD
	fi &&
	git checkout -q br_ballast &&
	nr_files=$(git ls-files | wc -l)
'

# NOTE(review): the titles below expand $nr_files when each test_perf
# line is reached, so they rely on the setup body above having been
# evaluated in this shell (not a subshell) -- confirm against perf-lib.

# Two-tree merge computed in memory only (-n: dry run).
test_perf "read-tree br_base br_ballast ($nr_files)" '
	git read-tree -m br_base br_ballast -n
'

# Round trip between the base commit and the full ballast.
test_perf "switch between br_base br_ballast ($nr_files)" '
	git checkout -q br_base &&
	git checkout -q br_ballast
'

# Round trip between the ballast commit and the commit after it.
test_perf "switch between br_ballast br_ballast_plus_1 ($nr_files)" '
	git checkout -q br_ballast_plus_1 &&
	git checkout -q br_ballast
'

# Round trip between two branches created from the same commit.
test_perf "switch between aliases ($nr_files)" '
	git checkout -q br_ballast_alias &&
	git checkout -q br_ballast
'

test_done
|
1
t/perf/repos/.gitignore
vendored
Normal file
1
t/perf/repos/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
gen-*/
|
85
t/perf/repos/inflate-repo.sh
Executable file
85
t/perf/repos/inflate-repo.sh
Executable file
@ -0,0 +1,85 @@
|
||||
#!/bin/sh
|
||||
# Inflate the size of an EXISTING repo.
|
||||
#
|
||||
# This script should be run inside the worktree of a TEST repo.
|
||||
# It will use the contents of the current HEAD to generate a
|
||||
# commit containing copies of the current worktree such that the
|
||||
# total size of the commit has at least <target_size> files.
|
||||
#
|
||||
# Usage: [-t target_size] [-b branch_name]
|
||||
|
||||
set -e

# Defaults; -t and -b override target_size and branch_name.
target_size=10000
branch_name=p0006-ballast
ballast=ballast

# die <message...>: print "error: <message>" on stderr and abort.
die () {
	echo "error: $*" >&2
	exit 1
}

# Parse "[-t target_size] [-b branch_name]".  Every option takes
# exactly one value; anything else is rejected.
while test "$#" -ne 0
do
	case "$1" in
	-b)
		test "$#" -ge 2 || die "-b requires an argument"
		branch_name=$2
		shift 2 ;;
	-t)
		test "$#" -ge 2 || die "-t requires an argument"
		target_size=$2
		shift 2 ;;
	*)
		die "unknown option '$1'" ;;
	esac
done

# target_size is compared numerically against file counts later in
# the script; refuse anything that is not a plain decimal number so
# those comparisons cannot fail halfway through building the branch.
case "$target_size" in
''|*[!0-9]*)
	die "-t expects a non-negative integer, got '$target_size'" ;;
esac
|
||||
|
||||
# A literal TAB, spelled out so that the sed expression below cannot
# be broken by tools that convert tabs to spaces.  "git ls-tree"
# separates the object name from the path with exactly one TAB.
tab=$(printf '\t')

# Remove the scratch listing however the script ends.
trap 'rm -f GEN_src_list' EXIT

# Fails (and aborts under "set -e") when HEAD is detached, before any
# scratch file or branch has been created.
src_branch=$(git symbolic-ref --short HEAD)

git ls-tree -r HEAD >GEN_src_list
# Arithmetic expansion strips any padding "wc -l" may print.
nr_src_files=$(($(wc -l <GEN_src_list)))

echo "Branch $src_branch initially has $nr_src_files files."

# With nothing to copy the ballast loop below would never terminate.
if test "$nr_src_files" -eq 0
then
	echo "error: HEAD contains no files to copy" >&2
	exit 1
fi

if test "$target_size" -le "$nr_src_files"
then
	echo "Repository already exceeds target size $target_size."
	exit 1
fi

# Create well-known branch and add 1 file change to start
# it off before the ballast.
git checkout -b "$branch_name" HEAD
echo "$target_size" >inflate-repo.params
git add inflate-repo.params
git commit -q -m params

# Create ballast in our branch: each pass stages one more copy of
# the original tree under $ballast/<copy>/ by rewriting the path
# column of the listing and feeding it straight into the index.
copy=1
nr_files=$nr_src_files
while test "$nr_files" -lt "$target_size"
do
	sed -e "s|${tab}|${tab}${ballast}/${copy}/|" <GEN_src_list |
	git update-index --index-info

	nr_files=$((nr_files + nr_src_files))
	copy=$((copy + 1))
done
rm -f GEN_src_list
git commit -q -m "ballast"

# Modify 1 file and commit.
echo "$target_size" >>inflate-repo.params
git add inflate-repo.params
git commit -q -m "ballast plus 1"

nr_files=$(git ls-files | wc -l)

# Checkout the original branch to put repo in canonical state
# (because the perf test may need to clone and enable
# sparse-checkout before attempting to checkout a commit with the
# ballast (because it may contain 100K directories and 1M files)).
git checkout "$src_branch"

echo "Repository inflated. Branch $branch_name has $nr_files files."

exit 0
|
110
t/perf/repos/many-files.sh
Executable file
110
t/perf/repos/many-files.sh
Executable file
@ -0,0 +1,110 @@
|
||||
#!/bin/sh
|
||||
# Generate test data repository using the given parameters.
|
||||
# When -r is omitted, we create "gen-many-files-<depth>.<width>.<files>.git".
|
||||
#
|
||||
# Usage: [-r repo] [-d depth] [-w width] [-f files]
|
||||
#
|
||||
# -r repo: path to the new repo to be generated
|
||||
# -d depth: the depth of sub-directories
|
||||
# -w width: the number of sub-directories at each level
|
||||
# -f files: the number of files created in each directory
|
||||
#
|
||||
# Note that all files will have the same SHA-1 and each
|
||||
# directory at a level will have the same SHA-1, so we
|
||||
# will potentially have a large index, but not a large
|
||||
# ODB.
|
||||
#
|
||||
# Ballast will be created under "ballast/".
|
||||
|
||||
# Object name of the empty blob; every ballast file points at it.
EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

set -e

# (5, 10, 9) will create 999,999 ballast files.
# (4, 10, 9) will create 99,999 ballast files.
depth=5
width=10
files=9

# Start empty so that a "repo" variable inherited from the
# environment is never mistaken for a -r argument.
repo=

# die <message...>: print "error: <message>" on stderr and abort.
die () {
	echo "error: $*" >&2
	exit 1
}

# require_count <option> <value>: abort unless <value> is a plain
# non-negative decimal integer.
require_count () {
	case "$2" in
	''|*[!0-9]*)
		die "$1 expects a non-negative integer, got '$2'" ;;
	esac
}

# Parse "[-r repo] [-d depth] [-w width] [-f files]".  Every option
# takes exactly one value; anything else is rejected.
while test "$#" -ne 0
do
	case "$1" in
	-r)
		test "$#" -ge 2 || die "-r requires an argument"
		repo=$2
		shift 2 ;;
	-d)
		test "$#" -ge 2 || die "-d requires an argument"
		depth=$2
		shift 2 ;;
	-w)
		test "$#" -ge 2 || die "-w requires an argument"
		width=$2
		shift 2 ;;
	-f)
		test "$#" -ge 2 || die "-f requires an argument"
		files=$2
		shift 2 ;;
	*)
		die "unknown option '$1'" ;;
	esac
done

# The shape parameters drive numeric loops; reject anything else
# up front rather than generating a silently wrong repository.
require_count -d "$depth"
require_count -w "$width"
require_count -f "$files"
|
||||
|
||||
# Inflate the index with thousands of empty files.
# usage: fill_index dir depth width files
#
#   dir    top-level directory the entries are created under
#   depth  number of sub-directory levels below dir
#   width  number of sub-directories ("dir1".."dirN") per directory
#   files  number of entries ("file1".."fileN") per directory
#
# Reads $EMPTY_BLOB for the object name given to every entry.
# awk writes complete "<mode> <object><TAB><path>" records, with the
# TAB spelled as "\t" so the separator survives whitespace mangling,
# and the records are piped into "git update-index --index-info".
# The function returns the exit status of that git command.
fill_index() {
	awk -v arg_dir="$1" -v arg_depth="$2" -v arg_width="$3" \
	    -v arg_files="$4" -v blob="$EMPTY_BLOB" '
		# f and w are extra parameters used as local variables.
		function make_paths(dir, depth, width, files,    f, w) {
			for (f = 1; f <= files; f++) {
				print "100644 " blob "\t" dir "/file" f
			}
			if (depth > 0) {
				for (w = 1; w <= width; w++) {
					make_paths(dir "/dir" w, depth - 1, width, files)
				}
			}
		}
		BEGIN { make_paths(arg_dir, arg_depth, arg_width, arg_files) }
	' |
	git update-index --index-info
}
|
||||
|
||||
# Without -r, derive the repository name from the shape parameters.
if test -z "$repo"
then
	repo=gen-many-files-$depth.$width.$files.git
fi

# Quoted so that a repository path containing spaces works.
mkdir "$repo"
cd "$repo"
git init .

# Pin the unborn branch to "master" regardless of any configured
# default branch name: the final checkout below and the p0006 perf
# test both refer to the base branch by that name.
git symbolic-ref HEAD refs/heads/master

# Create an initial commit just to define master.
touch many-files.empty
echo "$depth $width $files" >many-files.params
git add many-files.*
git commit -q -m params

# Create ballast for p0006 based upon the given params and
# inflate the index with thousands of empty files and commit.
git checkout -b p0006-ballast
fill_index "ballast" "$depth" "$width" "$files"
git commit -q -m "ballast"

nr_files=$(git ls-files | wc -l)

# Modify 1 file and commit.
echo "$depth $width $files" >>many-files.params
git add many-files.params
git commit -q -m "ballast plus 1"

# Checkout master to put repo in canonical state (because
# the perf test may need to clone and enable sparse-checkout
# before attempting to checkout a commit with the ballast
# (because it may contain 100K directories and 1M files)).
git checkout master

echo "Repository $repo ($depth, $width, $files) created. Ballast $nr_files."
exit 0
|
Loading…
Reference in New Issue
Block a user