From 6f6826c52bb751450e2bfa28f07c817dfa5802d6 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 3 Jun 2007 01:31:28 +0100 Subject: [PATCH 01/11] Add git-filter-branch This script is derived from Pasky's cg-admin-rewritehist. In fact, it _is_ the same script, minimally adapted to work without cogito. It _should_ be able to perform the same tasks, even if only relying on core-git programs. All the work is Pasky's, just the adaption is mine. Signed-off-by: Johannes Schindelin Hopefully-signed-off-by: Petr "cogito master" Baudis Signed-off-by: Junio C Hamano --- Makefile | 3 +- git-filter-branch.sh | 430 +++++++++++++++++++++++++++++++++++++++ t/t7003-filter-branch.sh | 47 +++++ 3 files changed, 479 insertions(+), 1 deletion(-) create mode 100644 git-filter-branch.sh create mode 100755 t/t7003-filter-branch.sh diff --git a/Makefile b/Makefile index a11ff60549..69f3b66714 100644 --- a/Makefile +++ b/Makefile @@ -209,7 +209,8 @@ SCRIPT_SH = \ git-am.sh \ git-merge.sh git-merge-stupid.sh git-merge-octopus.sh \ git-merge-resolve.sh git-merge-ours.sh \ - git-lost-found.sh git-quiltimport.sh git-submodule.sh + git-lost-found.sh git-quiltimport.sh git-submodule.sh \ + git-filter-branch.sh SCRIPT_PERL = \ git-add--interactive.perl \ diff --git a/git-filter-branch.sh b/git-filter-branch.sh new file mode 100644 index 0000000000..0c8a7dfd3e --- /dev/null +++ b/git-filter-branch.sh @@ -0,0 +1,430 @@ +#!/bin/sh +# +# Rewrite revision history +# Copyright (c) Petr Baudis, 2006 +# Minimal changes to "port" it to core-git (c) Johannes Schindelin, 2007 +# +# Lets you rewrite GIT revision history by creating a new branch from +# your current branch by applying custom filters on each revision. +# Those filters can modify each tree (e.g. removing a file or running +# a perl rewrite on all files) or information about each commit. +# Otherwise, all information (including original commit times or merge +# information) will be preserved. 
+# +# The command takes the new branch name as a mandatory argument and +# the filters as optional arguments. If you specify no filters, the +# commits will be recommitted without any changes, which would normally +# have no effect and result in the new branch pointing to the same +# commit as your current branch. (Nevertheless, this may be useful in +# the future for compensating for some Git bugs or such, therefore +# such a usage is permitted.) +# +# WARNING! The rewritten history will have different ids for all the +# objects and will not converge with the original branch. You will not +# be able to easily push and distribute the rewritten branch. Please do +# not use this command if you do not know the full implications, and +# avoid using it anyway - do not do what a simple single commit on top +# of the current version would fix. +# +# Always verify that the rewritten version is correct before disposing +# of the original branch. +# +# Note that since this operation is very I/O intensive, it might +# be a good idea to do it off-disk, e.g. on tmpfs. Reportedly the speedup +# is very noticeable. +# +# OPTIONS +# ------- +# -d TEMPDIR:: The path to the temporary tree used for rewriting +# When applying a tree filter, the command needs to temporarily +# check out the tree to some directory, which may consume +# considerable space in case of large projects. By default it +# does this in the '.git-rewrite/' directory but you can override +# that choice by this parameter. +# +# -r STARTREV:: The commit id to start the rewrite at +# Normally, the command will rewrite the entire history. If you +# pass this argument, though, this will be the first commit it +# will rewrite and keep the previous commits intact. +# +# -k KEEPREV:: A commit id until which _not_ to rewrite history +# If you pass this argument, this commit and all of its +# predecessors are kept intact. +# +# Filters +# ~~~~~~~ +# The filters are applied in the order as listed below. 
The COMMAND +# argument is always evaluated in shell using the 'eval' command. +# The $GIT_COMMIT environment variable is permanently set to contain +# the id of the commit being rewritten. The author/committer environment +# variables are set before the first filter is run. +# +# A 'map' function is available that takes an "original sha1 id" argument +# and outputs a "rewritten sha1 id" if the commit has been already +# rewritten, fails otherwise; the 'map' function can return several +# ids on separate lines if your commit filter emitted multiple commits +# (see below). +# +# --env-filter COMMAND:: The filter for modifying environment +# This is the filter for modifying the environment in which +# the commit will be performed. Specifically, you might want +# to rewrite the author/committer name/email/time environment +# variables (see `git-commit` for details). Do not forget to +# re-export the variables. +# +# --tree-filter COMMAND:: The filter for rewriting tree (and its contents) +# This is the filter for rewriting the tree and its contents. +# The COMMAND argument is evaluated in shell with the working +# directory set to the root of the checked out tree. The new tree +# is then used as-is (new files are auto-added, disappeared files +# are auto-removed - .gitignore files nor any other ignore rules +# HAVE NO EFFECT!). +# +# --index-filter COMMAND:: The filter for rewriting index +# This is the filter for rewriting the Git's directory index. +# It is similar to the tree filter but does not check out the +# tree, which makes it much faster. However, you must use the +# lowlevel Git index manipulation commands to do your work. +# +# --parent-filter COMMAND:: The filter for rewriting parents +# This is the filter for rewriting the commit's parent list. +# It will receive the parent string on stdin and shall output +# the new parent string on stdout. 
The parent string is in +# format accepted by `git-commit-tree`: empty for initial +# commit, "-p parent" for a normal commit and "-p parent1 +# -p parent2 -p parent3 ..." for a merge commit. +# +# --msg-filter COMMAND:: The filter for rewriting commit message +# This is the filter for rewriting the commit messages. +# The COMMAND argument is evaluated in shell with the original +# commit message on standard input; its standard output +# is used as the new commit message. +# +# --commit-filter COMMAND:: The filter for performing the commit +# If this filter is passed, it will be called instead of the +# `git-commit-tree` command, with those arguments: +# +# TREE_ID [-p PARENT_COMMIT_ID]... +# +# and the log message on stdin. The commit id is expected on +# stdout. As a special extension, the commit filter may emit +# multiple commit ids; in that case, all of them will be used +# as parents instead of the original commit in further commits. +# +# --tag-name-filter COMMAND:: The filter for rewriting tag names. +# If this filter is passed, it will be called for every tag ref +# that points to a rewritten object (or to a tag object which +# points to a rewritten object). The original tag name is passed +# via standard input, and the new tag name is expected on standard +# output. +# +# The original tags are not deleted, but can be overwritten; +# use "--tag-name-filter cat" to simply update the tags. In this +# case, be very careful and make sure you have the old tags +# backed up in case the conversion has run afoul. +# +# Note that there is currently no support for proper rewriting of +# tag objects; in layman terms, if the tag has a message or signature +# attached, the rewritten tag won't have it. Sorry. (It is by +# definition impossible to preserve signatures at any rate, though.) 
+# +# EXAMPLE USAGE +# ------------- +# Suppose you want to remove a file (containing confidential information +# or copyright violation) from all commits: +# +# git-filter-branch --tree-filter 'rm filename' newbranch +# +# A significantly faster version: +# +# git-filter-branch --index-filter 'git-update-index --remove filename' newbranch +# +# Now, you will get the rewritten history saved in the branch 'newbranch' +# (your current branch is left untouched). +# +# To "etch-graft" a commit to the revision history (set a commit to be +# the parent of the current initial commit and propagate that): +# +# git-filter-branch --parent-filter sed\ 's/^$/-p graftcommitid/' newbranch +# +# (if the parent string is empty - therefore we are dealing with the +# initial commit - add graftcommit as a parent). Note that this assumes +# history with a single root (that is, no git-merge without common ancestors +# happened). If this is not the case, use: +# +# git-filter-branch --parent-filter 'cat; [ "$GIT_COMMIT" = "COMMIT" ] && echo "-p GRAFTCOMMIT"' newbranch +# +# To remove commits authored by "Darl McBribe" from the history: +# +# git-filter-branch --commit-filter 'if [ "$GIT_AUTHOR_NAME" = "Darl McBribe" ]; then shift; while [ -n "$1" ]; do shift; echo "$1"; shift; done; else git-commit-tree "$@"; fi' newbranch +# +# (the shift magic first throws away the tree id and then the -p +# parameters). Note that this handles merges properly! In case Darl +# committed a merge between P1 and P2, it will be propagated properly +# and all children of the merge will become merge commits with P1,P2 +# as their parents instead of the merge commit. +# +# To restrict rewriting to only part of the history, use -r or -k or both. +# Consider this history: +# +# D--E--F--G--H +# / / +# A--B-----C +# +# To rewrite only commits F,G,H, use: +# +# git-filter-branch -r F ... +# +# To rewrite commits E,F,G,H, use one of these: +# +# git-filter-branch -r E -k C ... +# git-filter-branch -k D -k C ... 
+ +# Testsuite: TODO + +set -e + +USAGE="git-filter-branch [-d TEMPDIR] [-r STARTREV]... [-k KEEPREV]... [-s SRCBRANCH] [FILTERS] DESTBRANCH" +. git-sh-setup + +map() +{ + [ -r "$workdir/../map/$1" ] || return 1 + cat "$workdir/../map/$1" +} + +# When piped a commit, output a script to set the ident of either +# "author" or "committer + +set_ident () { + lid="$(echo "$1" | tr "A-Z" "a-z")" + uid="$(echo "$1" | tr "a-z" "A-Z")" + pick_id_script=' + /^'$lid' /{ + s/'\''/'\''\\'\'\''/g + h + s/^'$lid' \([^<]*\) <[^>]*> .*$/\1/ + s/'\''/'\''\'\'\''/g + s/.*/export GIT_'$uid'_NAME='\''&'\''/p + + g + s/^'$lid' [^<]* <\([^>]*\)> .*$/\1/ + s/'\''/'\''\'\'\''/g + s/.*/export GIT_'$uid'_EMAIL='\''&'\''/p + + g + s/^'$lid' [^<]* <[^>]*> \(.*\)$/\1/ + s/'\''/'\''\'\'\''/g + s/.*/export GIT_'$uid'_DATE='\''&'\''/p + + q + } + ' + + LANG=C LC_ALL=C sed -ne "$pick_id_script" + # Ensure non-empty id name. + echo "[ -n \"\$GIT_${uid}_NAME\" ] || export GIT_${uid}_NAME=\"\${GIT_${uid}_EMAIL%%@*}\"" +} + +# list all parent's object names for a given commit +get_parents () { + git-rev-list -1 --parents "$1" | sed "s/^[0-9a-f]*//" +} + +tempdir=.git-rewrite +unchanged=" " +filter_env= +filter_tree= +filter_index= +filter_parent= +filter_msg=cat +filter_commit='git-commit-tree "$@"' +filter_tag_name= +srcbranch=HEAD +while case "$#" in 0) usage;; esac +do + case "$1" in + --) + shift + break + ;; + -*) + ;; + *) + break; + esac + + # all switches take one argument + ARG="$1" + case "$#" in 1) usage ;; esac + shift + OPTARG="$1" + shift + + case "$ARG" in + -d) + tempdir="$OPTARG" + ;; + -r) + unchanged="$(get_parents "$OPTARG") $unchanged" + ;; + -k) + unchanged="$(git-rev-parse "$OPTARG"^{commit}) $unchanged" + ;; + --env-filter) + filter_env="$OPTARG" + ;; + --tree-filter) + filter_tree="$OPTARG" + ;; + --index-filter) + filter_index="$OPTARG" + ;; + --parent-filter) + filter_parent="$OPTARG" + ;; + --msg-filter) + filter_msg="$OPTARG" + ;; + --commit-filter) + 
filter_commit="$OPTARG" + ;; + --tag-name-filter) + filter_tag_name="$OPTARG" + ;; + -s) + srcbranch="$OPTARG" + ;; + *) + usage + ;; + esac +done + +dstbranch="$1" +test -n "$dstbranch" || die "missing branch name" +git-show-ref "refs/heads/$dstbranch" 2> /dev/null && + die "branch $dstbranch already exists" + +test ! -e "$tempdir" || die "$tempdir already exists, please remove it" +mkdir -p "$tempdir/t" +cd "$tempdir/t" +workdir="$(pwd)" + +case "$GIT_DIR" in +/*) + ;; +*) + export GIT_DIR="$(pwd)/../../$GIT_DIR" + ;; +esac + +export GIT_INDEX_FILE="$(pwd)/../index" +git-read-tree # seed the index file + +ret=0 + + +mkdir ../map # map old->new commit ids for rewriting parents + +# seed with identity mappings for the parents where we start off +for commit in $unchanged; do + echo $commit > ../map/$commit +done + +git-rev-list --reverse --topo-order $srcbranch --not $unchanged >../revs +commits=$(cat ../revs | wc -l | tr -d " ") + +test $commits -eq 0 && die "Found nothing to rewrite" + +i=0 +while read commit; do + i=$((i+1)) + printf "$commit ($i/$commits) " + + git-read-tree -i -m $commit + + export GIT_COMMIT=$commit + git-cat-file commit "$commit" >../commit + + eval "$(set_ident AUTHOR <../commit)" + eval "$(set_ident COMMITTER <../commit)" + eval "$filter_env" + + if [ "$filter_tree" ]; then + git-checkout-index -f -u -a + # files that $commit removed are now still in the working tree; + # remove them, else they would be added again + git-ls-files -z --others | xargs -0 rm -f + eval "$filter_tree" + git-diff-index -r $commit | cut -f 2- | tr '\n' '\0' | \ + xargs -0 git-update-index --add --replace --remove + git-ls-files -z --others | \ + xargs -0 git-update-index --add --replace --remove + fi + + eval "$filter_index" + + parentstr= + for parent in $(get_parents $commit); do + if [ -r "../map/$parent" ]; then + for reparent in $(cat "../map/$parent"); do + parentstr="$parentstr -p $reparent" + done + else + die "assertion failed: parent $parent for commit 
$commit not found in rewritten ones" + fi + done + if [ "$filter_parent" ]; then + parentstr="$(echo "$parentstr" | eval "$filter_parent")" + fi + + sed -e '1,/^$/d' <../commit | \ + eval "$filter_msg" | \ + sh -c "$filter_commit" git-commit-tree $(git-write-tree) $parentstr | \ + tee ../map/$commit +done <../revs + +git-update-ref refs/heads/"$dstbranch" $(head -n 1 ../map/$(tail -n 1 ../revs)) +if [ "$(cat ../map/$(tail -n 1 ../revs) | wc -l)" -gt 1 ]; then + echo "WARNING: Your commit filter caused the head commit to expand to several rewritten commits. Only the first such commit was recorded as the current $dstbranch head but you will need to resolve the situation now (probably by manually merging the other commits). These are all the commits:" >&2 + sed 's/^/ /' ../map/$(tail -n 1 ../revs) >&2 + ret=1 +fi + +if [ "$filter_tag_name" ]; then + git-for-each-ref --format='%(objectname) %(objecttype) %(refname)' refs/tags | + while read sha1 type ref; do + ref="${ref#refs/tags/}" + # XXX: Rewrite tagged trees as well? + if [ "$type" != "commit" -a "$type" != "tag" ]; then + continue; + fi + + if [ "$type" = "tag" ]; then + # Dereference to a commit + sha1t="$sha1" + sha1="$(git-rev-parse "$sha1"^{commit} 2>/dev/null)" || continue + fi + + [ -f "../map/$sha1" ] || continue + new_sha1="$(cat "../map/$sha1")" + export GIT_COMMIT="$sha1" + new_ref="$(echo "$ref" | eval "$filter_tag_name")" + + echo "$ref -> $new_ref ($sha1 -> $new_sha1)" + + if [ "$type" = "tag" ]; then + # Warn that we are not rewriting the tag object itself. + warn "unreferencing tag object $sha1t" + fi + + git-update-ref "refs/tags/$new_ref" "$new_sha1" + done +fi + +cd ../.. +rm -rf "$tempdir" +echo "Rewritten history saved to the $dstbranch branch" + +exit $ret diff --git a/t/t7003-filter-branch.sh b/t/t7003-filter-branch.sh new file mode 100755 index 0000000000..9a4dae44f2 --- /dev/null +++ b/t/t7003-filter-branch.sh @@ -0,0 +1,47 @@ +#!/bin/sh + +test_description='git-filter-branch' +. 
./test-lib.sh + +make_commit () { + lower=$(echo $1 | tr A-Z a-z) + echo $lower > $lower + git add $lower + git commit -m $1 + git tag $1 +} + +test_expect_success 'setup' ' + make_commit A + make_commit B + git checkout -b branch B + make_commit D + make_commit E + git checkout master + make_commit C + git checkout branch + git merge C + git tag F + make_commit G + make_commit H +' + +H=$(git-rev-parse H) + +test_expect_success 'rewrite identically' ' + git-filter-branch H2 +' + +test_expect_success 'result is really identical' ' + test $H = $(git-rev-parse H2) +' + +test_expect_success 'rewrite, renaming a specific file' ' + git-filter-branch --tree-filter "mv d doh || :" H3 +' + +test_expect_success 'test that the file was renamed' ' + test d = $(git show H3:doh) +' + +test_done From aee078bf81d5810cb461e86950f6807d2d45befa Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 5 Jun 2007 00:07:31 -0700 Subject: [PATCH 02/11] t7003: make test repeatable Signed-off-by: Junio C Hamano --- t/t7003-filter-branch.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/t/t7003-filter-branch.sh b/t/t7003-filter-branch.sh index 9a4dae44f2..c82ff1d6e9 100755 --- a/t/t7003-filter-branch.sh +++ b/t/t7003-filter-branch.sh @@ -7,6 +7,7 @@ make_commit () { lower=$(echo $1 | tr A-Z a-z) echo $lower > $lower git add $lower + test_tick git commit -m $1 git tag $1 } From 350d8575293dc3f25b0c6ec4bbfd9303a0eebd76 Mon Sep 17 00:00:00 2001 From: Matthias Lederhofer Date: Tue, 5 Jun 2007 16:12:08 +0200 Subject: [PATCH 03/11] filter-branch: prevent filters from reading from stdin stdin is the list of commits when the env, tree and index filter are executed. The filters are not supposed to read anything from stdin so the best is to give them /dev/null for reading. 
Signed-off-by: Matthias Lederhofer Signed-off-by: Junio C Hamano --- git-filter-branch.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/git-filter-branch.sh b/git-filter-branch.sh index 0c8a7dfd3e..73e7c01009 100644 --- a/git-filter-branch.sh +++ b/git-filter-branch.sh @@ -349,21 +349,21 @@ while read commit; do eval "$(set_ident AUTHOR <../commit)" eval "$(set_ident COMMITTER <../commit)" - eval "$filter_env" + eval "$filter_env" < /dev/null if [ "$filter_tree" ]; then git-checkout-index -f -u -a # files that $commit removed are now still in the working tree; # remove them, else they would be added again git-ls-files -z --others | xargs -0 rm -f - eval "$filter_tree" + eval "$filter_tree" < /dev/null git-diff-index -r $commit | cut -f 2- | tr '\n' '\0' | \ xargs -0 git-update-index --add --replace --remove git-ls-files -z --others | \ xargs -0 git-update-index --add --replace --remove fi - eval "$filter_index" + eval "$filter_index" < /dev/null parentstr= for parent in $(get_parents $commit); do From d674ee4cfcfc15cdee07c49d7f36656fe6c3e14d Mon Sep 17 00:00:00 2001 From: Matthias Lederhofer Date: Wed, 6 Jun 2007 09:29:39 +0200 Subject: [PATCH 04/11] chmod +x git-filter-branch.sh Signed-off-by: Matthias Lederhofer Signed-off-by: Junio C Hamano --- git-filter-branch.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 git-filter-branch.sh diff --git a/git-filter-branch.sh b/git-filter-branch.sh old mode 100644 new mode 100755 From c12764b8b7e004f84b1e685b76f2d662bee8e196 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 6 Jun 2007 16:24:07 +0100 Subject: [PATCH 05/11] filter-branch: use $(($i+1)) instead of $((i+1)) The expression $((i+1)) is not portable at all: even some bash versions do not grok it. So do not use it. Noticed by Jonas Fonseca. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- git-filter-branch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-filter-branch.sh b/git-filter-branch.sh index 73e7c01009..2929925a0e 100755 --- a/git-filter-branch.sh +++ b/git-filter-branch.sh @@ -339,7 +339,7 @@ test $commits -eq 0 && die "Found nothing to rewrite" i=0 while read commit; do - i=$((i+1)) + i=$(($i+1)) printf "$commit ($i/$commits) " git-read-tree -i -m $commit From 9840906026be807d0882f96396de3a3cdb9fb43e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 5 Jun 2007 16:58:13 +0100 Subject: [PATCH 06/11] filter-branch: fix behaviour of '-k' The option '-k' says that the given commit and _all_ of its ancestors are kept as-is. However, if a to-be-rewritten commit branched from an ancestor of an ancestor of a commit given with '-k', filter-branch would fail. Example: A - B \ C If filter-branch was called with '-k B -s C', it would actually keep B (and A as its parent), but would rewrite C, and its parent. Noticed by Johannes Sixt. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- git-filter-branch.sh | 29 +++++++++++++++++------------ t/t7003-filter-branch.sh | 9 +++++++++ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/git-filter-branch.sh b/git-filter-branch.sh index 2929925a0e..f2b0e273ba 100755 --- a/git-filter-branch.sh +++ b/git-filter-branch.sh @@ -327,11 +327,6 @@ ret=0 mkdir ../map # map old->new commit ids for rewriting parents -# seed with identity mappings for the parents where we start off -for commit in $unchanged; do - echo $commit > ../map/$commit -done - git-rev-list --reverse --topo-order $srcbranch --not $unchanged >../revs commits=$(cat ../revs | wc -l | tr -d " ") @@ -372,7 +367,8 @@ while read commit; do parentstr="$parentstr -p $reparent" done else - die "assertion failed: parent $parent for commit $commit not found in rewritten ones" + # if it was not rewritten, take the original + parentstr="$parentstr -p $parent" fi done if [ "$filter_parent" ]; then @@ -385,12 +381,21 @@ while read commit; do tee ../map/$commit done <../revs -git-update-ref refs/heads/"$dstbranch" $(head -n 1 ../map/$(tail -n 1 ../revs)) -if [ "$(cat ../map/$(tail -n 1 ../revs) | wc -l)" -gt 1 ]; then - echo "WARNING: Your commit filter caused the head commit to expand to several rewritten commits. Only the first such commit was recorded as the current $dstbranch head but you will need to resolve the situation now (probably by manually merging the other commits). These are all the commits:" >&2 - sed 's/^/ /' ../map/$(tail -n 1 ../revs) >&2 - ret=1 -fi +src_head=$(tail -n 1 ../revs) +target_head=$(head -n 1 ../map/$src_head) +case "$target_head" in +'') + echo Nothing rewritten + ;; +*) + git-update-ref refs/heads/"$dstbranch" $target_head + if [ $(cat ../map/$src_head | wc -l) -gt 1 ]; then + echo "WARNING: Your commit filter caused the head commit to expand to several rewritten commits. 
Only the first such commit was recorded as the current $dstbranch head but you will need to resolve the situation now (probably by manually merging the other commits). These are all the commits:" >&2 + sed 's/^/ /' ../map/$src_head >&2 + ret=1 + fi + ;; +esac if [ "$filter_tag_name" ]; then git-for-each-ref --format='%(objectname) %(objecttype) %(refname)' refs/tags | diff --git a/t/t7003-filter-branch.sh b/t/t7003-filter-branch.sh index c82ff1d6e9..6e2be5be0e 100755 --- a/t/t7003-filter-branch.sh +++ b/t/t7003-filter-branch.sh @@ -45,4 +45,13 @@ test_expect_success 'test that the file was renamed' ' test d = $(git show H3:doh) ' +git tag oldD H3~4 +test_expect_success 'rewrite one branch, keeping a side branch' ' + git-filter-branch --tree-filter "mv b boh || :" -k D -s oldD modD +' + +test_expect_success 'common ancestor is still common (unchanged)' ' + test "$(git-merge-base modD D)" = "$(git-rev-parse B)" +' + test_done From 2766ce28150597677bb91c424e6033a298c71510 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 6 Jun 2007 09:43:41 +0200 Subject: [PATCH 07/11] filter-branch: Use rev-list arguments to specify revision ranges. A subset of commits in a branch used to be specified by options (-k, -r) as well as the branch tip itself (-s). It is more natural (for git users) to specify revision ranges like 'master..next' instead. This makes it so. If no range is specified it defaults to 'HEAD'. As a consequence, the new name of the filtered branch must be the first non-option argument. All remaining arguments are passed to 'git rev-list' unmodified. The tip of the branch that gets filtered is implied: It is the first commit that git rev-list would print for the specified range. 
Signed-off-by: Johannes Sixt Acked-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- git-filter-branch.sh | 39 ++++++++++++--------------------------- t/t7003-filter-branch.sh | 2 +- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/git-filter-branch.sh b/git-filter-branch.sh index f2b0e273ba..29e0d027ca 100755 --- a/git-filter-branch.sh +++ b/git-filter-branch.sh @@ -42,15 +42,6 @@ # does this in the '.git-rewrite/' directory but you can override # that choice by this parameter. # -# -r STARTREV:: The commit id to start the rewrite at -# Normally, the command will rewrite the entire history. If you -# pass this argument, though, this will be the first commit it -# will rewrite and keep the previous commits intact. -# -# -k KEEPREV:: A commit id until which _not_ to rewrite history -# If you pass this argument, this commit and all of its -# predecessors are kept intact. -# # Filters # ~~~~~~~ # The filters are applied in the order as listed below. The COMMAND @@ -164,27 +155,31 @@ # and all children of the merge will become merge commits with P1,P2 # as their parents instead of the merge commit. # -# To restrict rewriting to only part of the history, use -r or -k or both. +# To restrict rewriting to only part of the history, specify a revision +# range in addition to the new branch name. The new branch name will +# point to the top-most revision that a 'git rev-list' of this range +# will print. +# # Consider this history: # # D--E--F--G--H # / / # A--B-----C # -# To rewrite only commits F,G,H, use: +# To rewrite commits D,E,F,G,H, use: # -# git-filter-branch -r F ... +# git-filter-branch ... new-H C..H # # To rewrite commits E,F,G,H, use one of these: # -# git-filter-branch -r E -k C ... -# git-filter-branch -k D -k C ... +# git-filter-branch ... new-H C..H --not D +# git-filter-branch ... new-H D..H --not C # Testsuite: TODO set -e -USAGE="git-filter-branch [-d TEMPDIR] [-r STARTREV]... [-k KEEPREV]... 
[-s SRCBRANCH] [FILTERS] DESTBRANCH" +USAGE="git-filter-branch [-d TEMPDIR] [FILTERS] DESTBRANCH [REV-RANGE]" . git-sh-setup map() @@ -232,7 +227,6 @@ get_parents () { } tempdir=.git-rewrite -unchanged=" " filter_env= filter_tree= filter_index= @@ -240,7 +234,6 @@ filter_parent= filter_msg=cat filter_commit='git-commit-tree "$@"' filter_tag_name= -srcbranch=HEAD while case "$#" in 0) usage;; esac do case "$1" in @@ -265,12 +258,6 @@ do -d) tempdir="$OPTARG" ;; - -r) - unchanged="$(get_parents "$OPTARG") $unchanged" - ;; - -k) - unchanged="$(git-rev-parse "$OPTARG"^{commit}) $unchanged" - ;; --env-filter) filter_env="$OPTARG" ;; @@ -292,9 +279,6 @@ do --tag-name-filter) filter_tag_name="$OPTARG" ;; - -s) - srcbranch="$OPTARG" - ;; *) usage ;; @@ -302,6 +286,7 @@ do done dstbranch="$1" +shift test -n "$dstbranch" || die "missing branch name" git-show-ref "refs/heads/$dstbranch" 2> /dev/null && die "branch $dstbranch already exists" @@ -327,7 +312,7 @@ ret=0 mkdir ../map # map old->new commit ids for rewriting parents -git-rev-list --reverse --topo-order $srcbranch --not $unchanged >../revs +git-rev-list --reverse --topo-order --default HEAD "$@" >../revs commits=$(cat ../revs | wc -l | tr -d " ") test $commits -eq 0 && die "Found nothing to rewrite" diff --git a/t/t7003-filter-branch.sh b/t/t7003-filter-branch.sh index 6e2be5be0e..3739cb191d 100755 --- a/t/t7003-filter-branch.sh +++ b/t/t7003-filter-branch.sh @@ -47,7 +47,7 @@ test_expect_success 'test that the file was renamed' ' git tag oldD H3~4 test_expect_success 'rewrite one branch, keeping a side branch' ' - git-filter-branch --tree-filter "mv b boh || :" -k D -s oldD modD + git-filter-branch --tree-filter "mv b boh || :" modD D..oldD ' test_expect_success 'common ancestor is still common (unchanged)' ' From 3520e1e86878c6787c3abfe677e6472ce2c97f66 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 6 Jun 2007 20:38:35 +0200 Subject: [PATCH 08/11] filter-branch: also don't fail in map() if a commit cannot 
be mapped The map() function can be used by filters to map a commit id to its rewritten id. Such a mapping may not exist, in which case the identity mapping is used (the commit is returned unchanged). In the rewrite loop, this mapping is also needed, but was done explicitly in the same way. Use the map() function instead. Signed-off-by: Johannes Sixt Acked-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- git-filter-branch.sh | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/git-filter-branch.sh b/git-filter-branch.sh index 29e0d027ca..9d61b7fff6 100755 --- a/git-filter-branch.sh +++ b/git-filter-branch.sh @@ -184,7 +184,8 @@ USAGE="git-filter-branch [-d TEMPDIR] [FILTERS] DESTBRANCH [REV-RANGE]" map() { - [ -r "$workdir/../map/$1" ] || return 1 + # if it was not rewritten, take the original + test -r "$workdir/../map/$1" || echo "$1" cat "$workdir/../map/$1" } @@ -347,14 +348,9 @@ while read commit; do parentstr= for parent in $(get_parents $commit); do - if [ -r "../map/$parent" ]; then - for reparent in $(cat "../map/$parent"); do - parentstr="$parentstr -p $reparent" - done - else - # if it was not rewritten, take the original - parentstr="$parentstr -p $parent" - fi + for reparent in $(map "$parent"); do + parentstr="$parentstr -p $reparent" + done done if [ "$filter_parent" ]; then parentstr="$(echo "$parentstr" | eval "$filter_parent")" From 685ef546b62d063c72b401cd38b83a879301aac4 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 8 Jun 2007 01:30:35 +0100 Subject: [PATCH 09/11] Teach filter-branch about subdirectory filtering With git-filter-branch --subdirectory-filter you can get at the history, as seen by a certain subdirectory. The history of the rewritten branch will only contain commits that touched that subdirectory, and the subdirectory will be rewritten to be the new project root. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- git-filter-branch.sh | 33 ++++++++++++++++++++++++++++++--- t/t7003-filter-branch.sh | 24 ++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/git-filter-branch.sh b/git-filter-branch.sh index 9d61b7fff6..efb8f2dbca 100755 --- a/git-filter-branch.sh +++ b/git-filter-branch.sh @@ -119,6 +119,10 @@ # attached, the rewritten tag won't have it. Sorry. (It is by # definition impossible to preserve signatures at any rate, though.) # +# --subdirectory-filter DIRECTORY:: Only regard the history, as seen by +# the given subdirectory. The result will contain that directory as +# its project root. +# # EXAMPLE USAGE # ------------- # Suppose you want to remove a file (containing confidential information @@ -224,7 +228,13 @@ set_ident () { # list all parent's object names for a given commit get_parents () { - git-rev-list -1 --parents "$1" | sed "s/^[0-9a-f]*//" + case "$filter_subdir" in + "") + git-rev-list -1 --parents "$1" + ;; + *) + git-rev-list -1 --parents "$1" -- "$filter_subdir" + esac | sed "s/^[0-9a-f]*//" } tempdir=.git-rewrite @@ -235,6 +245,7 @@ filter_parent= filter_msg=cat filter_commit='git-commit-tree "$@"' filter_tag_name= +filter_subdir= while case "$#" in 0) usage;; esac do case "$1" in @@ -280,6 +291,9 @@ do --tag-name-filter) filter_tag_name="$OPTARG" ;; + --subdirectory-filter) + filter_subdir="$OPTARG" + ;; *) usage ;; @@ -313,7 +327,14 @@ ret=0 mkdir ../map # map old->new commit ids for rewriting parents -git-rev-list --reverse --topo-order --default HEAD "$@" >../revs +case "$filter_subdir" in +"") + git-rev-list --reverse --topo-order --default HEAD "$@" + ;; +*) + git-rev-list --reverse --topo-order --default HEAD "$@" \ + -- "$filter_subdir" +esac > ../revs commits=$(cat ../revs | wc -l | tr -d " ") test $commits -eq 0 && die "Found nothing to rewrite" @@ -323,7 +344,13 @@ while read commit; do i=$(($i+1)) printf "$commit ($i/$commits) " - 
git-read-tree -i -m $commit + case "$filter_subdir" in + "") + git-read-tree -i -m $commit + ;; + *) + git-read-tree -i -m $commit:"$filter_subdir" + esac export GIT_COMMIT=$commit git-cat-file commit "$commit" >../commit diff --git a/t/t7003-filter-branch.sh b/t/t7003-filter-branch.sh index 3739cb191d..292b83766d 100755 --- a/t/t7003-filter-branch.sh +++ b/t/t7003-filter-branch.sh @@ -54,4 +54,28 @@ test_expect_success 'common ancestor is still common (unchanged)' ' test "$(git-merge-base modD D)" = "$(git-rev-parse B)" ' +test_expect_success 'filter subdirectory only' ' + mkdir subdir && + touch subdir/new && + git add subdir/new && + test_tick && + git commit -m "subdir" && + echo H > a && + test_tick && + git commit -m "not subdir" a && + echo A > subdir/new && + test_tick && + git commit -m "again subdir" subdir/new && + git rm a && + test_tick && + git commit -m "again not subdir" && + git-filter-branch --subdirectory-filter subdir sub +' + +test_expect_success 'subdirectory filter result looks okay' ' + test 2 = $(git-rev-list sub | wc -l) && + git show sub:new && + ! git show sub:subdir +' + test_done From 813b4734fcb82e541658b33b8563387c197d6247 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 8 Jun 2007 23:28:39 +0200 Subject: [PATCH 10/11] filter-branch: Simplify parent computation. We can use git rev-list --parents when we list the commits to rewrite. It is not necessary to run git rev-list --parents for each commit in the loop. 
Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- git-filter-branch.sh | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/git-filter-branch.sh b/git-filter-branch.sh index efb8f2dbca..cb43b59740 100755 --- a/git-filter-branch.sh +++ b/git-filter-branch.sh @@ -226,17 +226,6 @@ set_ident () { echo "[ -n \"\$GIT_${uid}_NAME\" ] || export GIT_${uid}_NAME=\"\${GIT_${uid}_EMAIL%%@*}\"" } -# list all parent's object names for a given commit -get_parents () { - case "$filter_subdir" in - "") - git-rev-list -1 --parents "$1" - ;; - *) - git-rev-list -1 --parents "$1" -- "$filter_subdir" - esac | sed "s/^[0-9a-f]*//" -} - tempdir=.git-rewrite filter_env= filter_tree= @@ -329,18 +318,19 @@ mkdir ../map # map old->new commit ids for rewriting parents case "$filter_subdir" in "") - git-rev-list --reverse --topo-order --default HEAD "$@" + git-rev-list --reverse --topo-order --default HEAD \ + --parents "$@" ;; *) - git-rev-list --reverse --topo-order --default HEAD "$@" \ - -- "$filter_subdir" + git-rev-list --reverse --topo-order --default HEAD \ + --parents "$@" -- "$filter_subdir" esac > ../revs commits=$(cat ../revs | wc -l | tr -d " ") test $commits -eq 0 && die "Found nothing to rewrite" i=0 -while read commit; do +while read commit parents; do i=$(($i+1)) printf "$commit ($i/$commits) " @@ -374,7 +364,7 @@ while read commit; do eval "$filter_index" < /dev/null parentstr= - for parent in $(get_parents $commit); do + for parent in $parents; do for reparent in $(map "$parent"); do parentstr="$parentstr -p $reparent" done @@ -389,7 +379,7 @@ while read commit; do tee ../map/$commit done <../revs -src_head=$(tail -n 1 ../revs) +src_head=$(tail -n 1 ../revs | sed -e 's/ .*//') target_head=$(head -n 1 ../map/$src_head) case "$target_head" in '') From cfabd6eee1745cfec58cfcb794ce8847e43b888a Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 8 Jun 2007 23:28:50 +0200 Subject: [PATCH 11/11] filter-branch: subdirectory 
filter needs --full-history When two branches are merged that modify a subdirectory (possibly in different intermediate steps) such that both end up identical, then rev-list chooses only one branch. But when we filter history, we want to keep both branches. Therefore, we must use --full-history. Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- git-filter-branch.sh | 2 +- t/t7003-filter-branch.sh | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/git-filter-branch.sh b/git-filter-branch.sh index cb43b59740..bfd118cd3b 100755 --- a/git-filter-branch.sh +++ b/git-filter-branch.sh @@ -323,7 +323,7 @@ case "$filter_subdir" in ;; *) git-rev-list --reverse --topo-order --default HEAD \ - --parents "$@" -- "$filter_subdir" + --parents --full-history "$@" -- "$filter_subdir" esac > ../revs commits=$(cat ../revs | wc -l | tr -d " ") diff --git a/t/t7003-filter-branch.sh b/t/t7003-filter-branch.sh index 292b83766d..0fabe4904f 100755 --- a/t/t7003-filter-branch.sh +++ b/t/t7003-filter-branch.sh @@ -78,4 +78,25 @@ test_expect_success 'subdirectory filter result looks okay' ' ! git show sub:subdir ' +test_expect_success 'setup and filter history that requires --full-history' ' + git checkout master && + mkdir subdir && + echo A > subdir/new && + git add subdir/new && + test_tick && + git commit -m "subdir on master" subdir/new && + git rm a && + test_tick && + git commit -m "again subdir on master" && + git merge branch && + git-filter-branch --subdirectory-filter subdir sub-master +' + +test_expect_success 'subdirectory filter result looks okay' ' + test 3 = $(git-rev-list -1 --parents sub-master | wc -w) && + git show sub-master^:new && + git show sub-master^2:new && + ! git show sub:subdir +' + test_done