git-commit-vandalism/contrib/remote-helpers/git-remote-bzr

960 lines
26 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env python
#
# Copyright (c) 2012 Felipe Contreras
#
#
# Just copy to your ~/bin, or anywhere in your $PATH.
# Then you can clone with:
# % git clone bzr::/path/to/bzr/repo/or/url
#
# For example:
# % git clone bzr::$HOME/myrepo
# or
# % git clone bzr::lp:myrepo
#
# If you want to specify which branches you want track (per repo):
# git config remote-bzr.branches 'trunk, devel, test'
#
import sys
import bzrlib
if hasattr(bzrlib, "initialize"):
bzrlib.initialize()
import bzrlib.plugin
bzrlib.plugin.load_plugins()
import bzrlib.generate_ids
import bzrlib.transport
import bzrlib.errors
import bzrlib.ui
import bzrlib.urlutils
import bzrlib.branch
import sys
import os
import json
import re
import StringIO
import atexit, shutil, hashlib, urlparse, subprocess
NAME_RE = re.compile('^([^<>]+)')
AUTHOR_RE = re.compile('^([^<>]+?)? ?<([^<>]*)>$')
EMAIL_RE = re.compile('^([^<>]+[^ \\\t<>])?\\b(?:[ \\t<>]*?)\\b([^ \\t<>]+@[^ \\t<>]+)')
RAW_AUTHOR_RE = re.compile('^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
sys.stderr.write('ERROR: %s\n' % (msg % args))
sys.exit(1)
def warn(msg, *args):
sys.stderr.write('WARNING: %s\n' % (msg % args))
def gittz(tz):
return '%+03d%02d' % (tz / 3600, tz % 3600 / 60)
def get_config(config):
cmd = ['git', 'config', '--get', config]
process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
output, _ = process.communicate()
return output
class Marks:
def __init__(self, path):
self.path = path
self.tips = {}
self.marks = {}
self.rev_marks = {}
self.last_mark = 0
self.load()
def load(self):
if not os.path.exists(self.path):
return
tmp = json.load(open(self.path))
self.tips = tmp['tips']
self.marks = tmp['marks']
self.last_mark = tmp['last-mark']
for rev, mark in self.marks.iteritems():
self.rev_marks[mark] = rev
def dict(self):
return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }
def store(self):
json.dump(self.dict(), open(self.path, 'w'))
def __str__(self):
return str(self.dict())
def from_rev(self, rev):
return self.marks[rev]
def to_rev(self, mark):
return str(self.rev_marks[mark])
def next_mark(self):
self.last_mark += 1
return self.last_mark
def get_mark(self, rev):
self.last_mark += 1
self.marks[rev] = self.last_mark
return self.last_mark
def is_marked(self, rev):
return rev in self.marks
def new_mark(self, rev, mark):
self.marks[rev] = mark
self.rev_marks[mark] = rev
self.last_mark = mark
def get_tip(self, branch):
return self.tips.get(branch, None)
def set_tip(self, branch, tip):
self.tips[branch] = tip
class Parser:
def __init__(self, repo):
self.repo = repo
self.line = self.get_line()
def get_line(self):
return sys.stdin.readline().strip()
def __getitem__(self, i):
return self.line.split()[i]
def check(self, word):
return self.line.startswith(word)
def each_block(self, separator):
while self.line != separator:
yield self.line
self.line = self.get_line()
def __iter__(self):
return self.each_block('')
def next(self):
self.line = self.get_line()
if self.line == 'done':
self.line = None
def get_mark(self):
i = self.line.index(':') + 1
return int(self.line[i:])
def get_data(self):
if not self.check('data'):
return None
i = self.line.index(' ') + 1
size = int(self.line[i:])
return sys.stdin.read(size)
def get_author(self):
m = RAW_AUTHOR_RE.match(self.line)
if not m:
return None
_, name, email, date, tz = m.groups()
committer = '%s <%s>' % (name, email)
tz = int(tz)
tz = ((tz / 100) * 3600) + ((tz % 100) * 60)
return (committer, int(date), tz)
def rev_to_mark(rev):
global marks
return marks.from_rev(rev)
def mark_to_rev(mark):
global marks
return marks.to_rev(mark)
def fixup_user(user):
name = mail = None
user = user.replace('"', '')
m = AUTHOR_RE.match(user)
if m:
name = m.group(1)
mail = m.group(2).strip()
else:
m = EMAIL_RE.match(user)
if m:
name = m.group(1)
mail = m.group(2)
else:
m = NAME_RE.match(user)
if m:
name = m.group(1).strip()
if not name:
name = 'unknown'
if not mail:
mail = 'Unknown'
return '%s <%s>' % (name, mail)
def get_filechanges(cur, prev):
modified = {}
removed = {}
changes = cur.changes_from(prev)
def u(s):
return s.encode('utf-8')
for path, fid, kind in changes.added:
modified[u(path)] = fid
for path, fid, kind in changes.removed:
removed[u(path)] = None
for path, fid, kind, mod, _ in changes.modified:
modified[u(path)] = fid
for oldpath, newpath, fid, kind, mod, _ in changes.renamed:
removed[u(oldpath)] = None
if kind == 'directory':
lst = cur.list_files(from_dir=newpath, recursive=True)
for path, file_class, kind, fid, entry in lst:
if kind != 'directory':
modified[u(newpath + '/' + path)] = fid
else:
modified[u(newpath)] = fid
return modified, removed
def export_files(tree, files):
global marks, filenodes
final = []
for path, fid in files.iteritems():
kind = tree.kind(fid)
h = tree.get_file_sha1(fid)
if kind == 'symlink':
d = tree.get_symlink_target(fid)
mode = '120000'
elif kind == 'file':
if tree.is_executable(fid):
mode = '100755'
else:
mode = '100644'
# is the blob already exported?
if h in filenodes:
mark = filenodes[h]
final.append((mode, mark, path))
continue
d = tree.get_file_text(fid)
elif kind == 'directory':
continue
else:
die("Unhandled kind '%s' for path '%s'" % (kind, path))
mark = marks.next_mark()
filenodes[h] = mark
print "blob"
print "mark :%u" % mark
print "data %d" % len(d)
print d
final.append((mode, mark, path))
return final
def export_branch(repo, name):
global prefix
ref = '%s/heads/%s' % (prefix, name)
tip = marks.get_tip(name)
branch = bzrlib.branch.Branch.open(branches[name])
repo = branch.repository
branch.lock_read()
revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
try:
tip_revno = branch.revision_id_to_revno(tip)
last_revno, _ = branch.last_revision_info()
total = last_revno - tip_revno
except bzrlib.errors.NoSuchRevision:
tip_revno = 0
total = 0
for revid, _, seq, _ in revs:
if marks.is_marked(revid):
continue
rev = repo.get_revision(revid)
revno = seq[0]
parents = rev.parent_ids
time = rev.timestamp
tz = rev.timezone
committer = rev.committer.encode('utf-8')
committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz))
authors = rev.get_apparent_authors()
if authors:
author = authors[0].encode('utf-8')
author = "%s %u %s" % (fixup_user(author), time, gittz(tz))
else:
author = committer
msg = rev.message.encode('utf-8')
msg += '\n'
if len(parents) == 0:
parent = bzrlib.revision.NULL_REVISION
else:
parent = parents[0]
cur_tree = repo.revision_tree(revid)
prev = repo.revision_tree(parent)
modified, removed = get_filechanges(cur_tree, prev)
modified_final = export_files(cur_tree, modified)
if len(parents) == 0:
print 'reset %s' % ref
print "commit %s" % ref
print "mark :%d" % (marks.get_mark(revid))
print "author %s" % (author)
print "committer %s" % (committer)
print "data %d" % (len(msg))
print msg
for i, p in enumerate(parents):
try:
m = rev_to_mark(p)
except KeyError:
# ghost?
continue
if i == 0:
print "from :%s" % m
else:
print "merge :%s" % m
for f in removed:
print "D %s" % (f,)
for f in modified_final:
print "M %s :%u %s" % f
print
if len(seq) > 1:
# let's skip branch revisions from the progress report
continue
progress = (revno - tip_revno)
if (progress % 100 == 0):
if total:
print "progress revision %d '%s' (%d/%d)" % (revno, name, progress, total)
else:
print "progress revision %d '%s' (%d)" % (revno, name, progress)
branch.unlock()
revid = branch.last_revision()
# make sure the ref is updated
print "reset %s" % ref
print "from :%u" % rev_to_mark(revid)
print
marks.set_tip(name, revid)
def export_tag(repo, name):
global tags, prefix
ref = '%s/tags/%s' % (prefix, name)
print "reset %s" % ref
print "from :%u" % rev_to_mark(tags[name])
print
def do_import(parser):
global dirname
repo = parser.repo
path = os.path.join(dirname, 'marks-git')
print "feature done"
if os.path.exists(path):
print "feature import-marks=%s" % path
print "feature export-marks=%s" % path
print "feature force"
sys.stdout.flush()
while parser.check('import'):
ref = parser[1]
if ref.startswith('refs/heads/'):
name = ref[len('refs/heads/'):]
export_branch(repo, name)
if ref.startswith('refs/tags/'):
name = ref[len('refs/tags/'):]
export_tag(repo, name)
parser.next()
print 'done'
sys.stdout.flush()
def parse_blob(parser):
global blob_marks
parser.next()
mark = parser.get_mark()
parser.next()
data = parser.get_data()
blob_marks[mark] = data
parser.next()
class CustomTree():
def __init__(self, branch, revid, parents, files):
global files_cache
self.updates = {}
self.branch = branch
def copy_tree(revid):
files = files_cache[revid] = {}
branch.lock_read()
tree = branch.repository.revision_tree(revid)
try:
for path, entry in tree.iter_entries_by_dir():
files[path] = [entry.file_id, None]
finally:
branch.unlock()
return files
if len(parents) == 0:
self.base_id = bzrlib.revision.NULL_REVISION
self.base_files = {}
else:
self.base_id = parents[0]
self.base_files = files_cache.get(self.base_id, None)
if not self.base_files:
self.base_files = copy_tree(self.base_id)
self.files = files_cache[revid] = self.base_files.copy()
self.rev_files = {}
for path, data in self.files.iteritems():
fid, mark = data
self.rev_files[fid] = [path, mark]
for path, f in files.iteritems():
fid, mark = self.files.get(path, [None, None])
if not fid:
fid = bzrlib.generate_ids.gen_file_id(path)
f['path'] = path
self.rev_files[fid] = [path, mark]
self.updates[fid] = f
def last_revision(self):
return self.base_id
def iter_changes(self):
changes = []
def get_parent(dirname, basename):
parent_fid, mark = self.base_files.get(dirname, [None, None])
if parent_fid:
return parent_fid
parent_fid, mark = self.files.get(dirname, [None, None])
if parent_fid:
return parent_fid
if basename == '':
return None
fid = bzrlib.generate_ids.gen_file_id(path)
add_entry(fid, dirname, 'directory')
return fid
def add_entry(fid, path, kind, mode = None):
dirname, basename = os.path.split(path)
parent_fid = get_parent(dirname, basename)
executable = False
if mode == '100755':
executable = True
elif mode == '120000':
kind = 'symlink'
change = (fid,
(None, path),
True,
(False, True),
(None, parent_fid),
(None, basename),
(None, kind),
(None, executable))
self.files[path] = [change[0], None]
changes.append(change)
def update_entry(fid, path, kind, mode = None):
dirname, basename = os.path.split(path)
parent_fid = get_parent(dirname, basename)
executable = False
if mode == '100755':
executable = True
elif mode == '120000':
kind = 'symlink'
change = (fid,
(path, path),
True,
(True, True),
(None, parent_fid),
(None, basename),
(None, kind),
(None, executable))
self.files[path] = [change[0], None]
changes.append(change)
def remove_entry(fid, path, kind):
dirname, basename = os.path.split(path)
parent_fid = get_parent(dirname, basename)
change = (fid,
(path, None),
True,
(True, False),
(parent_fid, None),
(None, None),
(None, None),
(None, None))
del self.files[path]
changes.append(change)
for fid, f in self.updates.iteritems():
path = f['path']
if 'deleted' in f:
remove_entry(fid, path, 'file')
continue
if path in self.base_files:
update_entry(fid, path, 'file', f['mode'])
else:
add_entry(fid, path, 'file', f['mode'])
self.files[path][1] = f['mark']
self.rev_files[fid][1] = f['mark']
return changes
def get_content(self, file_id):
path, mark = self.rev_files[file_id]
if mark:
return blob_marks[mark]
# last resort
tree = self.branch.repository.revision_tree(self.base_id)
return tree.get_file_text(file_id)
def get_file_with_stat(self, file_id, path=None):
content = self.get_content(file_id)
return (StringIO.StringIO(content), None)
def get_symlink_target(self, file_id):
return self.get_content(file_id)
def id2path(self, file_id):
path, mark = self.rev_files[file_id]
return path
def c_style_unescape(string):
if string[0] == string[-1] == '"':
return string.decode('string-escape')[1:-1]
return string
def parse_commit(parser):
global marks, blob_marks, parsed_refs
global mode
parents = []
ref = parser[1]
parser.next()
if ref.startswith('refs/heads/'):
name = ref[len('refs/heads/'):]
branch = bzrlib.branch.Branch.open(branches[name])
else:
die('unknown ref')
commit_mark = parser.get_mark()
parser.next()
author = parser.get_author()
parser.next()
committer = parser.get_author()
parser.next()
data = parser.get_data()
parser.next()
if parser.check('from'):
parents.append(parser.get_mark())
parser.next()
while parser.check('merge'):
parents.append(parser.get_mark())
parser.next()
# fast-export adds an extra newline
if data[-1] == '\n':
data = data[:-1]
files = {}
for line in parser:
if parser.check('M'):
t, m, mark_ref, path = line.split(' ', 3)
mark = int(mark_ref[1:])
f = { 'mode' : m, 'mark' : mark }
elif parser.check('D'):
t, path = line.split(' ')
f = { 'deleted' : True }
else:
die('Unknown file command: %s' % line)
path = c_style_unescape(path).decode('utf-8')
files[path] = f
committer, date, tz = committer
parents = [mark_to_rev(p) for p in parents]
revid = bzrlib.generate_ids.gen_revision_id(committer, date)
props = {}
props['branch-nick'] = branch.nick
mtree = CustomTree(branch, revid, parents, files)
changes = mtree.iter_changes()
branch.lock_write()
try:
builder = branch.get_commit_builder(parents, None, date, tz, committer, props, revid)
try:
list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
builder.finish_inventory()
builder.commit(data.decode('utf-8', 'replace'))
except Exception, e:
builder.abort()
raise
finally:
branch.unlock()
parsed_refs[ref] = revid
marks.new_mark(revid, commit_mark)
def parse_reset(parser):
global parsed_refs
ref = parser[1]
parser.next()
# ugh
if parser.check('commit'):
parse_commit(parser)
return
if not parser.check('from'):
return
from_mark = parser.get_mark()
parser.next()
parsed_refs[ref] = mark_to_rev(from_mark)
def do_export(parser):
global parsed_refs, dirname
parser.next()
for line in parser.each_block('done'):
if parser.check('blob'):
parse_blob(parser)
elif parser.check('commit'):
parse_commit(parser)
elif parser.check('reset'):
parse_reset(parser)
elif parser.check('tag'):
pass
elif parser.check('feature'):
pass
else:
die('unhandled export command: %s' % line)
for ref, revid in parsed_refs.iteritems():
if ref.startswith('refs/heads/'):
name = ref[len('refs/heads/'):]
branch = bzrlib.branch.Branch.open(branches[name])
branch.generate_revision_history(revid, marks.get_tip(name))
if name in peers:
peer = bzrlib.branch.Branch.open(peers[name])
try:
peer.bzrdir.push_branch(branch, revision_id=revid)
except bzrlib.errors.DivergedBranches:
print "error %s non-fast forward" % ref
continue
try:
wt = branch.bzrdir.open_workingtree()
wt.update()
except bzrlib.errors.NoWorkingTree:
pass
elif ref.startswith('refs/tags/'):
# TODO: implement tag push
print "error %s pushing tags not supported" % ref
continue
else:
# transport-helper/fast-export bugs
continue
print "ok %s" % ref
print
def do_capabilities(parser):
global dirname
print "import"
print "export"
print "refspec refs/heads/*:%s/heads/*" % prefix
print "refspec refs/tags/*:%s/tags/*" % prefix
path = os.path.join(dirname, 'marks-git')
if os.path.exists(path):
print "*import-marks %s" % path
print "*export-marks %s" % path
print
def ref_is_valid(name):
return not True in [c in name for c in '~^: \\']
def do_list(parser):
global tags
master_branch = None
for name in branches:
if not master_branch:
master_branch = name
print "? refs/heads/%s" % name
branch = bzrlib.branch.Branch.open(branches[master_branch])
branch.lock_read()
for tag, revid in branch.tags.get_tag_dict().items():
try:
branch.revision_id_to_dotted_revno(revid)
except bzrlib.errors.NoSuchRevision:
continue
if not ref_is_valid(tag):
continue
print "? refs/tags/%s" % tag
tags[tag] = revid
branch.unlock()
print "@refs/heads/%s HEAD" % master_branch
print
def get_remote_branch(origin, remote_branch, name):
global dirname, peers
branch_path = os.path.join(dirname, 'clone', name)
if os.path.exists(branch_path):
# pull
d = bzrlib.bzrdir.BzrDir.open(branch_path)
branch = d.open_branch()
try:
branch.pull(remote_branch, [], None, False)
except bzrlib.errors.DivergedBranches:
# use remote branch for now
return remote_branch
else:
# clone
d = origin.sprout(branch_path, None,
hardlink=True, create_tree_if_local=False,
force_new_repo=False,
source_branch=remote_branch)
branch = d.open_branch()
return branch
def find_branches(repo, wanted):
transport = repo.bzrdir.root_transport
for fn in transport.iter_files_recursive():
if not fn.endswith('.bzr/branch-format'):
continue
name = subdir = fn[:-len('/.bzr/branch-format')]
name = name if name != '' else 'master'
name = name.replace('/', '+')
if wanted and not name in wanted:
continue
try:
cur = transport.clone(subdir)
branch = bzrlib.branch.Branch.open_from_transport(cur)
except bzrlib.errors.NotBranchError:
continue
else:
yield name, branch
def get_repo(url, alias):
global dirname, peer, branches
normal_url = bzrlib.urlutils.normalize_url(url)
origin = bzrlib.bzrdir.BzrDir.open(url)
is_local = isinstance(origin.transport, bzrlib.transport.local.LocalTransport)
shared_path = os.path.join(gitdir, 'bzr')
try:
shared_dir = bzrlib.bzrdir.BzrDir.open(shared_path)
except bzrlib.errors.NotBranchError:
shared_dir = bzrlib.bzrdir.BzrDir.create(shared_path)
try:
shared_repo = shared_dir.open_repository()
except bzrlib.errors.NoRepositoryPresent:
shared_repo = shared_dir.create_repository(shared=True)
if not is_local:
clone_path = os.path.join(dirname, 'clone')
if not os.path.exists(clone_path):
os.mkdir(clone_path)
else:
# check and remove old organization
try:
bdir = bzrlib.bzrdir.BzrDir.open(clone_path)
bdir.destroy_repository()
except bzrlib.errors.NotBranchError:
pass
except bzrlib.errors.NoRepositoryPresent:
pass
try:
repo = origin.open_repository()
if not repo.user_transport.listable():
# this repository is not usable for us
raise bzrlib.errors.NoRepositoryPresent(repo.bzrdir)
except bzrlib.errors.NoRepositoryPresent:
# branch
name = 'master'
remote_branch = origin.open_branch()
if not is_local:
peers[name] = remote_branch.base
branch = get_remote_branch(origin, remote_branch, name)
else:
branch = remote_branch
branches[name] = branch.base
return branch.repository
else:
# repository
wanted = get_config('remote-bzr.branches').rstrip().split(', ')
# stupid python
wanted = [e for e in wanted if e]
for name, remote_branch in find_branches(repo, wanted):
if not is_local:
peers[name] = remote_branch.base
branch = get_remote_branch(origin, remote_branch, name)
else:
branch = remote_branch
branches[name] = branch.base
return repo
def fix_path(alias, orig_url):
url = urlparse.urlparse(orig_url, 'file')
if url.scheme != 'file' or os.path.isabs(url.path):
return
abs_url = urlparse.urljoin("%s/" % os.getcwd(), orig_url)
cmd = ['git', 'config', 'remote.%s.url' % alias, "bzr::%s" % abs_url]
subprocess.call(cmd)
def main(args):
global marks, prefix, gitdir, dirname
global tags, filenodes
global blob_marks
global parsed_refs
global files_cache
global is_tmp
global branches, peers
alias = args[1]
url = args[2]
tags = {}
filenodes = {}
blob_marks = {}
parsed_refs = {}
files_cache = {}
marks = None
branches = {}
peers = {}
if alias[5:] == url:
is_tmp = True
alias = hashlib.sha1(alias).hexdigest()
else:
is_tmp = False
prefix = 'refs/bzr/%s' % alias
gitdir = os.environ['GIT_DIR']
dirname = os.path.join(gitdir, 'bzr', alias)
if not is_tmp:
fix_path(alias, url)
if not os.path.exists(dirname):
os.makedirs(dirname)
if hasattr(bzrlib.ui.ui_factory, 'be_quiet'):
bzrlib.ui.ui_factory.be_quiet(True)
repo = get_repo(url, alias)
marks_path = os.path.join(dirname, 'marks-int')
marks = Marks(marks_path)
parser = Parser(repo)
for line in parser:
if parser.check('capabilities'):
do_capabilities(parser)
elif parser.check('list'):
do_list(parser)
elif parser.check('import'):
do_import(parser)
elif parser.check('export'):
do_export(parser)
else:
die('unhandled command: %s' % line)
sys.stdout.flush()
def bye():
if not marks:
return
if not is_tmp:
marks.store()
else:
shutil.rmtree(dirname)
atexit.register(bye)
sys.exit(main(sys.argv))