git-commit-vandalism/contrib/remote-helpers/git-remote-hg
Felipe Contreras 25027b983e remote-hg: redirect buggy mercurial output
Mercurial emits messages like "searching for changes", "no changes
found", etc. meant for the use of its own UI layer, which break the pipe
between transport helper and remote helper.

Since there's no way to silence Mercurial, let's redirect to standard
error.

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-11 10:46:46 -07:00

819 lines
21 KiB
Python
Executable File

#!/usr/bin/env python
#
# Copyright (c) 2012 Felipe Contreras
#
# Inspired by Rocco Rutte's hg-fast-export
# Just copy to your ~/bin, or anywhere in your $PATH.
# Then you can clone with:
# git clone hg::/path/to/mercurial/repo/
from mercurial import hg, ui, bookmarks, context, util, encoding
import re
import sys
import os
import json
import shutil
import subprocess
import urllib
#
# If you want to switch to hg-git compatibility mode:
# git config --global remote-hg.hg-git-compat true
#
# If you are not in hg-git-compat mode and want to disable the tracking of
# named branches:
# git config --global remote-hg.track-branches false
#
# git:
# Sensible defaults for git.
# hg bookmarks are exported as git branches, hg branches are prefixed
# with 'branches/', HEAD is a special case.
#
# hg:
# Emulate hg-git.
# Only hg bookmarks are exported as git branches.
# Commits are modified to preserve hg information and allow bidirectionality.
#
# Patterns used to take user identities apart.  Raw strings keep the
# backslash escapes (\w, \d) from being interpreted by the string literal.

# Leading run of characters up to the first '<' or '>' (the bare name).
NAME_RE = re.compile(r'^([^<>]+)')
# "Name <mail>" with an optional name part.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# "Name <mail>trailer" where the closing '>' and the trailer are optional.
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# Identity line: "<field> [name ]<mail> <seconds> <+-hhmm>".
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write 'ERROR: <message>' to stderr and exit with status 1.

    msg is a %-style format string; it is interpolated with args only when
    args are given, so an already formatted message that happens to contain
    a '%' character is reported verbatim instead of raising.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write 'WARNING: <message>' to stderr.

    msg is a %-style format string; it is interpolated with args only when
    args are given, so an already formatted message containing a '%'
    character is printed verbatim instead of raising.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gitmode(flags):
    """Translate Mercurial file flags into a git file mode string.

    'l' (symlink) wins over 'x' (executable); anything else is a regular
    file.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format a timezone offset as a git '+hhmm' / '-hhmm' string.

    tz is an offset in seconds with the opposite sign of the git notation
    (this function negates it before formatting).

    The sign is handled separately from the magnitude: floor division and
    modulo applied directly to a negative offset would round the hours away
    from zero and report e.g. -03:30 as '-0430'.
    """
    offset = -tz
    sign = '-' if offset < 0 else '+'
    hours, seconds = divmod(abs(offset), 3600)
    return '%s%02d%02d' % (sign, hours, seconds // 60)
def hgmode(mode):
    """Translate a git file mode string into Mercurial file flags.

    Returns 'x' for executables, 'l' for symlinks and '' for any other mode.
    """
    flags_by_mode = {'120000': 'l', '100755': 'x'}
    try:
        return flags_by_mode[mode]
    except KeyError:
        return ''
def get_config(config):
    """Return the raw stdout of 'git config --get <config>'.

    The output is not stripped, so a set value still carries its trailing
    newline.
    """
    git = subprocess.Popen(['git', 'config', '--get', config],
                           stdout=subprocess.PIPE)
    return git.communicate()[0]
class Marks:
    """Mapping between Mercurial revisions and fast-import marks.

    The state is kept in a JSON file at 'path' with three entries:
    'tips' (ref name -> last exported revision), 'marks' (revision as a
    string -> mark) and 'last-mark'.  The reverse mapping (mark -> revision)
    is rebuilt in memory.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Read the state from self.path; do nothing if the file is absent."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of relying on garbage
        # collection.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings; revisions are integers.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serializable state."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of revision rev (KeyError if it has none)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision of mark (KeyError if unknown)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark for revision rev and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse mapping in step so to_rev() works right away and
        # not only after a store()/load() round trip.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen mark for revision rev."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Tell whether revision rev already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the recorded tip for a ref name, or 0 if there is none."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Record tip as the last exported revision for a ref name."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    The current line (with surrounding whitespace removed) is kept in
    self.line; the helper methods decode the parts of that line that the
    command handlers need.
    """

    def __init__(self, repo):
        self.repo = repo
        # Prime the parser with the first input line.
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and strip surrounding whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with word."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines, reading on, until the current line equals separator."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        """Iterate over the lines up to the next empty line."""
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ':' on the line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data command.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Decode an identity line into (user, date, tz).

        user is 'Name <mail>' (or just the name when the mail equals the
        bad_mail placeholder), date is an integer timestamp and tz is the
        '+-hhmm' offset converted to seconds and negated.  Returns None when
        the line is not an identity line.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # Text stored as ' ext:(<quoted>)' in the name is appended to
            # the user again after unquoting.
            m = re.match('^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            user = name

        if ex:
            user += ex

        # Split the sign from the hhmm magnitude: floor division and modulo
        # on a negative number would turn e.g. -0330 into -4h +70min.
        sign = -1 if tz.startswith('-') else 1
        hours, minutes = divmod(abs(int(tz)), 100)
        offset = sign * (hours * 3600 + minutes * 60)
        return (user, int(date), -offset)
def export_file(fc):
d = fc.data()
print "M %s inline %s" % (gitmode(fc.flags()), fc.path())
print "data %d" % len(d)
print d
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of ctx with that of revision parent.

    Returns a pair of sets: the files that are new or differ in flags or
    content, and the files present in the parent but missing from ctx.
    """
    current = ctx.manifest()
    # Work on a copy: entries are dropped as they are matched, so whatever
    # is left at the end only exists in the parent.
    leftover = repo[parent].manifest().copy()

    added = set()
    modified = set()
    for path in current:
        if path not in leftover:
            added.add(path)
            continue
        if current.flags(path) != leftover.flags(path) or current[path] != leftover[path]:
            modified.add(path)
        del leftover[path]

    removed = set(leftover.keys())
    return added | modified, removed
def fixup_user_git(user):
    """Split a user string into (name, mail) for git mode.

    Double quotes are dropped first.  A 'Name <mail>' string yields both
    parts; otherwise the text before any '<' or '>' is used as the name and
    the mail is None.  Parts that cannot be determined are None.
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split a user string into (name, mail) for hg-git compatible mode.

    Text following the closing '>' is preserved by appending it to the
    name as ' ext:(<url-quoted text>)'.  Without a '<mail>' part the whole
    string is the name, and also the mail when it contains an '@'.
    """
    def sanitize(text):
        # stole this from hg-git
        trimmed = text.lstrip('< ').rstrip('> ')
        return re.sub('[<>\n]', '?', trimmed)

    parts = AUTHOR_HG_RE.match(user)
    if not parts:
        name = sanitize(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = sanitize(parts.group(1))
    mail = sanitize(parts.group(2))
    trailer = parts.group(3)
    if trailer:
        name += ' ext:(' + urllib.quote(trailer) + ')'
    return (name, mail)
def fixup_user(user):
    """Normalize a Mercurial user string to 'Name <mail>'.

    The parsing rules depend on the global mode ('git' or hg-git
    compatible); missing parts are replaced by the bad_name / bad_mail
    placeholders.
    """
    global mode, bad_mail

    split_user = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = split_user(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Return the Mercurial repository to operate on for url.

    A local url is opened directly.  Otherwise a clone under
    <dirname>/clone is used: it is created on first use and pulled from the
    remote on later runs, and the global peer is set to the remote peer.
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')
    # Mercurial's UI messages must not end up on stdout, which carries the
    # helper protocol; send them to stderr instead.
    myui.fout = sys.stderr

    if hg.islocal(url):
        return hg.repository(myui, url)

    clone_path = os.path.join(dirname, 'clone')
    if os.path.exists(clone_path):
        repo = hg.repository(myui, clone_path)
        peer = hg.peer(myui, {}, url)
        repo.pull(peer, heads=None, force=True)
    else:
        peer, dstpeer = hg.clone(myui, {}, url, clone_path, update=False, pull=True)
        repo = dstpeer.local()

    return repo
def rev_to_mark(rev):
    """Return the mark recorded for revision rev in the global marks."""
    # Only reads the module-level 'marks'; no global declaration needed.
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Return the revision recorded for mark in the global marks."""
    # Only reads the module-level 'marks'; no global declaration needed.
    rev = marks.to_rev(mark)
    return rev
def export_ref(repo, name, kind, head):
global prefix, marks, mode
ename = '%s/%s' % (kind, name)
tip = marks.get_tip(ename)
# mercurial takes too much time checking this
if tip and tip == head.rev():
# nothing to do
return
revs = xrange(tip, head.rev() + 1)
count = 0
revs = [rev for rev in revs if not marks.is_marked(rev)]
for rev in revs:
c = repo[rev]
(manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
rev_branch = extra['branch']
author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
if 'committer' in extra:
user, time, tz = extra['committer'].rsplit(' ', 2)
committer = "%s %s %s" % (user, time, gittz(int(tz)))
else:
committer = author
parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]
if len(parents) == 0:
modified = c.manifest().keys()
removed = []
else:
modified, removed = get_filechanges(repo, c, parents[0])
if mode == 'hg':
extra_msg = ''
if rev_branch != 'default':
extra_msg += 'branch : %s\n' % rev_branch
renames = []
for f in c.files():
if f not in c.manifest():
continue
rename = c.filectx(f).renamed()
if rename:
renames.append((rename[0], f))
for e in renames:
extra_msg += "rename : %s => %s\n" % e
for key, value in extra.iteritems():
if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
continue
else:
extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))
desc += '\n'
if extra_msg:
desc += '\n--HG--\n' + extra_msg
if len(parents) == 0 and rev:
print 'reset %s/%s' % (prefix, ename)
print "commit %s/%s" % (prefix, ename)
print "mark :%d" % (marks.get_mark(rev))
print "author %s" % (author)
print "committer %s" % (committer)
print "data %d" % (len(desc))
print desc
if len(parents) > 0:
print "from :%s" % (rev_to_mark(parents[0]))
if len(parents) > 1:
print "merge :%s" % (rev_to_mark(parents[1]))
for f in modified:
export_file(c.filectx(f))
for f in removed:
print "D %s" % (f)
print
count += 1
if (count % 100 == 0):
print "progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs))
print "#############################################################"
# make sure the ref is updated
print "reset %s/%s" % (prefix, ename)
print "from :%u" % rev_to_mark(rev)
print
marks.set_tip(ename, rev)
def export_tag(repo, tag):
    """Export the revision that tag names under the 'tags' namespace."""
    tagged = repo[tag]
    export_ref(repo, tag, 'tags', tagged)
def export_bookmark(repo, bmark):
    """Export the bookmark bmark, looked up in the global bmarks table."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export the named branch up to the tip chosen by get_branch_tip()."""
    branch_head = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', branch_head)
def export_head(repo):
    """Export HEAD as a bookmark, using the (name, node) pair in g_head."""
    global g_head

    head_name = g_head[0]
    head_node = g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
def do_capabilities(parser):
global prefix, dirname
print "import"
print "export"
print "refspec refs/heads/branches/*:%s/branches/*" % prefix
print "refspec refs/heads/*:%s/bookmarks/*" % prefix
print "refspec refs/tags/*:%s/tags/*" % prefix
path = os.path.join(dirname, 'marks-git')
if os.path.exists(path):
print "*import-marks %s" % path
print "*export-marks %s" % path
print
def get_branch_tip(repo, branch):
    """Return the head to export for a named branch, or None.

    The heads come from the global branches table filled in by do_list().
    When a branch has several heads a warning is printed and, if the
    repository object offers it, repo.branchtip() decides; otherwise the
    first recorded head is used.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if (len(heads) > 1):
        # Hand the branch name to warn() as a format argument so that a '%'
        # in the name cannot break the message formatting.
        warn("Branch '%s' has more than one head, consider merging", branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
def list_head(repo, cur):
global g_head, bmarks
head = bookmarks.readcurrent(repo)
if head:
node = repo[head]
else:
# fake bookmark from current branch
head = cur
node = repo['.']
if not node:
node = repo['tip']
if not node:
return
if head == 'default':
head = 'master'
bmarks[head] = node
print "@refs/heads/%s HEAD" % head
g_head = (head, node)
def do_list(parser):
global branches, bmarks, mode, track_branches
repo = parser.repo
for bmark, node in bookmarks.listbookmarks(repo).iteritems():
bmarks[bmark] = repo[node]
cur = repo.dirstate.branch()
list_head(repo, cur)
if track_branches:
for branch in repo.branchmap():
heads = repo.branchheads(branch)
if len(heads):
branches[branch] = heads
for branch in branches:
print "? refs/heads/branches/%s" % branch
for bmark in bmarks:
print "? refs/heads/%s" % bmark
for tag, node in repo.tagslist():
if tag == 'tip':
continue
print "? refs/tags/%s" % tag
print
def do_import(parser):
repo = parser.repo
path = os.path.join(dirname, 'marks-git')
print "feature done"
if os.path.exists(path):
print "feature import-marks=%s" % path
print "feature export-marks=%s" % path
sys.stdout.flush()
tmp = encoding.encoding
encoding.encoding = 'utf-8'
# lets get all the import lines
while parser.check('import'):
ref = parser[1]
if (ref == 'HEAD'):
export_head(repo)
elif ref.startswith('refs/heads/branches/'):
branch = ref[len('refs/heads/branches/'):]
export_branch(repo, branch)
elif ref.startswith('refs/heads/'):
bmark = ref[len('refs/heads/'):]
export_bookmark(repo, bmark)
elif ref.startswith('refs/tags/'):
tag = ref[len('refs/tags/'):]
export_tag(repo, tag)
parser.next()
encoding.encoding = tmp
print 'done'
def parse_blob(parser):
    """Consume a 'blob' command and keep its data in blob_marks by mark."""
    global blob_marks

    parser.next()                  # move from 'blob' to the mark line
    blob_mark = parser.get_mark()
    parser.next()                  # move to the data line
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add the files touched by the first parent to files.

    Each file listed by repo[p1].files() that is still in that revision's
    manifest and not already a key of files is added as {'ctx': <file
    context>}.  files is modified in place; p2 is not used.
    """
    for path in repo[p1].files():
        if path in files:
            continue
        if path not in repo[p1].manifest():
            continue
        files[path] = { 'ctx' : repo[p1][path] }
def parse_commit(parser):
    """Consume a 'commit' command and create the Mercurial changeset.

    Reads mark, author, committer, message, optional 'from'/'merge' parents
    and the file commands, commits the result to parser.repo, records the
    new node for the ref in parsed_refs and ties the commit mark to the new
    revision.  Exits through die() on a second 'merge' line or an unknown
    file command.
    """
    global marks, blob_marks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    # path -> description of the change, filled from the M/D commands.
    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # The line is passed as a format argument so that a '%' in it
            # cannot break the message formatting.
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback for memctx: supplies the contents of path f.  Deleted
        # files are reported by raising IOError.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    # A missing parent is given as the all-zero node.
    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    if mode == 'hg':
        # Move the '--HG--' trailer of the message back into renames,
        # branch and extra fields.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    # Commit with Mercurial's encoding set to utf-8; the finally clause
    # restores the previous value even if the commit fails.
    tmp = encoding.encoding
    encoding.encoding = 'utf-8'
    try:
        node = repo.commitctx(ctx)
    finally:
        encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node
    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a 'reset' command.

    A reset directly followed by a commit is handed to parse_commit().  A
    reset with a 'from' line records the referenced node for the ref in
    parsed_refs; a reset without one is ignored.
    """
    global parsed_refs

    ref = parser[1]
    parser.next()

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

        rev = mark_to_rev(from_mark)
        parsed_refs[ref] = parser.repo.changelog.node(rev)
def parse_tag(parser):
    """Consume a 'tag' command; its contents are read and then dropped."""
    _name = parser[1]
    parser.next()
    _from_mark = parser.get_mark()
    parser.next()
    _tagger = parser.get_author()
    parser.next()
    _message = parser.get_data()
    parser.next()

    # nothing to do
def do_export(parser):
global parsed_refs, bmarks, peer
parser.next()
for line in parser.each_block('done'):
if parser.check('blob'):
parse_blob(parser)
elif parser.check('commit'):
parse_commit(parser)
elif parser.check('reset'):
parse_reset(parser)
elif parser.check('tag'):
parse_tag(parser)
elif parser.check('feature'):
pass
else:
die('unhandled export command: %s' % line)
for ref, node in parsed_refs.iteritems():
if ref.startswith('refs/heads/branches'):
pass
elif ref.startswith('refs/heads/'):
bmark = ref[len('refs/heads/'):]
if bmark in bmarks:
old = bmarks[bmark].hex()
else:
old = ''
if bmark == 'master' and 'master' not in parser.repo._bookmarks:
# fake bookmark
print "ok %s" % ref
continue
if not bookmarks.pushbookmark(parser.repo, bmark, old, node):
print "error %s" % ref
continue
elif ref.startswith('refs/tags/'):
tag = ref[len('refs/tags/'):]
parser.repo.tag([tag], node, None, True, None, {})
else:
# transport-helper/fast-export bugs
continue
print "ok %s" % ref
if peer:
parser.repo.push(peer, force=False)
print
def fix_path(alias, repo, orig_url):
    """Rewrite the remote's url in the git config if it differs.

    When the url reported by repo is not the same string as orig_url,
    'remote.<alias>.url' is set to 'hg::<url reported by repo>'.
    """
    actual_url = util.url(repo.url())
    given_url = util.url(orig_url)
    if str(given_url) != str(actual_url):
        subprocess.call(['git', 'config', 'remote.%s.url' % alias,
                         "hg::%s" % actual_url])
def main(args):
    """Run the remote helper.

    args[1] is the remote alias and args[2] the repository url.  Sets up
    the global state, opens the repository and then answers the commands
    read from stdin until an empty line arrives.  Marks are stored on exit
    for configured remotes; for a temporary alias the state directory is
    removed instead.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches

    alias, url = args[1], args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    try:
        # get_config() returns raw output, hence the trailing newlines.
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode, bad_mail, bad_name = 'hg', 'none@none', ''
    else:
        mode, bad_mail, bad_name = 'git', 'unknown', 'Unknown'

    # The alias is just the url behind a 4-character prefix (presumably
    # 'hg::'): no configured remote, so use a hash of it as directory name
    # and treat the state as temporary.
    is_tmp = alias[4:] == url
    if is_tmp:
        alias = util.sha1(alias).hexdigest()

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks = Marks(os.path.join(dirname, 'marks-hg'))

    # Checked in this order; the first matching command wins.
    handlers = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for command, handler in handlers:
            if parser.check(command):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()

    if is_tmp:
        shutil.rmtree(dirname)
    else:
        marks.store()
# Script entry point: run the helper with the command-line arguments and use
# the value returned by main() as the process exit status.
sys.exit(main(sys.argv))