git-commit-vandalism/contrib/remote-helpers/git-remote-hg

986 lines
25 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env python
#
# Copyright (c) 2012 Felipe Contreras
#
# Inspired by Rocco Rutte's hg-fast-export
# Just copy to your ~/bin, or anywhere in your $PATH.
# Then you can clone with:
# git clone hg::/path/to/mercurial/repo/
#
# For remote repositories a local clone is stored in
# "$GIT_DIR/hg/origin/clone/.hg/".
from mercurial import hg, ui, bookmarks, context, encoding, node, error, extensions
import re
import sys
import os
import json
import shutil
import subprocess
import urllib
import atexit
import urlparse, hashlib
#
# If you want to switch to hg-git compatibility mode:
# git config --global remote-hg.hg-git-compat true
#
# If you are not in hg-git-compat mode and want to disable the tracking of
# named branches:
# git config --global remote-hg.track-branches false
#
# If you don't want to force pushes (and thus risk creating new remote heads):
# git config --global remote-hg.force-push false
#
# If you want the equivalent of hg's clone/pull --insecure option:
# git config remote-hg.insecure true
#
# git:
# Sensible defaults for git.
# hg bookmarks are exported as git branches, hg branches are prefixed
# with 'branches/', HEAD is a special case.
#
# hg:
# Emulate hg-git.
# Only hg bookmarks are exported as git branches.
# Commits are modified to preserve hg information and allow bidirectionality.
#
# Patterns used to split author/committer identities.
#
# NAME_RE: the leading run of characters that are neither '<' nor '>'.
NAME_RE = re.compile('^([^<>]+)')
# AUTHOR_RE: an optional name followed by '<mail>' ending the string.
AUTHOR_RE = re.compile('^([^<>]+?)? ?<([^<>]*)>$')
# EMAIL_RE: an optional name followed by a token containing '@' that is not
# necessarily wrapped in angle brackets.
EMAIL_RE = re.compile('^([^<>]+[^ \\\t<>])?\\b(?:[ \\t<>]*?)\\b([^ \\t<>]+@[^ \\t<>]+)')
# AUTHOR_HG_RE: name, '<mail', and optionally '>' plus any trailing text
# (captured as group 3).
AUTHOR_HG_RE = re.compile('^(.*?) ?<(.*?)(?:>(.+)?)?$')
# RAW_AUTHOR_RE: '<word> [name] <mail> <digits> <+/-digits>', the shape of the
# author/committer/tagger lines handled by Parser.get_author().
RAW_AUTHOR_RE = re.compile('^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an ``ERROR: ...`` line to stderr and exit with status 1.

    ``msg`` is %-interpolated with ``args`` only when ``args`` is given, so a
    message that was already formatted by the caller (and may contain a
    literal '%') is printed verbatim instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write a ``WARNING: ...`` line to stderr.

    ``msg`` is %-interpolated with ``args`` only when ``args`` is given, so an
    already formatted message containing a literal '%' does not raise.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gitmode(flags):
    """Return the git file mode string for a set of hg file flags.

    'l' in ``flags`` wins and gives '120000'; otherwise 'x' gives '100755';
    anything else gives '100644'.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format a timezone offset in seconds as a git '+HHMM' / '-HHMM' string.

    ``tz`` is negated before formatting (the callers pass the value read from
    the Mercurial changelog, whose sign is the opposite of git's).

    The sign is handled separately from the magnitude: flooring a negative
    number of seconds (as integer division does) would turn an offset such as
    -3:30 into -04 hours plus 30 minutes.
    """
    offset = -tz
    sign = '-' if offset < 0 else '+'
    hours, remainder = divmod(abs(offset), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
def hgmode(mode):
    """Return the hg flag for a git file mode string.

    '100755' maps to 'x', '120000' maps to 'l', every other mode to ''.
    """
    if mode == '100755':
        return 'x'
    if mode == '120000':
        return 'l'
    return ''
def hghex(node):
    """Return ``hg.node.hex(node)`` for the given node id.

    The function is reached through ``hg.node`` because the parameter name
    shadows the imported ``node`` module inside this function.
    """
    to_hex = hg.node.hex
    return to_hex(node)
def hgref(ref):
    """Turn a git-side ref name back into its hg name.

    Every '___' sequence becomes a single space (the inverse of gitref()).
    """
    pieces = ref.split('___')
    return ' '.join(pieces)
def gitref(ref):
    """Turn an hg name into a git-safe ref name.

    Every space becomes '___' (the inverse of hgref()).
    """
    pieces = ref.split(' ')
    return '___'.join(pieces)
def get_config(config):
    """Return the raw stdout of ``git config --get <config>``.

    The output is returned unmodified (including any trailing newline); the
    exit status of git is not inspected.
    """
    proc = subprocess.Popen(['git', 'config', '--get', config],
                            stdout=subprocess.PIPE)
    stdout_data = proc.communicate()[0]
    return stdout_data
class Marks:
    """Bookkeeping of hg revision <-> fast-import mark, persisted as JSON.

    Attributes:
        path: file the state is loaded from and stored to.
        tips: per-ref value recorded with set_tip().
        marks: maps str(rev) to mark.
        rev_marks: maps mark to rev (the reverse of ``marks``).
        last_mark: highest mark handed out so far.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Load the state from ``self.path``; do nothing if it is missing."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as fp:
            tmp = json.load(fp)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings, so revisions come back as str.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serializable state (tips, marks, last-mark)."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to ``self.path`` as JSON."""
        # The file must be flushed and closed before the process exits.
        with open(self.path, 'w') as fp:
            json.dump(self.dict(), fp)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for ``rev`` (KeyError if unknown)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for ``mark`` (KeyError if unknown)."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a new mark not tied to any revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a new mark for ``rev``, record it and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse map in sync so to_rev() works without a reload
        # (load() rebuilds it for every entry of ``marks``).
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen ``mark`` for ``rev``."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Return True if ``rev`` already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the tip recorded for ``branch``, or 0 if none."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Record ``tip`` for ``branch``."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    ``self.line`` always holds the current (stripped) line; the helper
    methods interpret that line.
    """

    def __init__(self, repo):
        self.repo = repo
        # Reads the first line from stdin immediately.
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace stripped."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with ``word``."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals ``separator``.

        A new line is read every time the generator is resumed, after the
        consumer has processed (and possibly advanced past) the yielded one.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        """Convert a '+HHMM' / '-HHMM' value to signed seconds.

        The sign is applied after splitting hours and minutes; doing integer
        division and modulo on the negative number directly would floor the
        hours and yield e.g. -2:50 for '-0330'.
        """
        tz = int(tz)
        sign = -1 if tz < 0 else 1
        hours, minutes = divmod(abs(tz), 100)
        return sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse the current line as an author/committer/tagger line.

        Returns ``(user, date, tz)`` where ``date`` is the integer timestamp
        and ``tz`` is the negated offset in seconds, or None when the line
        does not match RAW_AUTHOR_RE.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        # Undo the ' ext:(...)' suffix that fixup_user_hg() appends to names.
        if name and 'ext:' in name:
            m = re.match('^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        # The placeholder mail is dropped again; only the name is kept.
        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            user = name

        if ex:
            user += ex

        return (user, int(date), -self._tz_to_seconds(tz))
def fix_file_path(path):
    """Return ``path`` unchanged if relative, else relative to '/'."""
    if os.path.isabs(path):
        return os.path.relpath(path, '/')
    return path
def export_files(files):
global marks, filenodes
final = []
for f in files:
fid = node.hex(f.filenode())
if fid in filenodes:
mark = filenodes[fid]
else:
mark = marks.next_mark()
filenodes[fid] = mark
d = f.data()
print "blob"
print "mark :%u" % mark
print "data %d" % len(d)
print d
path = fix_file_path(f.path())
final.append((gitmode(f.flags()), mark, path))
return final
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of ``ctx`` with that of ``repo[parent]``.

    Returns ``(changed, removed)``: ``changed`` is the set of file names that
    are new or whose flags or manifest entry differ, ``removed`` is the set
    of names present only in the parent manifest.
    """
    # load earliest manifest first for caching reasons
    old_manifest = repo[parent].manifest().copy()
    new_manifest = ctx.manifest()

    changed = set()
    for fname in new_manifest:
        if fname not in old_manifest:
            changed.add(fname)
            continue
        if (new_manifest.flags(fname) != old_manifest.flags(fname)
                or new_manifest[fname] != old_manifest[fname]):
            changed.add(fname)
        # Whatever is left in the copy at the end exists only in the parent.
        del old_manifest[fname]

    return changed, set(old_manifest.keys())
def fixup_user_git(user):
    """Split an hg user string into ``(name, mail)`` for git mode.

    Double quotes are removed first. The patterns are tried in order:
    AUTHOR_RE, then EMAIL_RE, then NAME_RE (name only). Either element may
    be None.
    """
    user = user.replace('"', '')

    m = AUTHOR_RE.match(user)
    if m:
        return (m.group(1), m.group(2).strip())

    m = EMAIL_RE.match(user)
    if m:
        return (m.group(1), m.group(2))

    m = NAME_RE.match(user)
    if m:
        return (m.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split an hg user string into ``(name, mail)`` for hg-git compat mode.

    Text found after the closing '>' is URL-quoted and appended to the name
    as ' ext:(...)'. When the string does not match AUTHOR_HG_RE the whole
    sanitized string is the name, and also the mail if it contains '@'.
    """
    def sanitize(name):
        # stole this from hg-git
        trimmed = name.lstrip('< ').rstrip('> ')
        return re.sub('[<>\n]', '?', trimmed)

    m = AUTHOR_HG_RE.match(user)
    if not m:
        name = sanitize(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = sanitize(m.group(1))
    mail = sanitize(m.group(2))
    trailer = m.group(3)
    if trailer:
        name += ' ext:(' + urllib.quote(trailer) + ')'
    return (name, mail)
def fixup_user(user):
    """Return ``'name <mail>'`` for an hg user string.

    The parsing function is chosen by the global ``mode``; a missing name or
    mail is replaced by the global ``bad_name`` / ``bad_mail`` placeholder.
    """
    global mode, bad_mail

    splitter = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = splitter(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Open the Mercurial repository for ``url`` and return it.

    Local URLs are opened directly. For anything else a clone is kept in
    ``<dirname>/clone``: it is created on first use and otherwise pulled
    from the remote, whose peer is stored in the global ``peer``.

    Exits through die() when the remote cannot be cloned or contacted.
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')
    # hg output goes to stderr (presumably to keep stdout free for the
    # helper protocol).
    myui.fout = sys.stderr

    try:
        if get_config('remote-hg.insecure') == 'true\n':
            myui.setconfig('web', 'cacerts', '')
    except subprocess.CalledProcessError:
        pass

    # The schemes extension is optional.
    try:
        mod = extensions.load(myui, 'hgext.schemes', None)
        mod.extsetup(myui)
    except ImportError:
        pass

    if hg.islocal(url):
        repo = hg.repository(myui, url)
    else:
        local_path = os.path.join(dirname, 'clone')
        if not os.path.exists(local_path):
            # Catch Exception rather than everything so that
            # KeyboardInterrupt/SystemExit propagate, and report the cause.
            try:
                peer, dstpeer = hg.clone(myui, {}, url, local_path, update=True, pull=True)
            except Exception as e:
                die('Repository error: %s', e)
            repo = dstpeer.local()
        else:
            repo = hg.repository(myui, local_path)
            try:
                peer = hg.peer(myui, {}, url)
            except Exception as e:
                die('Repository error: %s', e)
            repo.pull(peer, heads=None, force=True)

    return repo
def rev_to_mark(rev):
    """Return the mark recorded for ``rev`` in the global marks table."""
    global marks
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Return the revision recorded for ``mark`` in the global marks table."""
    global marks
    rev = marks.to_rev(mark)
    return rev
def export_ref(repo, name, kind, head):
global prefix, marks, mode
ename = '%s/%s' % (kind, name)
tip = marks.get_tip(ename)
revs = xrange(tip, head.rev() + 1)
count = 0
revs = [rev for rev in revs if not marks.is_marked(rev)]
for rev in revs:
c = repo[rev]
(manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
rev_branch = extra['branch']
author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
if 'committer' in extra:
user, time, tz = extra['committer'].rsplit(' ', 2)
committer = "%s %s %s" % (user, time, gittz(int(tz)))
else:
committer = author
parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]
if len(parents) == 0:
modified = c.manifest().keys()
removed = []
else:
modified, removed = get_filechanges(repo, c, parents[0])
desc += '\n'
if mode == 'hg':
extra_msg = ''
if rev_branch != 'default':
extra_msg += 'branch : %s\n' % rev_branch
renames = []
for f in c.files():
if f not in c.manifest():
continue
rename = c.filectx(f).renamed()
if rename:
renames.append((rename[0], f))
for e in renames:
extra_msg += "rename : %s => %s\n" % e
for key, value in extra.iteritems():
if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
continue
else:
extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))
if extra_msg:
desc += '\n--HG--\n' + extra_msg
if len(parents) == 0 and rev:
print 'reset %s/%s' % (prefix, ename)
modified_final = export_files(c.filectx(f) for f in modified)
print "commit %s/%s" % (prefix, ename)
print "mark :%d" % (marks.get_mark(rev))
print "author %s" % (author)
print "committer %s" % (committer)
print "data %d" % (len(desc))
print desc
if len(parents) > 0:
print "from :%s" % (rev_to_mark(parents[0]))
if len(parents) > 1:
print "merge :%s" % (rev_to_mark(parents[1]))
for f in modified_final:
print "M %s :%u %s" % f
for f in removed:
print "D %s" % (fix_file_path(f))
print
count += 1
if (count % 100 == 0):
print "progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs))
print "#############################################################"
# make sure the ref is updated
print "reset %s/%s" % (prefix, ename)
print "from :%u" % rev_to_mark(rev)
print
marks.set_tip(ename, rev)
def export_tag(repo, tag):
    """Export the revision that the hg tag named by ``tag`` resolves to."""
    head = repo[hgref(tag)]
    export_ref(repo, tag, 'tags', head)
def export_bookmark(repo, bmark):
    """Export the head stored for ``bmark`` in the global ``bmarks`` map."""
    export_ref(repo, bmark, 'bookmarks', bmarks[hgref(bmark)])
def export_branch(repo, branch):
    """Export the tip of the named branch ``branch`` under 'branches/'."""
    head = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', head)
def export_head(repo):
    """Export the ``(name, node)`` pair stored in ``g_head`` as a bookmark."""
    global g_head
    head_name, head_node = g_head
    export_ref(repo, head_name, 'bookmarks', head_node)
def do_capabilities(parser):
global prefix, dirname
print "import"
print "export"
print "refspec refs/heads/branches/*:%s/branches/*" % prefix
print "refspec refs/heads/*:%s/bookmarks/*" % prefix
print "refspec refs/tags/*:%s/tags/*" % prefix
path = os.path.join(dirname, 'marks-git')
if os.path.exists(path):
print "*import-marks %s" % path
print "*export-marks %s" % path
print
def branch_tip(repo, branch):
    """Return the tip of ``branch``.

    Uses ``repo.branchtip()`` when the repository object has it and falls
    back to ``repo.branchtags()[branch]`` otherwise.
    """
    # older versions of mercurial don't have this
    if not hasattr(repo, 'branchtip'):
        return repo.branchtags()[branch]
    return repo.branchtip(branch)
def get_branch_tip(repo, branch):
    """Return the head to export for the git-side branch name ``branch``.

    Looks the branch up in the global ``branches`` map. Returns None when
    the branch has no recorded heads, the single head when there is exactly
    one, and the branch tip (after a warning) when there are several.
    """
    global branches

    heads = branches.get(hgref(branch), None)
    if not heads:
        return None

    # verify there's only one head
    if (len(heads) > 1):
        # Pass the name as an argument: a pre-formatted message would be
        # %-interpolated a second time and fail on a '%' in the name.
        warn("Branch '%s' has more than one head, consider merging", branch)
        return branch_tip(repo, hgref(branch))

    return heads[0]
def list_head(repo, cur):
global g_head, bmarks
head = bookmarks.readcurrent(repo)
if head:
node = repo[head]
else:
# fake bookmark from current branch
head = cur
node = repo['.']
if not node:
node = repo['tip']
if not node:
return
if head == 'default':
head = 'master'
bmarks[head] = node
head = gitref(head)
print "@refs/heads/%s HEAD" % head
g_head = (head, node)
def do_list(parser):
global branches, bmarks, mode, track_branches
repo = parser.repo
for bmark, node in bookmarks.listbookmarks(repo).iteritems():
bmarks[bmark] = repo[node]
cur = repo.dirstate.branch()
list_head(repo, cur)
if track_branches:
for branch in repo.branchmap():
heads = repo.branchheads(branch)
if len(heads):
branches[branch] = heads
for branch in branches:
print "? refs/heads/branches/%s" % gitref(branch)
for bmark in bmarks:
print "? refs/heads/%s" % gitref(bmark)
for tag, node in repo.tagslist():
if tag == 'tip':
continue
print "? refs/tags/%s" % gitref(tag)
print
def do_import(parser):
repo = parser.repo
path = os.path.join(dirname, 'marks-git')
print "feature done"
if os.path.exists(path):
print "feature import-marks=%s" % path
print "feature export-marks=%s" % path
sys.stdout.flush()
tmp = encoding.encoding
encoding.encoding = 'utf-8'
# lets get all the import lines
while parser.check('import'):
ref = parser[1]
if (ref == 'HEAD'):
export_head(repo)
elif ref.startswith('refs/heads/branches/'):
branch = ref[len('refs/heads/branches/'):]
export_branch(repo, branch)
elif ref.startswith('refs/heads/'):
bmark = ref[len('refs/heads/'):]
export_bookmark(repo, bmark)
elif ref.startswith('refs/tags/'):
tag = ref[len('refs/tags/'):]
export_tag(repo, tag)
parser.next()
encoding.encoding = tmp
print 'done'
def parse_blob(parser):
    """Consume a 'blob' command and store its data in ``blob_marks``.

    Expects the mark line and the data line to follow the current line; the
    payload is keyed by the mark.
    """
    global blob_marks

    parser.next()
    blob_mark = parser.get_mark()
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    # Move past the line that follows the payload.
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add the files touched by ``repo[p1]`` to ``files`` for a merge.

    Every file reported by ``repo[p1].files()`` that is still in its manifest
    and not already present in ``files`` is added as ``{'ctx': <file ctx>}``.
    ``files`` is modified in place; ``p2`` is not used.
    """
    for path in repo[p1].files():
        if path in files:
            continue
        if path not in repo[p1].manifest():
            continue
        files[path] = { 'ctx' : repo[p1][path] }
def parse_commit(parser):
    """Consume a 'commit' command and create the matching hg changeset.

    Reads mark, author, committer, message, optional 'from'/'merge' parents
    and the file commands, commits the result through ``repo.commitctx()``
    and records the new node in ``parsed_refs`` and ``marks``.

    Exits through die() on octopus merges and unknown file commands.
    """
    global marks, blob_marks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    # fast-export adds an extra newline
    # (endswith() instead of data[-1] so an empty message does not raise)
    if data.endswith('\n'):
        data = data[:-1]

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # The line is passed as an argument so a '%' in it is harmless.
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        of = files[f]
        # IOError is how a removed file is reported to memctx.
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    # Check if the ref is supposed to be a named branch
    if ref.startswith('refs/heads/branches/'):
        branch = ref[len('refs/heads/branches/'):]
        extra['branch'] = hgref(branch)

    if mode == 'hg':
        # Recover the metadata stored in the '--HG--' trailer of the message.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    try:
        node = repo.commitctx(ctx)
    finally:
        # Restore the encoding even when the commit fails.
        encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node
    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a 'reset' command.

    When the next line starts a commit, the commit is parsed instead. When
    it is a 'from' line, the referenced node is recorded in ``parsed_refs``
    for the ref. Anything else is left for the caller.
    """
    global parsed_refs

    ref = parser[1]
    parser.next()

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
        parsed_refs[ref] = parser.repo.changelog.node(mark_to_rev(from_mark))
def parse_tag(parser):
    """Consume a 'tag' command and remember its tagger and message.

    The result is stored in ``parsed_tags`` under the tag name as
    ``(tagger, data)``; the 'from' mark is parsed but not used.
    """
    tag_name = parser[1]

    parser.next()
    from_mark = parser.get_mark()

    parser.next()
    tagger = parser.get_author()

    parser.next()
    message = parser.get_data()

    parser.next()
    parsed_tags[tag_name] = (tagger, message)
def write_tag(repo, tag, node, msg, author):
    """Commit a '.hgtags' change that tags ``node`` as ``tag``.

    The commit is created on top of the tip of the branch ``node`` belongs
    to, with message ``msg``. ``author`` is a ``(user, date, tz)`` tuple; a
    false value means ``(None, 0, 0)``. Returns the node of the new commit.
    """
    branch = repo[node].branch()
    tip = repo[branch_tip(repo, branch)]

    def getfilectx(repo, memctx, f):
        # Start from the current file content, or from nothing if the file
        # is not in the tip's manifest.
        try:
            data = tip.filectx(f).data()
        except error.ManifestLookupError:
            data = ""
        content = data + "%s %s\n" % (hghex(node), tag)
        return context.memfilectx(f, content, False, False, None)

    user, date, tz = author or (None, 0, 0)

    ctx = context.memctx(repo, (tip.hex(), '\0' * 20), msg,
            ['.hgtags'], getfilectx,
            user, (date, tz), {'branch' : branch})

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'
    tagnode = repo.commitctx(ctx)
    encoding.encoding = saved_encoding

    return tagnode
def do_export(parser):
global parsed_refs, bmarks, peer
p_bmarks = []
parser.next()
for line in parser.each_block('done'):
if parser.check('blob'):
parse_blob(parser)
elif parser.check('commit'):
parse_commit(parser)
elif parser.check('reset'):
parse_reset(parser)
elif parser.check('tag'):
parse_tag(parser)
elif parser.check('feature'):
pass
else:
die('unhandled export command: %s' % line)
for ref, node in parsed_refs.iteritems():
if ref.startswith('refs/heads/branches'):
branch = ref[len('refs/heads/branches/'):]
if branch in branches and node in branches[branch]:
# up to date
continue
print "ok %s" % ref
elif ref.startswith('refs/heads/'):
bmark = ref[len('refs/heads/'):]
p_bmarks.append((bmark, node))
continue
elif ref.startswith('refs/tags/'):
tag = ref[len('refs/tags/'):]
tag = hgref(tag)
author, msg = parsed_tags.get(tag, (None, None))
if mode == 'git':
if not msg:
msg = 'Added tag %s for changeset %s' % (tag, hghex(node[:6]));
write_tag(parser.repo, tag, node, msg, author)
else:
fp = parser.repo.opener('localtags', 'a')
fp.write('%s %s\n' % (hghex(node), tag))
fp.close()
print "ok %s" % ref
else:
# transport-helper/fast-export bugs
continue
if peer:
parser.repo.push(peer, force=force_push)
# handle bookmarks
for bmark, node in p_bmarks:
ref = 'refs/heads/' + bmark
new = hghex(node)
if bmark in bmarks:
old = bmarks[bmark].hex()
else:
old = ''
if old == new:
continue
if bmark == 'master' and 'master' not in parser.repo._bookmarks:
# fake bookmark
pass
elif bookmarks.pushbookmark(parser.repo, bmark, old, new):
# updated locally
pass
else:
print "error %s" % ref
continue
if peer:
rb = peer.listkeys('bookmarks')
old = rb.get(bmark, '')
if not peer.pushkey('bookmarks', bmark, old, new):
print "error %s" % ref
continue
print "ok %s" % ref
print
def fix_path(alias, repo, orig_url):
    """Rewrite a relative file URL of remote ``alias`` to an absolute one.

    Only URLs whose scheme is 'file' (the default when none is given) and
    whose path is not absolute are touched: ``remote.<alias>.url`` is set
    through ``git config`` to ``hg::`` plus the URL joined onto the current
    working directory. ``repo`` is not used.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    is_relative_file = parsed.scheme == 'file' and not os.path.isabs(parsed.path)
    if not is_relative_file:
        return

    abs_url = urlparse.urljoin("%s/" % os.getcwd(), orig_url)
    subprocess.call(['git', 'config', 'remote.%s.url' % alias, "hg::%s" % abs_url])
def main(args):
    """Entry point of the remote helper.

    ``args[1]`` is the remote alias and ``args[2]`` the URL. Reads the
    remote-hg configuration, initializes the module-level state, opens the
    repository and then serves the commands arriving on stdin
    (capabilities, list, import, export) until an empty line is read.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches, force_push, is_tmp
    global parsed_tags
    global filenodes

    alias = args[1]
    url = args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    force_push = True

    try:
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
        if get_config('remote-hg.force-push') == 'false\n':
            force_push = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    # When the alias is just the 'hg::'-style URL itself there is no
    # configured remote: work in a directory named after a hash of the alias
    # and treat it as temporary (bye() removes it).
    if alias[4:] == url:
        is_tmp = True
        alias = hashlib.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}
    marks = None
    parsed_tags = {}
    filenodes = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # The line is passed as an argument so a '%' in it is harmless.
            die('unhandled command: %s', line)
        sys.stdout.flush()
def bye():
    """Exit hook: store the marks, or remove the temporary directory.

    Does nothing when no marks object exists, including the case where
    main() stopped before the module-level ``marks`` was ever assigned.
    """
    # Look the global up defensively: referencing an unassigned global here
    # would raise NameError during interpreter shutdown.
    current_marks = globals().get('marks')
    if not current_marks:
        return
    if not is_tmp:
        current_marks.store()
    else:
        shutil.rmtree(dirname)
# Run bye() at interpreter exit so the marks are stored (or the temporary
# directory is removed) however the helper terminates.
atexit.register(bye)
# Serve the remote-helper commands; the exit status is main()'s return value.
sys.exit(main(sys.argv))