Skip to content
This repository has been archived by the owner on Sep 25, 2020. It is now read-only.

Commit

Permalink
first commit: basic wiki conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
trentm committed Feb 6, 2011
0 parents commit f51c8b2
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 0 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
A small project with some helper scripts for moving one of your projects from
[Google Code project hosting](http://code.google.com/hosting/) to
[GitHub](https://github.com/).

More details to come.

143 changes: 143 additions & 0 deletions wikiconvert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/usr/bin/env python

"""
Usage:
python googlecode2github/wikiconvert.py PROJID SRCDIR DSTDIR
where "PROJID" is the github project id, e.g. "trentm/python-markdown2",
"SRCDIR" is a Google Code project wiki Subversion working copy dir and
"DSTDIR" is the git clone dir of the git project's wiki.
"""

# Version string of this module.
__version__ = "1.0.0"

import re
import sys
from os.path import *
from glob import glob
from pprint import pprint
import codecs
from hashlib import md5


def log(s):
    """Write the message `s` to stderr, terminated by a newline."""
    line = s + "\n"
    sys.stderr.write(line)

def convert_dir(proj_id, src_dir, dst_dir):
    """Convert a single wiki file, or every "*.wiki" file in a directory.

    `src_dir` may name one file, in which case only that file is converted;
    otherwise it is treated as a directory and each "*.wiki" file directly
    inside it is converted.  `proj_id` and `dst_dir` are passed through to
    `convert_file` unchanged.
    """
    if isfile(src_dir):
        wiki_paths = [src_dir]
    else:
        wiki_paths = glob(join(src_dir, "*.wiki"))
    for wiki_path in wiki_paths:
        convert_file(proj_id, wiki_path, dst_dir)

def _split_meta_and_body(src):
    """Split raw Google Code wiki source into ``(meta, body_text)``.

    Leading lines that start with "#" are pragmas (e.g. "#summary Some
    text"); each is stored in the returned `meta` dict keyed by its first
    word, with the rest of the line as the value ("" when there is none).
    Everything after the pragmas is returned as one newline-joined string.
    """
    lines = src.splitlines(False)
    meta = {}
    body_start = len(lines)  # A file made only of pragmas has no body.
    for i, line in enumerate(lines):
        if not line.startswith("#"):
            # A blank line separating the pragmas from the body is dropped.
            # A non-blank line is kept as the first body line instead of
            # being rejected by an assert (which would silently lose the
            # line when asserts are disabled with -O).
            body_start = i if line.strip() else i + 1
            break
        parts = line[1:].split(None, 1)
        if not parts:
            continue  # A bare "#" carries no pragma.
        meta[parts[0]] = parts[1] if len(parts) > 1 else ""
    return meta, '\n'.join(lines[body_start:])


def _convert_markup(proj_id, text):
    """Translate Google Code wiki markup in `text` to GitHub wiki Markdown.

    `proj_id` is the GitHub project id ("user/repo") used to build issue
    links.  Returns the converted text.
    """
    # Fragments that are already converted (pre-blocks, links) are swapped
    # for an md5 hex placeholder so the later regexes cannot mangle them.
    # They are kept in insertion order because a later fragment (a link) can
    # contain the placeholder of an earlier one (a wiki link in its text).
    stashed = []

    def stash(key_text, replacement):
        # md5 needs bytes: hashing the text directly fails on Python 3 and,
        # for non-ASCII content, on Python 2 as well.
        placeholder = md5(key_text.encode('utf-8')).hexdigest()
        stashed.append((placeholder, replacement))
        return placeholder

    # Pull out pre-blocks.
    def sub_pre_block(match):
        pre = match.group(1)
        return stash(pre, _indent(pre))
    text = re.compile(r'^{{{\n(.*?)^}}}', re.M|re.S).sub(sub_pre_block, text)

    # Headings (longest marker first so "===" is not taken for "=").
    text = re.compile(r'^===(.*?)===\s*$', re.M).sub(lambda m: "### %s\n"%m.group(1).strip(), text)
    text = re.compile(r'^==(.*?)==\s*$', re.M).sub(lambda m: "## %s\n"%m.group(1).strip(), text)
    text = re.compile(r'^=(.*?)=\s*$', re.M).sub(lambda m: "# %s\n"%m.group(1).strip(), text)

    # Tables: "||a||b||" rows become an HTML <table>.
    def sub_table(m):
        rows = []
        for line in m.group(0).splitlines(False):
            if not line.strip():
                continue
            # Drop the empty pieces before the first and after the last "||".
            rows.append([c.strip() for c in line.split("||")[1:-1]])
        lines = ['<table>']
        for row in rows:
            lines.append(' <tr>%s</tr>' % ''.join('<td>%s</td>' % c for c in row))
        lines.append('</table>')
        return '\n\n' + '\n'.join(lines)
    text = re.compile(r'\n(\n^\|\|(.*?\|\|)+$)+', re.M).sub(sub_table, text)

    # Lists (don't handle nested lists).
    text = re.compile(r'^[ \t]+\*[ \t]+(.*?)[ \t]*$', re.M).sub(r'- \1', text)
    text = re.compile(r'^[ \t]+#[ \t]+(.*?)[ \t]*$', re.M).sub(r'1. \1', text)

    # Wiki links: "[PageName]" or "[PageName some label]".
    def sub_wikilink(m):
        gh_page_name = _gh_page_name_from_gc_page_name(m.group(1)).replace('-', ' ')
        if m.group(2):
            s = "[[%s|%s]]" % (gh_page_name, m.group(2))
        else:
            s = "[[%s]]" % gh_page_name
        return stash(s, s)
    text = re.compile(r'\[((?:[A-Z][a-z]+)+)(?:\s+(.*?))?\]', re.S).sub(sub_wikilink, text)

    # Links: "[url label]" becomes "[label](url)".
    def sub_link(m):
        s = "[%s](%s)" % (m.group(2), m.group(1))
        return stash(s, s)
    text = re.compile(r'(?<!\[)\[([^\s]+)\s+(.*?)\](?!\])', re.S).sub(sub_link, text)

    # Italics, bold.
    # in*ter*bold: (?<=\w)(\*\w+?\*)(?=\w)
    text = re.compile(r'(?<![*\w])\*([^*]+?)\*(?![*\w])', re.S).sub(r'**\1**', text)
    text = re.compile(r'(?<![_\w])_([^_]+?)_(?![_\w])', re.S).sub(r'*\1*', text)

    # Auto-linking "issue \d+"
    # NOTE(review): the "issues#issue/N" URL form is kept as-is; confirm it
    # still resolves on GitHub.
    text = re.compile(r'(?<!\[)(issue (\d+))(?!\])').sub(
        r'[\1](https://github.com/%s/issues#issue/\2)' % proj_id, text)

    # Restore hashed-out blocks, newest first, so that a placeholder exposed
    # by restoring a later fragment is itself restored afterwards.
    for placeholder, replacement in reversed(stashed):
        text = text.replace(placeholder, replacement)
    return text


def convert_file(proj_id, src_path, dst_dir):
    """Convert one Google Code ".wiki" file to a GitHub wiki ".md" file.

    `proj_id` is the GitHub project id (e.g. "trentm/python-markdown2"),
    `src_path` is the UTF-8 encoded wiki source file and `dst_dir` is the
    directory the Markdown page is written into.  The output file name is
    derived from the source file's base name.  The destination file is only
    (re)written, and a "wrote ..." message logged, when it does not exist
    yet or its current content differs from the converted text.
    """
    with codecs.open(src_path, 'r', 'utf-8') as f:
        src = f.read()
    meta, text = _split_meta_and_body(src)
    text = _convert_markup(proj_id, text)

    # Add summary (the "#summary" pragma becomes a top-level heading).
    summary = meta.get("summary")
    if summary:
        text = ("# %s\n\n" % summary) + text

    base = splitext(basename(src_path))[0]
    gh_page_name = _gh_page_name_from_gc_page_name(base)
    dst_path = join(dst_dir, gh_page_name+".md")
    if exists(dst_path):
        with codecs.open(dst_path, 'r', 'utf-8') as f:
            if f.read() == text:
                return  # Already up to date; leave the file untouched.
    with codecs.open(dst_path, 'w', 'utf-8') as f:
        f.write(text)
    log("wrote '%s'" % dst_path)


#---- internal support stuff

def _indent(text):
return ' ' + '\n '.join(text.splitlines(False))

def _gh_page_name_from_gc_page_name(gc):
"""Github (gh) Wiki page name from Google Code (gc) Wiki page name."""
gh = re.sub(r'([A-Z][a-z]+)', r'-\1', gc)[1:]
return gh


#---- mainline

if __name__ == '__main__':
    # Expects three arguments: PROJID, SRCDIR and DSTDIR.  When any are
    # missing, print the module docstring (if any) as usage text and exit
    # with an error instead of dying with an IndexError traceback.
    if len(sys.argv) < 4:
        sys.stderr.write((__doc__ or "").strip() + "\n")
        sys.exit(1)
    convert_dir(sys.argv[1], sys.argv[2], sys.argv[3])

0 comments on commit f51c8b2

Please sign in to comment.