diff --git a/wikiconvert.py b/wikiconvert.py index d09705d..97203b1 100644 --- a/wikiconvert.py +++ b/wikiconvert.py @@ -38,11 +38,8 @@ def convert_file(proj_id, src_path, dst_dir): for i, line in enumerate(lines): if line.startswith("#"): meta_lines.append(line) - else: - if not line.strip(): - body_lines = lines[i+1:] - else: - body_lines = lines[i:] + elif line.strip(): + body_lines = lines[i:] break meta = {} for line in meta_lines: @@ -58,6 +55,9 @@ def convert_file(proj_id, src_path, dst_dir): text = re.compile(r'^{{{+ *\n', re.M).sub(r"```\n", text) text = re.compile(r'^}}}+ *(\n|$)', re.M).sub(r"```\n", text) + # TODO: Add support for `backtick` code quotes + text = re.sub(r'{{{(.*?)}}}', r'`\1`', text) + # Headings. text = re.compile(r'^===(.*?)===\s*$', re.M).sub(lambda m: "### %s\n"%m.group(1).strip(), text) text = re.compile(r'^==(.*?)==\s*$', re.M).sub(lambda m: "## %s\n"%m.group(1).strip(), text) @@ -130,11 +130,11 @@ def sub_link(m): #---- internal support stuff def _indent(text): - return ' ' + '\n '.join(text.splitlines(False)) + return '\n ' + '\n '.join(text.splitlines(False)) def _gh_page_name_from_gc_page_name(gc): """Github (gh) Wiki page name from Google Code (gc) Wiki page name.""" - gh = re.sub(r'([A-Z][a-z]+)', r'-\1', gc)[1:] + gh = re.sub(r'([A-Za-z]+)_?', r'-\1', gc)[1:] return gh diff --git a/wikiconvert_creole.py b/wikiconvert_creole.py new file mode 100644 index 0000000..d30fba6 --- /dev/null +++ b/wikiconvert_creole.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python + +""" +Usage: + python googlecode2github/wikiconvert_creole.py PROJID SRCDIR DSTDIR + +where "PROJID" is the github project id, e.g. "trentm/python-markdown2", +"SRCDIR" is a Google Code project wiki Subversion working copy dir and +"DSTDIR" is the git clone dir of the git project's wiki. 
+""" + +__version__ = "1.0.0" + +import re +import sys +from os.path import * +from glob import glob +from pprint import pprint +import codecs +from hashlib import md5 + + +def log(s): + sys.stderr.write(s+"\n") + +def convert_dir(proj_id, src_dir, dst_dir): + if isfile(src_dir): + convert_file(proj_id, src_dir, dst_dir) + else: + for f in glob(join(src_dir, "*.wiki")): + convert_file(proj_id, f, dst_dir) + +def convert_file(proj_id, src_path, dst_dir): + src = codecs.open(src_path, 'r', 'utf-8').read() + meta_lines = [] + body_lines = [] + lines = src.splitlines(False) + for i, line in enumerate(lines): + if line.startswith("#"): + meta_lines.append(line) + else: + assert not line.strip(), "line isn't empty in file %s %r" % (src_path, line) + # TODO is it actually mandatory that a blank line separate meta text from body text? + body_lines = lines[i+1:] + break + meta = {} + for line in meta_lines: + k,v = line[1:].split(None, 1) + meta[k] = v + text = '\n'.join(body_lines) + s_from_hash = {} + + # Pull out pre-blocks so we can restore them unmunged + def sub_block(match,indent=True): + pre = match.group(1) + hash = md5(pre.encode('utf8')).hexdigest() + # Creole uses braces, not indentation for code blocks + s_from_hash[hash] = "{{{"+pre+"}}}" if indent else pre + return hash + + def sub_pre_block(match): + return sub_block(match,indent=True) + + text = re.compile(r'^{{{(.*?)}}}', re.M|re.S).sub(sub_pre_block, text) + + # Pull out `backtick` code quotes + #def sub_code(match) + # return sub_block(match,indent=False) + text = re.compile(r'`(.*?)`', re.M|re.S).sub(r'{{{\1}}}', text) # monospace literal for Creole + + # Headings - No conversion needed for Creole. + + # Tables + def sub_table_creole(m): + rows = [] + for line in m.group(0).splitlines(False): + if not line.strip(): + continue + rows.append(list(c.strip() for c in line.split("||")[1:-1])) + lines = [] + # Assume first row is a header (or should we assume the reverse?) 
+ if rows: + lines.append('|='+'|='.join(rows[0])+'|') + for row in rows[1:]: + lines.append('|'+'|'.join(row)+'|') + return '\n\n' + '\n'.join(lines) + text = re.compile(r'\n(\n^\|\|(.*?\|\|)+$)+', re.M).sub(sub_table_creole, text) + + # Lists (doesn't handle nested lists - flattens structure). + text = re.compile(r'^[ \t]+\*[ \t]+(.*?)$', re.M).sub(r'{^} \1', text) # temp marker to avoid bold processing + text = re.compile(r'^[ \t]+#[ \t]+(.*?)$', re.M).sub(r'# \1', text) + + # Italics, bold. - same for both Markdown & Creole + # in*ter*bold: (?<=\w)(\*\w+?\*)(?=\w) + text = re.compile(r'(? Github issue lookup map + text = re.compile(r'(?