'''Create JSON and loader from text source.

Provides titles, links, inline and display mathematics.
Also provides an index.

Run as a script, this reads ``help.txt`` from the current directory and
writes ``help_data.json`` containing ``help_data=<JSON>``.
'''

import re

try:
    import simplejson
except ImportError:
    # Fall back to the standard library module, which offers the same
    # ``dumps`` call used below.
    import json as simplejson


# These classes might be a good idea, but they get redefined later:
# Text, Display, InLine and Link are rebound to plain functions further
# down, so at run time only those functions are used.
class _TextNode(str):
    ''' Generic text node.'''

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__,
                           super(_TextNode, self).__repr__())


class Text(_TextNode):
    pass


class Display(_TextNode):
    pass


class InLine(_TextNode):
    pass


class Link(_TextNode):
    pass


class Par(list):
    pass


# Regular expressions used for parsing.
# Each pattern that has a capturing group makes ``split`` return the
# pieces alternately: outside-text, captured-text, outside-text, ...

# A page starts at a line beginning with ".page="; whitespace before that
# line is swallowed by the separator.
page_split = re.compile(r'\s*\n\.page=').split
# Paragraphs are separated by a blank (whitespace-only) line.
par_split = re.compile(r'\n\s*\n').split
# $$...$$ display mathematics (may span lines).
display_split = re.compile(r'\$\$(.*?)\$\$', re.DOTALL).split
# $...$ inline mathematics (may span lines).
inline_split = re.compile(r'\$(.*?)\$', re.DOTALL).split
# [...] links (single line only).
link_split = re.compile(r'\[(.*?)\]').split


class SplitDispatch(object):
    '''Split an object and dispatch functions on items.

    ``split`` is called on the argument; the pieces it returns are passed
    alternately to ``f`` (even positions) and ``g`` (odd positions).
    Return values of ``f`` and ``g`` are ignored.
    '''

    def __init__(self, split, f, g):
        self._split = split
        self._dispatch = [f, g]

    def __call__(self, str):
        for i, item in enumerate(self._split(str)):
            self._dispatch[i % 2](item)


class parse_par(list):
    '''Convert a string to a list, that can be converted to JSON.

    The string is split first on display mathematics, then on inline
    mathematics, then on links; each resulting piece is appended to
    ``self`` after being tagged by Display, InLine, Link or Text.
    '''

    def __init__(self, str):
        super(parse_par, self).__init__()
        # Not used by this module; kept so the attribute stays available.
        self.value = []
        SplitDispatch(display_split, self.non_display, self.display)(str)

    def non_display(self, str):
        SplitDispatch(inline_split, self.non_inline, self.inline)(str)

    def non_inline(self, str):
        SplitDispatch(link_split, self.text, self.link)(str)

    def display(self, str):
        self.append(Display(str))

    def inline(self, str):
        self.append(InLine(str))

    def link(self, str):
        self.append(Link(str))

    def text(self, str):
        self.append(Text(str))


# Tagging functions: each prefixes the string with a one-letter code.
# These definitions replace the classes of the same names above.
def Text(str):
    return 'A' + str


def Link(str):
    return 'B' + str


def InLine(str):
    return 'C' + str


def Display(str):
    return 'D' + str


def doit(str):
    '''Parse the whole help source and return a dict of pages.

    Everything before the first ".page=" line is discarded.  Each page
    starts with a header line "<key> <title>" followed by the body.  A
    missing title or a missing body is treated as empty.

    The result maps each page key to
    ``['div', ['h2', <title>], ['p', ...], ...]`` and additionally maps
    ``'index'`` to a 'div' holding a sorted list of all (key, title)
    pairs.
    '''
    value = {}
    index = []

    for page_str in page_split(str)[1:]:
        # partition never raises, unlike unpacking the result of split:
        # a page without body or a header without title yields ''.
        header, _, body = page_str.partition('\n')
        key, _, title = header.partition(' ')
        index.append((key, title))

        page = ['div', ['h2', Text(title)]]
        for s in par_split(body):
            page.append(['p'] + parse_par(s))
        value[key] = page

    index.sort()
    index_body = [
        ['li', Link(item[0]), Text(' ' + item[1])]
        for item in index
    ]
    value['index'] = ['div', ['h2', Text('Index')], ['ul'] + index_body]
    return value


def main():
    '''Read help.txt and write help_data.json.'''
    # Context managers make sure both files are closed.
    with open('help.txt') as source:
        data = source.read()

    pages = doit(data)

    # Text mode: the payload is a text string, not bytes.
    with open('help_data.json', 'w') as target:
        target.write('help_data=' + simplejson.dumps(pages))


if __name__ == '__main__':
    main()