org2json.py

   1 # org2json
   2 #  parse an org-mode file and convert it to JSON
   3
   4 __author__ = "Sridhar Ratnakumar <http://nearfar.org/>"
   5
   6 import re
   7 from itertools import count
   8
   9 from simplejson import dumps
  10
  11 def rindexed(seq):
  12     """
  13     >>> l = [5,7,9]
  14     >>> print list(rindexed(l))
  15     [(2, 9), (1, 7), (0, 5)]
  16     """
  17     return zip(
  18         range(len(seq))[::-1],
  19         reversed(seq))
  20
  21 def reverse(iter):
  22     l = list(iter)
  23     l.reverse()
  24     return l
  25
  26
  27 def org2py(orgtext):
  28     """Parse the given org file text and return the Python data structure
  29
  30     >>> j = org2py(open('sample.org').read())
  31     >>> j=list(j)
  32     >>> j[-1]['text']
  33     'Projects'
  34     >>> j[-2]['text']
  35     'Whims'
  36     >>> j[-3]['text']
  37     'Online Stuff'
  38     >>> j[-4]['text']
  39     'Stuff'
  40     >>> j[-5]['text']
  41     'Travels'
  42     """
  43     lines = orgtext.splitlines()
  44     def e(i1, i2):
  45         return '\n'.join(lines[i1:i2])
  46
  47     def by_star():
  48         last_index = len(lines)
  49         for index, line in rindexed(lines):
  50             if line.startswith("*"):
  51                 yield [index, e(index, last_index)]
  52                 last_index = index
  53
  54     def hier(items):
  55         STARS_PAT = re.compile(r"^(\**) (.*)", re.DOTALL)
  56         TAGS_PAT  = re.compile(r"^(.*)\s((\w)*(:(\w)*)*:)$")
  57         def splititem(s):
  58             """
  59             >>> splititem("*** Foo Bar :TAG1:TAG2:")
  60             (3, 'Foo Bar', ('TAG1, 'TAG2'))
  61             >>> splititem("** write org-mode tutorial")
  62             (2, 'write org-mode tutorial')
  63             """
  64             match = STARS_PAT.match(s)
  65             stars, text = match.group(1), match.group(2)
  66             match = TAGS_PAT.match(s.splitlines()[0])
  67             tags = match and match.group(2).strip(':').split(':') or ()
  68             text = match and match.group(1) or text
  69             return len(stars), text, tags
  70
  71         def node(text, children, tags):
  72             return {'text': text, 'children': children, 'tags': tags}
  73
  74         istack = [[], [], [], [], [], [], [], [], []] # and so on ...
  75
  76         pn = None
  77         for index, text in items:
  78             n, text, tags = splititem(text)
  79             assert n>0
  80
  81             if n < pn:
  82                 # up to the parent
  83                 istack[n].append(node(text, reverse(istack[pn]), tags))
  84                 istack[pn] = []
  85             else:
  86                 # previous sibling OR child of one of the top nodes
  87                 istack[n].append(node(text, [], tags))
  88             pn = n
  89
  90         return reverse(istack[1])
  91
  92     return hier(by_star())
  93
  94 def org2json(orgtext):
  95     return dumps(org2py(orgtext))
  96
  97
  98 if __name__ == '__main__':
  99     from doctest import testmod
 100     testmod()
 101     print '--demo--'
 102     from pprint import pprint
 103     pprint( org2py(open('sample.org').read()) )