[gitology.git] / src / gitology / utils.py
1 """
2 Various utility functions used by gitology.*
3 """
# imports # {{{
from django.conf.urls.defaults import patterns
from django.utils import simplejson

import sys, os, textwrap

try:
    import path
except ImportError:
    from gitology import path

import fnmatch, re, string
import gzip, bz2
import docutils.writers.html4css1, docutils.core
from odict import OrderedDict as odict
# }}}
class ImproperlyConfigured(Exception): pass

# path2obj # {{{
def path2obj(path):
    from django.core.urlresolvers import get_mod_func
    mod_name, obj_name = get_mod_func(path)
    return getattr(__import__(mod_name, {}, {}, ['']), obj_name)
# }}}
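
# Illustrative usage of path2obj (a sketch, not part of the original module):
# the dotted path is split at the last dot, the module part is imported and the
# final attribute is returned.
#
#   >>> path2obj("os.path.join")   # doctest: +ELLIPSIS
#   <function join at 0x...>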
def text_to_html(text_input): return "<pre>%s</pre>" % text_input
# rest_to_html # {{{
def rest_to_html(rest_input, css_path=None):
    """Render ReStructuredText."""
    from gitology.config import settings
    import os.path
    if css_path is None and "DEFAULT_REST_CSS" in settings.DEFAULTS:
        css_path = os.path.expanduser(settings.DEFAULTS.DEFAULT_REST_CSS)
    return docutils.core.publish_parts(
        rest_input, writer_name="html", settings_overrides={
            'stylesheet': css_path,
            'stylesheet_path': None,
            'embed_stylesheet': True,
        }
    )['html_body']
# }}}
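
# Illustrative usage of rest_to_html (a sketch; the exact markup depends on the
# installed docutils version):
#
#   >>> rest_to_html("Hello *world*")   # returns roughly:
#   u'<div class="document">\n<p>Hello <em>world</em></p>\n</div>\n'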
# attrdict # {{{
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/361668
class attrdict(dict):
    """A dict whose items can also be accessed as member variables.

    >>> d = attrdict(a=1, b=2)
    >>> d['c'] = 3
    >>> print d.a, d.b, d.c
    1 2 3
    >>> d.b = 10
    >>> print d['b']
    10

    # but be careful, it's easy to hide methods
    >>> print d.get('c')
    3
    >>> d['get'] = 4
    >>> print d.get('a')
    Traceback (most recent call last):
    TypeError: 'int' object is not callable
    """
    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        self.__dict__ = self
# }}}
# NamedObject # {{{
class NamedObject(object):
    def __init__(self, name):
        self._name = name
    def _get_name(self): return self._name
    name = property(
        _get_name, doc="""
        self.name is readonly.

        >>> d = DocumentMeta("somename")
        >>> d.name
        'somename'
        >>> d.name = "new name"
        Traceback (most recent call last):
        ...
        AttributeError: can't set attribute
        >>>
        """
    )
    def __unicode__(self):
        return u"%s(%s)" % ( self.__class__.__name__, self.name )
    __str__ = __unicode__
    __repr__ = __unicode__
# }}}
# get_blog_data # {{{
class list_with_clone(list):
    def _clone(self): return self

global_blog_dict = {}

def get_blog_data(p):
    from gitology.document import Document
    blog = {}
    blog["name"] = p.basename()
    blog["document"] = Document("blogs@%s" % blog["name"])
    if p.basename() == "main": blog["prefix"] = "blog/"
    else: blog["prefix"] = "%s/" % p.basename()
    # posts
    blog["posts"] = odict()
    years = p.dirs()
    years.sort()
    for y in years:
        if y.namebase == "labels": continue
        months = y.dirs()
        months.sort()
        for m in months:
            dates = m.glob("*.lst")
            dates.sort()
            for d in dates:
                for l in d.open().readlines():
                    # format: url document_name timestamp
                    url, document_name, timestamp = l.split(" ", 2)
                    blog["posts"][url] = {
                        'date': timestamp, 'document': Document(document_name),
                    }
                    global_blog_dict[url] = blog
    blog["posts"].reverse()
    # labels
    blog["labels"] = {}
    for l in p.joinpath("labels").glob("*.lst"):
        d = {}
        d["name"] = l.namebase
        d["posts"] = list_with_clone()
        d["document"] = Document("blogs@%s@label@%s" % (blog["name"], l.namebase))
        for line in l.open().readlines():
            # format: url; data is in the respective archive file
            d["posts"].append(blog["posts"][line.strip()])
            blog["posts"][line.strip()].setdefault("labels", []).append(d)
        blog["labels"][l.namebase] = d
    return blog
# }}}
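
# Expected on-disk layout for a blog (an illustrative sketch; the names below
# are made up):
#
#   blogs/main/2009/05/23.lst      -- one "url document_name timestamp" per line
#   blogs/main/labels/python.lst   -- one post url per line
#
# get_blog_data(path.path("blogs/main")) then returns roughly:
#
#   {
#       'name': 'main', 'prefix': 'blog/', 'document': Document("blogs@main"),
#       'posts': odict mapping url -> {'date': ..., 'document': ..., 'labels': [...]},
#       'labels': {'python': {'name': 'python', 'posts': [...], 'document': ...}},
#   }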
# get_blog {{{
def get_blog(p):
    urls = []
    b = get_blog_data(p)
    urls.append(
        ("^%s$" % b["prefix"], "gitology.d.views.show_blog", { 'blog_data': b, })
    )
    urls.append(
        (
            "^%slabelled/(?P<label_name>[^/]+)/$" % b["prefix"],
            "gitology.d.views.show_category", { 'blog_data': b },
        )
    )

    from django.contrib.syndication.feeds import Feed

    class LatestEntries(Feed):
        title = b["document"].meta.title
        link = b["document"].meta.title
        description = b["document"].meta.subtitle

        def items(self):
            return b["posts"].values()[:10]

        def item_link(self, item):
            return item["document"].meta.url

    feeds = { 'latest': LatestEntries }

    urls.append(
        (
            '^%sfeeds/(?P<url>.*)/$' % b["prefix"],
            'django.contrib.syndication.views.feed',
            {'feed_dict': feeds},
        )
    )
    return urls
# }}}
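
# For a blog directory named "main", get_blog returns url tuples along these
# lines (a sketch; b and feeds refer to the locals built above):
#
#   [
#       ("^blog/$", "gitology.d.views.show_blog", {'blog_data': b}),
#       ("^blog/labelled/(?P<label_name>[^/]+)/$",
#        "gitology.d.views.show_category", {'blog_data': b}),
#       ("^blog/feeds/(?P<url>.*)/$",
#        "django.contrib.syndication.views.feed", {'feed_dict': feeds}),
#   ]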
# get_blogs # {{{
def get_blogs():
    from gitology.config import settings as gsettings
    urls = []
    blogs_folder = gsettings.LOCAL_REPO_PATH.joinpath("blogs")
    for d in blogs_folder.dirs():
        urls += get_blog(d)
    return urls
# }}}
# get_wiki # {{{
global_wiki_dict = {}

def get_wiki():
    from gitology.config import settings as gsettings
    from gitology.document import Document
    urls = []
    wiki_folder = gsettings.LOCAL_REPO_PATH.joinpath("wiki")
    for i in wiki_folder.walk():
        if not i.isfile(): continue
        wiki_document = Document(i.open().read().strip())
        wiki_url = i[len(wiki_folder):-4] + "/"
        global_wiki_dict[wiki_url] = wiki_document
    return urls
# }}}
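
# Illustrative mapping (file name made up): a file wiki/about.txt whose contents
# name a document alias produces
#
#   global_wiki_dict["/about/"] = Document("<contents of about.txt>")
#
# i.e. the url is the file's path relative to the wiki folder, minus the ".txt"
# extension, plus a trailing "/".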
# get redirects # {{{
global_redirect_dict = {}

def get_redirects():
    from gitology.config import settings as gsettings
    redirects_file = gsettings.LOCAL_REPO_PATH.joinpath("redirects.lst")
    if not redirects_file.exists(): return
    for line in redirects_file.open().readlines():
        try:
            before, after = line.split()
        except ValueError:
            print "Bad redirect line: ", line
            continue
        global_redirect_dict[before] = after
# }}}
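
# redirects.lst is expected to contain one "<from-url> <to-url>" pair per line,
# for example (urls made up):
#
#   /old-post/ /blog/2009/05/new-post/
#   /b /blog/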
# refresh_urlconf_cache # {{{
def refresh_urlconf_cache():
    """ creates a urlconf that is stored """
    print "refresh_urlconf_cache"
    from gitology.config import settings
    global global_wiki_dict, global_blog_dict, global_redirect_dict
    global_blog_dict = {}
    global_wiki_dict = {}
    global_redirect_dict = {}
    urls = ['']
    # for blogs:
    #   the list of blogs is in $reporoot/blogs/
    #   urls: /blog_name/
    #   the blog named "main" goes under /blog/, the rest go to /folder_name/
    urls += get_blogs()
    urls += get_wiki()
    get_redirects()
    # for each blog, the list of labels is in $reporoot/blogs/blog_name/labels/
    #   urls: /blog_name/label/label_name/
    # for each blog, a date based hierarchy is kept in
    #   $reporoot/blogs/blog_name/year/month/date.lst
    #   urls: /blog_name/year/month/date/document_name/

    # for the wiki:
    #   the list of wiki document names is in $reporoot/wiki/document_alias.txt
    #   urls: /document_alias/
    #   further hierarchy is maintained as:
    #     $reporoot/wiki/document_alias/child_alias.txt
    #     /document_alias/child_alias/

    # for notes:
    #   notebooks are stored in $reporoot/notebooks/
    #   urls: /notebook/ -- this is a dedicated app

    # for albums:
    #   the list of albums is in $reporoot/albums/: album_name.meta, album_name.lst
    #   urls: /album|gallery/album_name/ -- this is a document; it can contain a
    #   select few photos etc.
    #   each photo is basically a document; its list is in album_name.lst
    #   each album photo will have thumbnail and caption meta data to be shown
    #   on the album page.
    #   /album/album_name/photos/ will list all photos; each photo may be in one
    #   or more albums. each photo can be a blog post too, in one or more blogs.

    # optimization: this info will be loaded from a file, and some other tool
    # is to update that file every time something interesting happens.

    # this function returns the url to view mapping.
    file(settings.LOCAL_REPO_PATH.joinpath("urlconf.cache"), "w+").write(
        #simplejson.dumps(urls)
        "updated"
    )
    #print urls
    return urls
# }}}
# sort_nicely # {{{
# http://nedbatchelder.com/blog/200712/human_sorting.html
def tryint(s):
    try: return int(s)
    except ValueError: return s

def alphanum_key(s):
    """ Turn a string into a list of string and number chunks.

        "z23a" -> ["z", 23, "a"]
    """
    return [ tryint(c) for c in re.split('([0-9]+)', s) ]

def sort_nicely(l):
    """ Sort the given list in the way that humans expect. """
    l.sort(key=alphanum_key)
# }}}
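
# Illustrative behaviour (a sketch):
#
#   >>> l = ["img10.png", "img2.png", "img1.png"]
#   >>> sort_nicely(l)
#   >>> l
#   ['img1.png', 'img2.png', 'img10.png']
#
# whereas a plain l.sort() would place "img10.png" before "img2.png".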
# smart_wrap # {{{
def smart_wrap(s, width=None):
    if width is None:
        width = int(os.environ.get("COLUMNS", 70))
    lines = []
    for line in s.split("\n"):
        if not line.split(): lines.append("")
        else: lines += textwrap.wrap(line, width=width)
    return "\n".join(lines)
# }}}
# getDirSize {{{
# stolen from: http://mail.python.org/pipermail/python-list/2000-June/037460.html
def calcDirSize(arg, dir, files):
    for file in files:
        stats = os.stat(os.path.join(dir, file))
        size = stats[6]
        arg.append(size)

def getDirSize(dir):
    sizes = []
    os.path.walk(dir, calcDirSize, sizes)
    total = 0
    for size in sizes:
        total = total + size
    if total > 1073741824:
        return (round(total/1073741824.0, 2), 'GB')
    if total > 1048576:
        return (round(total/1048576.0, 2), 'MB')
    if total > 1024:
        return (round(total/1024.0, 2), 'KB')
    return (total, 'bytes')
# }}}
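
# Illustrative usage (a sketch; the numbers are made up):
#
#   >>> getDirSize("/var/log")
#   (12.34, 'MB')
#
# i.e. a (value, unit) tuple with the unit picked from bytes/KB/MB/GB.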
# generators from http://www.dabeaz.com/generators/ # {{{
# these generators are one time use only;
# use http://www.fiber-space.de/generator_tools/doc/generator_tools.html
# for copying generators.

def gen_find(filepat, top=""):
    if not top: top = path.path(".").abspath()
    top = os.path.expanduser(top)
    for path_, dirlist, filelist in os.walk(top):
        for name in fnmatch.filter(filelist, filepat):
            yield os.path.join(path_, name)

def gen_exclude(pattern, lines):
    pattern = re.compile(pattern)
    for line in lines:
        if not pattern.search(line):
            yield line

def gen_grep(pattern, lines):
    pattern = re.compile(pattern)
    for line in lines:
        if pattern.search(line):
            yield line

def gen_open(filenames):
    for name in filenames:
        if name.endswith(".gz"):
            yield gzip.open(name)
        elif name.endswith(".bz2"):
            yield bz2.BZ2File(name)
        else:
            yield open(name)

def gen_cat(sources):
    for s in sources:
        for item in s:
            yield item

def counter(gen):
    c = 0
    for item in gen: c += 1
    return c
# }}}
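
# Illustrative pipeline built from the generators above (a sketch; the file
# pattern and regex are made up):
#
#   lognames = gen_find("access-log*", "/var/log/httpd")
#   logfiles = gen_open(lognames)
#   loglines = gen_cat(logfiles)
#   errors   = gen_grep(r" 404 ", loglines)
#   print counter(errors)
#
# being one-time-use generators, the pipeline is exhausted after a single pass.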
# is_valid_url # {{{
allowed_chars_in_urls = string.ascii_letters + string.digits + ".-_/"

def is_valid_url(url):
    if not url.startswith("/"): return False
    for c in url:
        if c not in allowed_chars_in_urls:
            return False
    # consecutive // not allowed
    for p in url.split("/")[1:-1]:
        if not p: return False
    return True
# }}}
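
# Illustrative checks (a sketch):
#
#   >>> is_valid_url("/blog/2009/05/hello-world/")
#   True
#   >>> is_valid_url("blog/")           # must start with "/"
#   False
#   >>> is_valid_url("/blog//post/")    # consecutive slashes are rejected
#   False
#   >>> is_valid_url("/blog/?page=2")   # "?" and "=" are not allowed characters
#   False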
# parse_date # {{{
def parse_date(s):
    from django.db.models.fields import DateTimeField
    dtf = DateTimeField()
    return dtf.to_python(s)
# }}}
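
# Illustrative usage (a sketch; relies on Django's DateTimeField parsing):
#
#   >>> parse_date("2009-05-23 10:15:00")
#   datetime.datetime(2009, 5, 23, 10, 15)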