[gitology.git] / src / gitology / utils.py
1 """
2 Various utility functions used by gitology.*
3 """
# imports # {{{
from django.conf.urls.defaults import patterns
from django.utils import simplejson

import sys, os, textwrap

try:
    import path
except ImportError:
    from gitology import path

import fnmatch, re, string
import gzip, bz2
import docutils.writers.html4css1, docutils.core
from odict import OrderedDict as odict
# }}}
class ImproperlyConfigured(Exception): pass

# path2obj # {{{
def path2obj(path):
    from django.core.urlresolvers import get_mod_func
    mod_name, obj_name = get_mod_func(path)
    return getattr(__import__(mod_name, {}, {}, ['']), obj_name)
# }}}
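
# Illustrative usage of path2obj (a sketch, not part of the original module):
# the dotted path is split at the last dot, the module part is imported and the
# final attribute is returned.
#
#   >>> path2obj("os.path.join")   # doctest: +ELLIPSIS
#   <function join at 0x...>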
def text_to_html(text_input): return "<pre>%s</pre>" % text_input
# rest_to_html # {{{
def rest_to_html(rest_input, css_path=None):
    """Render ReStructuredText."""
    from gitology.config import settings
    import os.path
    if css_path is None and "DEFAULT_REST_CSS" in settings.DEFAULTS:
        css_path = os.path.expanduser(settings.DEFAULTS.DEFAULT_REST_CSS)
    return docutils.core.publish_parts(
        rest_input, writer_name="html", settings_overrides={
            'stylesheet': css_path,
            'stylesheet_path': None,
            'embed_stylesheet': True,
        }
    )['html_body']
# }}}
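
# Illustrative usage of rest_to_html (a sketch; the exact markup depends on the
# installed docutils version):
#
#   >>> rest_to_html("Hello *world*")   # returns roughly:
#   u'<div class="document">\n<p>Hello <em>world</em></p>\n</div>\n'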
# attrdict # {{{
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/361668
class attrdict(dict):
    """A dict whose items can also be accessed as member variables.

    >>> d = attrdict(a=1, b=2)
    >>> d['c'] = 3
    >>> print d.a, d.b, d.c
    1 2 3
    >>> d.b = 10
    >>> print d['b']
    10

    # but be careful, it's easy to hide methods
    >>> print d.get('c')
    3
    >>> d['get'] = 4
    >>> print d.get('a')
    Traceback (most recent call last):
    TypeError: 'int' object is not callable
    """
    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        self.__dict__ = self
# }}}
# NamedObject # {{{
class NamedObject(object):
    def __init__(self, name):
        self._name = name
    def _get_name(self): return self._name
    name = property(
        _get_name, doc="""
        self.name is readonly.

        >>> d = DocumentMeta("somename")
        >>> d.name
        'somename'
        >>> d.name = "new name"
        Traceback (most recent call last):
        ...
        AttributeError: can't set attribute
        >>>
        """
    )
    def __unicode__(self):
        return u"%s(%s)" % ( self.__class__.__name__, self.name )
    __str__ = __unicode__
    __repr__ = __unicode__
# }}}
# get_blog_data # {{{
class list_with_clone(list):
    def _clone(self): return self

global_blog_dict = {}

def get_blog_data(p):
    from gitology.document import Document
    blog = {}
    blog["name"] = p.basename()
    blog["document"] = Document("blogs@%s" % blog["name"])
    if p.basename() == "main": blog["prefix"] = "blog/"
    else: blog["prefix"] = "%s/" % p.basename()
    # posts
    blog["posts"] = odict()
    years = p.dirs()
    years.sort()
    for y in years:
        if y.namebase == "labels": continue
        months = y.dirs()
        months.sort()
        for m in months:
            dates = m.glob("*.lst")
            dates.sort()
            for d in dates:
                for l in d.open().readlines():
                    # format: url document_name timestamp
                    url, document_name, timestamp = l.split(" ", 2)
                    blog["posts"][url] = {
                        'date': timestamp, 'document': Document(document_name),
                    }
                    global_blog_dict[url] = blog
    blog["posts"].reverse()
    # labels
    blog["labels"] = {}
    for l in p.joinpath("labels").glob("*.lst"):
        d = {}
        d["name"] = l.namebase
        d["posts"] = list_with_clone()
        d["document"] = Document("blogs@%s@label@%s" % (blog["name"], l.namebase))
        for line in l.open().readlines():
            # format: url; data is in the respective archive file
            d["posts"].append(blog["posts"][line.strip()])
            blog["posts"][line.strip()].setdefault("labels", []).append(d)
        blog["labels"][l.namebase] = d
    return blog
# }}}
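
# Expected on-disk layout for a blog (an illustrative sketch; the names below
# are made up):
#
#   blogs/main/2009/05/23.lst      -- one "url document_name timestamp" per line
#   blogs/main/labels/python.lst   -- one post url per line
#
# get_blog_data(path.path("blogs/main")) then returns roughly:
#
#   {
#       'name': 'main', 'prefix': 'blog/', 'document': Document("blogs@main"),
#       'posts': odict mapping url -> {'date': ..., 'document': ..., 'labels': [...]},
#       'labels': {'python': {'name': 'python', 'posts': [...], 'document': ...}},
#   }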
# get_blog {{{
def get_blog(p):
    urls = []
    b = get_blog_data(p)
    urls.append(
        ("^%s$" % b["prefix"], "gitology.d.views.show_blog", { 'blog_data': b, })
    )
    urls.append(
        (
            "^%slabelled/(?P<label_name>[^/]+)/$" % b["prefix"],
            "gitology.d.views.show_category", { 'blog_data': b },
        )
    )

    from django.contrib.syndication.feeds import Feed

    class LatestEntries(Feed):
        title = b["document"].meta.title
        link = b["document"].meta.title
        description = b["document"].meta.subtitle

        def items(self):
            return b["posts"].values()[:10]

        def item_link(self, item):
            return item["document"].meta.url

    feeds = { 'latest': LatestEntries }

    urls.append(
        (
            '^%sfeeds/(?P<url>.*)/$' % b["prefix"],
            'django.contrib.syndication.views.feed',
            {'feed_dict': feeds},
        )
    )
    return urls
# }}}
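
# For a blog directory named "main", get_blog returns url tuples along these
# lines (a sketch; b and feeds refer to the locals built above):
#
#   [
#       ("^blog/$", "gitology.d.views.show_blog", {'blog_data': b}),
#       ("^blog/labelled/(?P<label_name>[^/]+)/$",
#        "gitology.d.views.show_category", {'blog_data': b}),
#       ("^blog/feeds/(?P<url>.*)/$",
#        "django.contrib.syndication.views.feed", {'feed_dict': feeds}),
#   ]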
# get_blogs # {{{
def get_blogs():
    from gitology.config import settings as gsettings
    urls = []
    blogs_folder = gsettings.LOCAL_REPO_PATH.joinpath("blogs")
    for d in blogs_folder.dirs():
        urls += get_blog(d)
    return urls
# }}}
# get_wiki # {{{
global_wiki_dict = {}

def get_wiki():
    from gitology.config import settings as gsettings
    from gitology.document import Document
    urls = []
    wiki_folder = gsettings.LOCAL_REPO_PATH.joinpath("wiki")
    for i in wiki_folder.walk():
        if not i.isfile(): continue
        wiki_document = Document(i.open().read().strip())
        wiki_url = i[len(wiki_folder):-4] + "/"
        global_wiki_dict[wiki_url] = wiki_document
    return urls
# }}}
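
# Illustrative mapping (file name made up): a file wiki/about.txt whose contents
# name a document alias produces
#
#   global_wiki_dict["/about/"] = Document("<contents of about.txt>")
#
# i.e. the url is the file's path relative to the wiki folder, minus the ".txt"
# extension, plus a trailing "/".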
# get redirects # {{{
global_redirect_dict = {}

def get_redirects():
    from gitology.config import settings as gsettings
    redirects_file = gsettings.LOCAL_REPO_PATH.joinpath("redirects.lst")
    if not redirects_file.exists(): return
    for line in redirects_file.open().readlines():
        try:
            before, after = line.split()
        except ValueError:
            print "Bad redirect line: ", line
            continue
        global_redirect_dict[before] = after
# }}}
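
# redirects.lst is expected to contain one "<from-url> <to-url>" pair per line,
# for example (urls made up):
#
#   /old-post/ /blog/2009/05/new-post/
#   /b /blog/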
# refresh_urlconf_cache # {{{
def refresh_urlconf_cache():
    """ creates a urlconf that is stored """
    print "refresh_urlconf_cache"
    from gitology.config import settings
    global global_wiki_dict, global_blog_dict, global_redirect_dict
    global_blog_dict = {}
    global_wiki_dict = {}
    global_redirect_dict = {}
    urls = ['']
    # for blogs:
    #   the list of blogs is in $reporoot/blogs/
    #   urls: /blog_name/
    #   the blog named "main" goes under /blog/, the rest go to /folder_name/
    urls += get_blogs()
    urls += get_wiki()
    get_redirects()
    # for each blog, the list of labels is in $reporoot/blogs/blog_name/labels/
    #   urls: /blog_name/label/label_name/
    # for each blog, a date based hierarchy is kept in
    #   $reporoot/blogs/blog_name/year/month/date.lst
    #   urls: /blog_name/year/month/date/document_name/

    # for the wiki:
    #   the list of wiki document names is in $reporoot/wiki/document_alias.txt
    #   urls: /document_alias/
    #   further hierarchy is maintained as:
    #     $reporoot/wiki/document_alias/child_alias.txt
    #     /document_alias/child_alias/

    # for notes:
    #   notebooks are stored in $reporoot/notebooks/
    #   urls: /notebook/ -- this is a dedicated app

    # for albums:
    #   the list of albums is in $reporoot/albums/: album_name.meta, album_name.lst
    #   urls: /album|gallery/album_name/ -- this is a document; it can contain a
    #   select few photos etc.
    #   each photo is basically a document; its list is in album_name.lst
    #   each album photo will have thumbnail and caption meta data to be shown
    #   on the album page.
    #   /album/album_name/photos/ will list all photos; each photo may be in one
    #   or more albums. each photo can be a blog post too, in one or more blogs.

    # optimization: this info will be loaded from a file, and some other tool
    # is to update that file every time something interesting happens.

    # this function returns the url to view mapping.
    file(settings.LOCAL_REPO_PATH.joinpath("urlconf.cache"), "w+").write(
        #simplejson.dumps(urls)
        "updated"
    )
    #print urls
    return urls
# }}}
# sort_nicely # {{{
# http://nedbatchelder.com/blog/200712/human_sorting.html
def tryint(s):
    try: return int(s)
    except ValueError: return s

def alphanum_key(s):
    """ Turn a string into a list of string and number chunks.

        "z23a" -> ["z", 23, "a"]
    """
    return [ tryint(c) for c in re.split('([0-9]+)', s) ]

def sort_nicely(l):
    """ Sort the given list in the way that humans expect. """
    l.sort(key=alphanum_key)
# }}}
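
# Illustrative behaviour (a sketch):
#
#   >>> l = ["img10.png", "img2.png", "img1.png"]
#   >>> sort_nicely(l)
#   >>> l
#   ['img1.png', 'img2.png', 'img10.png']
#
# whereas a plain l.sort() would place "img10.png" before "img2.png".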
# smart_wrap # {{{
def smart_wrap(s, width=None):
    if width is None:
        width = int(os.environ.get("COLUMNS", 70))
    lines = []
    for line in s.split("\n"):
        if not line.split(): lines.append("")
        else: lines += textwrap.wrap(line, width=width)
    return "\n".join(lines)
# }}}
# getDirSize {{{
# stolen from: http://mail.python.org/pipermail/python-list/2000-June/037460.html
def calcDirSize(arg, dir, files):
    for file in files:
        stats = os.stat(os.path.join(dir, file))
        size = stats[6]
        arg.append(size)

def getDirSize(dir):
    sizes = []
    os.path.walk(dir, calcDirSize, sizes)
    total = 0
    for size in sizes:
        total = total + size
    if total > 1073741824:
        return (round(total/1073741824.0, 2), 'GB')
    if total > 1048576:
        return (round(total/1048576.0, 2), 'MB')
    if total > 1024:
        return (round(total/1024.0, 2), 'KB')
    return (total, 'bytes')
# }}}
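
# Illustrative usage (a sketch; the numbers are made up):
#
#   >>> getDirSize("/var/log")
#   (12.34, 'MB')
#
# i.e. a (value, unit) tuple with the unit picked from bytes/KB/MB/GB.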
# generators from http://www.dabeaz.com/generators/ # {{{
# these generators are one time use only;
# use http://www.fiber-space.de/generator_tools/doc/generator_tools.html
# for copying generators.

def gen_find(filepat, top=""):
    if not top: top = path.path(".").abspath()
    top = os.path.expanduser(top)
    for path_, dirlist, filelist in os.walk(top):
        for name in fnmatch.filter(filelist, filepat):
            yield os.path.join(path_, name)

def gen_exclude(pattern, lines):
    pattern = re.compile(pattern)
    for line in lines:
        if not pattern.search(line):
            yield line

def gen_grep(pattern, lines):
    pattern = re.compile(pattern)
    for line in lines:
        if pattern.search(line):
            yield line

def gen_open(filenames):
    for name in filenames:
        if name.endswith(".gz"):
            yield gzip.open(name)
        elif name.endswith(".bz2"):
            yield bz2.BZ2File(name)
        else:
            yield open(name)

def gen_cat(sources):
    for s in sources:
        for item in s:
            yield item

def counter(gen):
    c = 0
    for item in gen: c += 1
    return c
# }}}
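
# Illustrative pipeline built from the generators above (a sketch; the file
# pattern and regex are made up):
#
#   lognames = gen_find("access-log*", "/var/log/httpd")
#   logfiles = gen_open(lognames)
#   loglines = gen_cat(logfiles)
#   errors   = gen_grep(r" 404 ", loglines)
#   print counter(errors)
#
# being one-time-use generators, the pipeline is exhausted after a single pass.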
# is_valid_url # {{{
allowed_chars_in_urls = string.ascii_letters + string.digits + ".-_/"

def is_valid_url(url):
    if not url.startswith("/"): return False
    for c in url:
        if c not in allowed_chars_in_urls:
            return False
    # consecutive // not allowed
    for p in url.split("/")[1:-1]:
        if not p: return False
    return True
# }}}
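
# Illustrative checks (a sketch):
#
#   >>> is_valid_url("/blog/2009/05/hello-world/")
#   True
#   >>> is_valid_url("blog/")           # must start with "/"
#   False
#   >>> is_valid_url("/blog//post/")    # consecutive slashes are rejected
#   False
#   >>> is_valid_url("/blog/?page=2")   # "?" and "=" are not allowed characters
#   False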
# parse_date # {{{
def parse_date(s):
    from django.db.models.fields import DateTimeField
    dtf = DateTimeField()
    return dtf.to_python(s)
# }}}
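
# Illustrative usage (a sketch; relies on Django's DateTimeField parsing):
#
#   >>> parse_date("2009-05-23 10:15:00")
#   datetime.datetime(2009, 5, 23, 10, 15)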