1 """ deki.py - Access the wiki pages on a MindTouch Deki server via the API.
3 Here's what this code can do:
5 wiki = deki.Deki("http://developer.mozilla.org/@api/deki/", username, password)
6 page = wiki.get_page("Sheep")
10 page.title = "Bananas"
13 There are also some additional methods:
wiki.create_page(path, content, title=, overwrite=)
15 wiki.move_page(old, new)
16 wiki.get_subpages(page)
18 This module does not try to mimic the MindTouch "Plug" API. It's meant to be
19 higher-level than that.
23 import urllib2
, cookielib
, httplib
24 import xml
.dom
.minidom
as dom
25 from urllib
import quote
as _urllib_quote
26 from urllib
import urlencode
as _urlencode
28 from datetime
import datetime
38 raise AssertionError('check failed')
40 def _urlquote(s
, *args
):
41 return _urllib_quote(s
.encode('utf-8'), *args
)
43 def _make_url(*dirs
, **params
):
44 """ dirs must already be url-encoded, params must not """
47 url
+= '?' + _urlencode(params
)
50 class PutRequest(urllib2
.Request
):
54 # === Dream framework client code
56 # This handler causes python to "always be logged in" when it's talking to the
57 # server. If you're just accessing public pages, it generates more requests
58 # than are strictly needed, but this is the behavior you want for a bot.
60 # The users/authenticate request is sent twice: once without any basic auth and
61 # once with. Dumb. Feel free to fix.
63 class _LoginHandler(urllib2
.HTTPCookieProcessor
):
64 def __init__(self
, server
):
65 policy
= cookielib
.DefaultCookiePolicy(rfc2965
=True)
66 cookiejar
= cookielib
.CookieJar(policy
)
67 urllib2
.HTTPCookieProcessor
.__init
__(self
, cookiejar
)
70 def http_request(self
, req
):
71 #print "DEBUG- Requesting " + req.get_full_url()
73 req
= urllib2
.HTTPCookieProcessor
.http_request(self
, req
)
74 if ('Cookie' not in req
.unredirected_hdrs
75 and req
.get_full_url() != s
.base
+ 'users/authenticate'):
77 # Retry - should have a new cookie.
78 req
= urllib2
.HTTPCookieProcessor
.http_request(self
, req
)
79 _check('Cookie' in req
.unredirected_hdrs
)
83 def __init__(self
, base
, user
, password
):
85 base - The base URI of the Deki API, with trailing slash.
86 Typically, 'http://wiki.example.org/@api/deki/'.
87 user, password - Your Deki login information.
90 pm
= urllib2
.HTTPPasswordMgrWithDefaultRealm()
91 pm
.add_password(None, self
.base
, user
, password
)
92 ah
= urllib2
.HTTPBasicAuthHandler(pm
)
93 lh
= _LoginHandler(self
)
94 self
._opener
= urllib2
.build_opener(ah
, lh
)
97 response
= self
._opener
.open(self
.base
+ 'users/authenticate')
101 return self
._opener
.open(self
.base
+ url
)
103 def _handleResponse(self
, req
):
104 """Helper method shared between post() and put()"""
105 resp
= self
._opener
.open(req
)
107 ct
= resp
.headers
.get('Content-Type', '(none)')
108 if '/xml' in ct
or '+xml' in ct
:
109 return dom
.parse(resp
)
111 #print "DEBUG- Content-Type:", ct
113 #print 'DEBUG- crud:\n---\n%s\n---' % re.sub(r'(?m)^', ' ', crud)
def post(self, url, data, type):
    """Send `data` to the API resource at self.base + url.

    url - resource path, appended to self.base.
    data - the request body.
    type - value sent as the Content-Type header.

    Returns whatever self._handleResponse() returns for the request.
    """
    target = self.base + url
    content_headers = {'Content-Type': type}
    request = urllib2.Request(target, data, content_headers)
    return self._handleResponse(request)
def put(self, url, data, type):
    """Send `data` in a PutRequest to the API resource at self.base + url.

    url - resource path, appended to self.base.
    data - the request body.
    type - value sent as the Content-Type header.

    Returns whatever self._handleResponse() returns for the request.
    """
    target = self.base + url
    content_headers = {'Content-Type': type}
    request = PutRequest(target, data, content_headers)
    return self._handleResponse(request)
129 def get_xml(self
, url
):
130 resp
= self
.open(url
)
132 return dom
.parse(resp
)
140 if node
.nodeType
== node
.ELEMENT_NODE
:
141 return u
''.join(_text_of(n
) for n
in node
.childNodes
)
142 elif node
.nodeType
== node
.TEXT_NODE
:
143 return node
.nodeValue
147 def _the_element_by_name(doc
, tagName
):
148 elts
= doc
.getElementsByTagName(tagName
)
150 raise ValueError("Expected exactly one <%s> tag, got %d." % (tagName
, len(elts
)))
153 def _first_element(node
):
156 if n
.nodeType
== n
.ELEMENT_NODE
:
161 def _find_elements(node
, path
):
163 [first
, rest
] = path
.split(u
'/', 1)
164 for child
in _find_elements(node
, first
):
165 for desc
in _find_elements(child
, rest
):
168 for n
in node
.childNodes
:
169 if n
.nodeType
== node
.ELEMENT_NODE
and n
.nodeName
== path
:
175 def _format_page_id(id):
176 if isinstance(id, int):
178 elif id is Deki
.HOME
:
180 elif isinstance(id, basestring
):
181 # Double-encoded, per the Deki API reference.
182 return '=' + _urlquote(_urlquote(id, ''))
184 class Deki(DreamClient
):
187 def get_page(self
, page_id
):
188 """ Get the content of a page from the wiki.
190 The page_id argument must be one of:
191 an int - The page id (an arbitrary number assigned by Deki)
192 a str - The page name (not the title, the full path that shows up in the URL)
193 Deki.HOME - Refers to the main page of the wiki.
195 Returns a Page object.
201 def create_page(self
, path
, content
, title
=None, overwrite
=False):
202 """ Create a new wiki page.
205 path - str - The page id.
206 content - str - The XML content to put in the new page.
207 The document element must be a <body>.
208 title - str - The page title. Keyword argument only.
209 Defaults to the last path-segment of path.
210 overwrite - bool - Whether to overwrite an existing page. If false,
211 and the page already exists, the method will throw an error.
214 title
= path
.split('/')[-1]
215 doc
= dom
.parseString(content
)
216 _check(doc
.documentElement
.tagName
== 'body')
218 p
._create
(path
, title
, doc
, overwrite
)
220 def attach_file(self
, page
, name
, data
, mimetype
, description
=None):
221 """Create or update a file attachment.
224 page - str - the page ID this file is related to
225 name - str - the name of the file
226 data - str - the file data
227 mimetype - str - the MIME type of the file
228 description - str - a description of the file
232 if description
is not None:
233 p
['description'] = description
235 url
= _make_url('pages', _format_page_id(page
),
236 'files', _format_page_id(name
), **p
)
238 r
= self
.put(url
, data
, mimetype
)
239 _check(r
.documentElement
.nodeName
== u
'file')
241 def get_subpages(self
, page_id
):
242 """ Return the ids of all subpages of the given page. """
243 doc
= self
.get_xml(_make_url("pages", _format_page_id(page_id
),
245 for elt
in _find_elements(doc
, u
'page/subpages/page.subpage/path'):
248 def move_page(self
, page_id
, new_title
, redirects
=True):
249 """ Move an existing page to a new location.
251 A page cannot be moved to a destination that already exists, is a
252 descendant, or has a protected title (ex. Special:xxx, User:,
255 When a page is moved, subpages under the specified page are also moved.
256 For each moved page, the system automatically creates an alias page
257 that redirects from the old to the new destination.
259 self
.post(_make_url("pages", _format_page_id(page_id
), "move",
261 redirects
=redirects
and "1" or "0"),
265 """ A Deki wiki page.
267 To obtain a page, call wiki.get_page(id).
269 title : unicode - The page title.
270 doc : Document - The content of the page as a DOM Document.
271 The root element of this document is a <body>.
272 path : unicode - The path. Use this to detect redirects, as otherwise
273 page.save() will overwrite the redirect with a copy of the content!
274 deki : Deki - The Deki object from which the page was loaded.
page_id : str/int/Deki.HOME - The page id used to load the page.
276 load_time : datetime - The time the page was loaded,
277 according to the clock on the client machine.
279 save() - Save the modified document back to the server.
280 Only the page.title and the contents of page.doc are saved.
283 def __init__(self
, deki
):
286 def _create(self
, path
, title
, doc
, overwrite
):
291 self
.load_time
= datetime(2500, 1, 1)
293 self
.load_time
= datetime(1900, 1, 1)
297 def _load(self
, page_id
):
298 """ page_id - See comment near the definition of `HOME`. """
299 load_time
= datetime
.utcnow()
301 # Getting the title is a whole separate query!
302 url
= 'pages/%s/info' % _format_page_id(page_id
)
303 doc
= self
.deki
.get_xml(url
)
304 title
= _text_of(_the_element_by_name(doc
, 'title'))
305 path
= _text_of(_the_element_by_name(doc
, 'path'))
307 # If you prefer to sling regexes, you can request format=raw instead.
308 # The result is an XML document with one big fat text node in the body.
309 url
= _make_url('pages', _format_page_id(page_id
), 'contents',
310 format
='xhtml', mode
='edit')
311 doc
= self
.deki
.get_xml(url
)
313 content
= doc
.documentElement
314 _check(content
.tagName
== u
'content')
315 body
= _first_element(content
)
316 _check(body
is not None)
317 _check(body
.tagName
== u
'body')
319 doc
.removeChild(content
)
320 doc
.appendChild(body
)
322 self
.page_id
= page_id
323 self
.load_time
= load_time
329 p
= {'edittime': _urlquote(self
.load_time
.strftime('%Y%m%d%H%M%S')),
332 if self
.title
is not None:
333 p
['title'] = _urlquote(self
.title
)
335 url
= _make_url('pages', _format_page_id(self
.page_id
), 'contents', **p
)
337 body
= self
.doc
.documentElement
338 bodyInnerXML
= ''.join(n
.toxml('utf-8') for n
in body
.childNodes
)
340 reply
= self
.deki
.post(url
, bodyInnerXML
, 'text/plain; charset=utf-8')
341 _check(reply
.documentElement
.nodeName
== u
'edit')
342 _check(reply
.documentElement
.getAttribute(u
'status') == u
'success')