"""Get the last order for PKKA from Tumblr using its API."""

# Address of the API "read" endpoint. The script opens it with
# urllib2.urlopen and parses the response as XML.
url ='http://orders.ecpsu.ru/api/read'
# Matches any opening or closing tag except <a ...> and </a>.
# Raw string: "\/" and "\s" are regex escapes, not Python string escapes.
_NON_ANCHOR_TAG_RE = re.compile(r'<(?!\/?a(?=>|\s.*>))\/?.*?>')


def remove_html_tags(data):
    """Remove all HTML tags except <a> and </a>.

    Args:
        data: Text that may contain HTML markup.

    Returns:
        ``data`` with every tag other than anchor tags deleted. Text
        between tags is kept as is. ``.`` does not match newlines here,
        so a tag that spans several lines is not removed.
    """
    return _NON_ANCHOR_TAG_RE.sub('', data)
def sanitize_urls(data):
    """Rewrite each <a> element whose "href" value starts with "http".

    The pattern captures the "href" URL as group 1 and the text between
    <a ...> and </a> as group 2; every match is substituted using
    ``repl``.
    """
    # NOTE(review): ``repl`` is not assigned in the visible code; it is
    # expected to be in scope when this function runs -- confirm.
    anchor_re = re.compile(
        r'<a.+?href="(http[^"]+)"(?:>|\s.*?>)([^<]+)</a>')
    return anchor_re.sub(repl, data)
# Download the feed and parse it as XML. minidom.parse() reads the whole
# stream before returning, so the HTTP response can be closed right after
# parsing instead of being left open.
response = urllib2.urlopen(url)
try:
    doc = xml.dom.minidom.parse(response)
finally:
    response.close()
posts = doc.getElementsByTagName('post')

# NOTE(review): ``last_post`` is not assigned in the visible code;
# presumably it is picked from ``posts`` -- confirm.
# Take the text of the first <regular-body> element of that post.
dirty_body = last_post.getElementsByTagName('regular-body')[0].firstChild.data

# Strip every tag except anchors, then rewrite the remaining anchors.
body_with_a = remove_html_tags(dirty_body)
body = sanitize_urls(body_with_a)