3 # Released under the terms of the GPLv3
12 from constants
import USER_AGENT
# Run `lynx -dump` on a URL and return what it printed.
#
# Parameters:
#   url      -- address handed to lynx; it is wrapped in double quotes inside
#               the command string but is not otherwise escaped.
#   lynxArgs -- extra command-line text inserted verbatim between the
#               -useragent option and the URL (default: empty string).
#
# Returns:
#   The tuple (dumped, linkUrls, lynxErr). lynxErr is everything read from
#   the child's stderr pipe. linkUrls collects the text captured from
#   numbered lines ("<n>. <text>") of the output.
#
# NOTE(review): only part of this function is visible in this excerpt. The
# lines that initialise `dumped` and `linkUrls`, and the branch bodies around
# the 'References' handling, are not shown -- confirm against the full file
# before relying on the description of those two values.
# NOTE(review): url and lynxArgs are interpolated into a shell command line
# without escaping; do not pass untrusted input unless that is handled by the
# caller. os.popen3 is deprecated since Python 2.6 and does not exist in
# Python 3 (subprocess is the documented replacement).
14 def lynxDump(url
, lynxArgs
=''):
# os.popen3 returns (child_stdin, child_stdout, child_stderr); the stdin pipe
# is bound to `_` and never used. Neither pipe is closed in the visible lines.
15 _
, lynxStdout
, lynxErrout
= os
.popen3('lynx -dump -useragent="%s via lynx" %s "%s"'
16 % (USER_AGENT
, lynxArgs
, url
))
18 # TODO: work out the encoding somehow? For now we assume it's latin1...
# EncodedFile(file, data_encoding, file_encoding, errors): bytes read from
# lynx are decoded as latin1 and handed back re-encoded as utf8, using the
# 'replace' error handler for anything that cannot be converted.
19 lynxDecoded
= codecs
.EncodedFile(lynxStdout
, 'utf8', 'latin1', 'replace')
# Walk the transcoded output one line at a time.
25 for line
in lynxDecoded
:
# A line consisting of exactly 'References' is treated specially; presumably
# it marks the start of lynx's trailing link list -- the handling itself is
# on lines not visible here.
26 if line
== 'References\n':
28 # The previous matched 'References' was part of the
# NOTE(review): the remainder of the comment above and the code it describes
# are missing from this excerpt.
# Match optional whitespace, a number, ". " and capture the rest of the line
# (without the trailing newline).
# NOTE(review): re.match returns None when the line does not fit; presumably
# a guard on a line not shown prevents calling .groups() on None -- verify.
36 m
= re
.match(r
'\s*\d+\. (.*)\n', line
)
# Append the captured text (first and only group) to the link list.
38 linkUrls
+= [m
.groups()[0]]
# Drain whatever lynx wrote to stderr so the caller can inspect it.
44 lynxErr
= lynxErrout
.read()
45 return dumped
, linkUrls
, lynxErr