version 0.5.0
[flinks.git] / flinkspkg / lynxDump.py
blob8d92dc44a3e24bde322ae66bd7bcf9b9c252563b
1 # Part of flinks
2 # (C) Martin Bays 2008
3 # Released under the terms of the GPLv3
5 import sys, os
6 from string import *
8 import re
10 from subprocess import Popen, PIPE
12 from .constants import USER_AGENT
13 from .readme import README
def lynxDump(url, lynxArgs=None):
    """Render `url` to plain text with `lynx -dump` and collect its links.

    Parameters:
        url: the address handed to lynx. The special value
            "special:README" short-circuits and returns the bundled README
            without running lynx at all.
        lynxArgs: optional sequence of extra command-line arguments that
            are inserted before the url.

    Returns a 3-tuple `(dumped, linkUrls, lynxErr)`:
        dumped: the text of the page, without lynx's trailing
            "References" section.
        linkUrls: the urls listed in that "References" section, in order.
        lynxErr: whatever lynx wrote to stderr, or a fixed error message
            if lynx could not be executed.
    """
    if url == "special:README":
        return README, [], ''

    # The argument list is passed to lynx directly (no shell), so the
    # useragent value must not be wrapped in quotes: they would be sent
    # literally as part of the User-Agent header.
    command = ['lynx', '-dump', '-force_html', '-assume-charset=utf8',
               '-display-charset=utf8',
               '-useragent=%s via lynx' % USER_AGENT]
    command += list(lynxArgs or [])
    command.append(url)

    try:
        p = Popen(command, stdin=None, stdout=PIPE, stderr=PIPE,
                  universal_newlines=True)
    except OSError:
        return "", [], "Fatal error - lynx execution failed. Is it installed?"

    # communicate() drains stdout and stderr concurrently, so lynx cannot
    # block on a full stderr pipe while we are still reading stdout; it
    # also closes both pipes and reaps the child process.
    lynxOut, lynxErr = p.communicate()

    dumped, linkUrls = _parseLynxDump(_linesKeepingNewlines(lynxOut))
    return dumped, linkUrls, lynxErr


def _linesKeepingNewlines(text):
    """Split `text` on '\\n' only, keeping the terminator on each line."""
    pieces = text.split('\n')
    lines = [piece + '\n' for piece in pieces[:-1]]
    if pieces[-1]:
        # Trailing text that is not newline-terminated.
        lines.append(pieces[-1])
    return lines


def _parseLynxDump(lines):
    """Separate the page text from the final "References" link list.

    Returns `(dumped, linkUrls)`. Only the last line equal to
    'References\\n' starts the link list; text following any earlier such
    line is treated as part of the document.
    """
    refLine = re.compile(r'\s*\d+\. (.*)\n')
    bodyParts = []
    refParts = []
    linkUrls = []
    readingRefs = False

    for line in lines:
        if line == 'References\n':
            if readingRefs:
                # The previous matched 'References' was part of the
                # document, so give its lines back to the page text.
                bodyParts.extend(refParts)
                refParts = []
                linkUrls = []
            readingRefs = True

        if not readingRefs:
            bodyParts.append(line)
            continue

        m = refLine.match(line)
        if m:
            linkUrls.append(m.group(1))
        refParts.append(line)

    return ''.join(bodyParts), linkUrls