1 from bs4
import BeautifulSoup
, SoupStrainer
, FeatureNotFound
# soupify_bs4(html, nohtml=False): build a BeautifulSoup object from `html`.
#
# When `nohtml` is falsy the input is first wrapped in
# '<html><body>%s</body></html>'; when it is truthy the input is used as-is.
# The markup is then handed to BeautifulSoup together with `parser`.
#
# NOTE(review): this excerpt skips source lines (the embedded line numbers
# jump 6 -> 9 -> 11 and stop at 14). The `try:` that the `except` below
# belongs to, the initial value of `parser`, and the function's return
# statement are not visible here -- confirm against the full file.
6 def soupify_bs4(html
, nohtml
=False):
9 htm
= html
if nohtml
else '<html><body>%s</body></html>'%(html)
# First attempt, using whatever `parser` currently holds.
11 res
= BeautifulSoup(htm
, parser
)
# Fallback path: on FeatureNotFound, switch `parser` to 'html.parser' and
# build the soup again.
# NOTE(review): `e` is bound but not used in the visible lines.
12 except FeatureNotFound
as e
:
13 parser
= 'html.parser'
14 res
= BeautifulSoup(htm
, parser
)
# soupify_gumbo(html, nohtml=False): parse `html` via gumbo.soup_parse.
#
# When `nohtml` is falsy the input is first wrapped in
# '<html><body>%s</body></html>'; when it is truthy the input is used as-is.
#
# NOTE(review): this excerpt skips source lines (embedded numbers jump
# 18 -> 20 -> 28 -> 29). The `try:` opening the handler, whatever happens
# between the parse call and the handler, and the return statement are not
# visible here -- confirm against the full file.
# NOTE(review): `gumbo` and `sys` are not imported in the visible lines;
# presumably they are imported on lines not shown -- verify.
17 def soupify_gumbo(html
, nohtml
=False):
18 htm
= html
if nohtml
else '<html><body>%s</body></html>'%(html)
20 soup
= gumbo
.soup_parse(htm
)
# Broad handler: on any Exception the ORIGINAL `html` argument (not the
# wrapped `htm`) is written to standard output.
# NOTE(review): `e` is bound but not used in the visible lines.
28 except Exception as e
:
29 sys
.stdout
.write(html
)
def soupify(html, nohtml=False):
    """Parse *html* with the currently selected backend and return the result.

    Both arguments are forwarded unchanged to ``soupify_bs4``; whatever that
    function returns is returned to the caller.
    """
    # The gumbo backend is kept here as a disabled alternative:
    # return soupify_gumbo(html, nohtml)
    parsed = soupify_bs4(html, nohtml)
    return parsed