1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 from lxml
.html
.clean
import Cleaner
21 from mediagoblin
import mg_globals
22 from mediagoblin
.tools
import url
# A super strict version of the lxml.html cleaner class.
#
# NOTE(review): the visible listing of this call looks incomplete -- the tag
# list ends on a trailing comma and the keyword that introduces it is not
# shown.  Confirm against the canonical file that no further Cleaner options
# or allowed tags are missing before relying on this configuration.
HTML_CLEANER = Cleaner(
    processing_instructions=True,
    # Only these tags survive cleaning.
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br',
    ],
    remove_unknown_tags=False,  # can't be used with allow_tags
    add_nofollow=True,  # for now
    whitelist_tags=set())
def clean_html(html):
    """
    Sanitize an HTML string with the module-level HTML_CLEANER.

    Falsy input (empty string or None) is answered with an empty string
    without invoking the cleaner at all.
    """
    # clean_html barfs on an empty string
    if not html:
        return u''

    return HTML_CLEANER.clean_html(html)
def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags.

    Strips trailing, leading, and internal whitespace, and also converts
    the comma-separated "tags" text into a list of dicts, one per tag,
    each holding the tag's 'name' and its 'slug' (from url.slugify).

    Empty tags and repeated tags are dropped; the first occurrence of a
    tag decides its position.  Falsy input (empty string or None) yields
    an empty list.
    """
    taglist = []
    if not tag_string:
        return taglist

    # Strip out internal, trailing, and leading whitespace: every run of
    # whitespace collapses to a single space.
    stripped_tag_string = u' '.join(tag_string.strip().split())

    # Names already emitted; a set keeps the duplicate check O(1) per tag.
    seen_names = set()

    # Split the tag string into a list of tags
    for tag in stripped_tag_string.split(','):
        # A single space can still sit next to each comma ("a, b").
        tag = tag.strip()

        # Ignore empty or duplicate tags
        if tag and tag not in seen_names:
            seen_names.add(tag)
            taglist.append({'name': tag,
                            'slug': url.slugify(tag)})

    return taglist
def media_tags_as_string(media_entry_tags):
    """
    Generate a string from a media item's tags, stored as a list of dicts.

    The 'name' value of every tag dict is joined with ', '.  Falsy input
    (an empty list or None) yields an empty string.

    This is the opposite of convert_to_tag_list_of_dicts.
    """
    if not media_entry_tags:
        return u''

    return u', '.join(tag['name'] for tag in media_entry_tags)
# Message template for over-long tags.  Takes two %-arguments: the maximum
# tag length, then the comma-separated names of the offending tags.
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')
def tag_length_validator(form, field):
    """
    Make sure tags do not exceed the maximum tag length.

    Parses field.data with convert_to_tag_list_of_dicts and compares each
    tag name against mg_globals.app_config['tags_max_length'].  The form
    argument is not used by this function.

    Raises wtforms.ValidationError, listing every offending tag, when at
    least one tag name is longer than the configured maximum.
    """
    # Read the limit once; it is needed for the check and for the message.
    max_length = mg_globals.app_config['tags_max_length']

    tags = convert_to_tag_list_of_dicts(field.data)
    too_long_tags = [
        tag['name'] for tag in tags
        if len(tag['name']) > max_length]

    if too_long_tags:
        raise wtforms.ValidationError(
            TOO_LONG_TAG_WARNING % (max_length,
                                    ', '.join(too_long_tags)))
# Don't use the safe mode, because lxml.html.clean is better and the
# converted output is passed through clean_html anyway (see
# cleaned_markdown_conversion).
UNSAFE_MARKDOWN_INSTANCE = markdown.Markdown()
def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through MarkDown, and clean its HTML.

    Falsy input (empty string or None) yields an empty string without
    calling either the converter or the cleaner.
    """
    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string, so answer it directly.
    if not text:
        return u''

    return clean_html(UNSAFE_MARKDOWN_INSTANCE.convert(text))