1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 from __future__
import print_function
26 from six
.moves
.urllib
.parse
import urlparse
28 from mediagoblin
.db
.models
import LocalUser
29 from mediagoblin
.gmg_commands
import util
as commands_util
30 from mediagoblin
.submit
.lib
import (
31 submit_media
, get_upload_file_limits
,
32 FileUploadLimit
, UserUploadLimit
, UserPastUploadLimit
)
33 from mediagoblin
.tools
.metadata
import compact_and_validate
34 from mediagoblin
.tools
.translate
import pass_to_ugettext
as _
35 from jsonschema
.exceptions
import ValidationError
# parser_setup(subparser): configure the argparse sub-parser of the batch
# upload command.
#
# It assigns a plain description string, assigns an epilog passed through
# _() (pass_to_ugettext) that points at the command line uploading
# documentation page, and then makes three add_argument() calls whose help
# texts describe: the user the media entries belong to, the path to the
# metadata csv file, and an option to hand processing off to celery instead
# of processing eagerly.
#
# NOTE(review): only the help text of each add_argument() call is visible in
# this listing; the argument names/flags (and any action) are not shown.
# batchaddmedia() reads args.username and args.metadata_path, so the first two
# are presumably named accordingly -- confirm against the complete file.
38 def parser_setup(subparser
):
39 subparser
.description
= """\
40 This command allows the administrator to upload many media files at once."""
41 subparser
.epilog
= _(u
"""For more information about how to properly run this
42 script (and how to format the metadata csv file), read the MediaGoblin
43 documentation page on command line uploading
44 <http://docs.mediagoblin.org/siteadmin/commandline-upload.html>""")
# First argument: the owner of the uploaded media (see help text).
45 subparser
.add_argument(
47 help=_(u
"Name of user these media entries belong to"))
# Second argument: location of the metadata csv file (see help text).
48 subparser
.add_argument(
51 u
"""Path to the csv file containing metadata information."""))
# Third argument: opt out of eager processing (see help text).
52 subparser
.add_argument(
55 help=_(u
"Don't process eagerly, pass off to celery"))
# batchaddmedia(args): entry point of the batch upload command.
#
# As far as this listing shows, the function:
#   * sets CELERY_ALWAYS_EAGER='true' in os.environ and builds the app with
#     commands_util.setup_app(args);
#   * queries LocalUser by args.username.lower() and prints a "no user by
#     username" message built from args.username;
#   * checks args.metadata_path with os.path.isfile(), derives the absolute
#     path and the directory of the metadata file, reads it as UTF-8 and
#     parses the contents with parse_csv_file();
#   * for every (media_id, file_metadata) entry: reads 'location', picks
#     title/description (falling back to 'dc:title' / 'dc:description') and
#     'license', runs compact_and_validate() on the metadata (a
#     ValidationError is turned into an error message), then parses the
#     location with urlparse -- 'http' URLs are requested with
#     requests.get(..., stream=True), scheme-less values are treated as
#     paths, relative ones being joined to the metadata file's directory;
#   * passes the file, title, description, license, metadata and upload
#     limits on as keyword arguments (presumably to submit_media -- the head
#     of that call is not visible here) and prints either a success message
#     or one of three FAIL messages (FileUploadLimit, UserUploadLimit,
#     UserPastUploadLimit);
#   * finally formats a "{files_uploaded} out of {files_attempted}" summary.
#
# NOTE(review): this listing has gaps -- guard conditions, return/continue
# statements, `try:` lines, the counter updates and the heads of several
# calls are not visible. Confirm every note below against the complete file.
#
# NOTE(review): throughout the function .format() is applied to the message
# *before* it is handed to _(), so the translation lookup receives the
# already-formatted string rather than the template.
58 def batchaddmedia(args
):
59 # Run eagerly unless explicetly set not to
# NOTE(review): the statement guarding this assignment is not visible here;
# as listed, the variable is set unconditionally.
61 os
.environ
['CELERY_ALWAYS_EAGER'] = 'true'
63 app
= commands_util
.setup_app(args
)
65 files_uploaded
, files_attempted
= 0, 0
# Look the user up by lower-cased username.
68 user
= app
.db
.LocalUser
.query
.filter(
69 LocalUser
.username
==args
.username
.lower()
72 print(_(u
"Sorry, no user by username '{username}' exists".format(
73 username
=args
.username
)))
76 upload_limit
, max_file_size
= get_upload_file_limits(user
)
79 if os
.path
.isfile(args
.metadata_path
):
80 metadata_path
= args
.metadata_path
83 error
= _(u
'File at {path} not found, use -h flag for help'.format(
84 path
=args
.metadata_path
))
88 abs_metadata_filename
= os
.path
.abspath(metadata_path
)
89 abs_metadata_dir
= os
.path
.dirname(abs_metadata_filename
)
# NOTE(review): this repeats the identical get_upload_file_limits(user) call
# made a few statements earlier; one of the two looks redundant.
90 upload_limit
, max_file_size
= get_upload_file_limits(user
)
# Nested helper: converts a value with six.text_type(); the body of the
# `is None` branch is not visible in this listing.
92 def maybe_unicodeify(some_string
):
93 # this is kinda terrible
94 if some_string
is None:
97 return six
.text_type(some_string
)
# Read the whole metadata file as UTF-8 text and parse it in one go.
100 abs_metadata_filename
, 'r', encoding
='utf-8') as all_metadata
:
101 contents
= all_metadata
.read()
102 media_metadata
= parse_csv_file(contents
)
# NOTE(review): dict.iteritems() exists only on Python 2; on Python 3 this
# raises AttributeError (six.iteritems() or .items() would work on both).
104 for media_id
, file_metadata
in media_metadata
.iteritems():
106 # In case the metadata was not uploaded initialize an empty dictionary.
107 json_ld_metadata
= compact_and_validate({})
109 # Get all metadata entries starting with 'media' as variables and then
110 # delete them because those are for internal use only.
111 original_location
= file_metadata
['location']
113 ### Pull the important media information for mediagoblin from the
114 ### metadata, if it is provided.
115 title
= file_metadata
.get('title') or file_metadata
.get('dc:title')
116 description
= (file_metadata
.get('description') or
117 file_metadata
.get('dc:description'))
119 license
= file_metadata
.get('license')
121 json_ld_metadata
= compact_and_validate(file_metadata
)
122 except ValidationError
as exc
:
123 error
= _(u
"""Error with media '{media_id}' value '{error_path}': {error_msg}
124 Metadata was not uploaded.""".format(
126 error_path
=exc
.path
[0],
127 error_msg
=exc
.message
))
131 url
= urlparse(original_location
)
# NOTE(review): str.split() without an argument splits on whitespace, so this
# takes the last whitespace-separated token of the path, not the last
# '/'-separated segment. If the base name is intended, confirm and fix.
132 filename
= url
.path
.split()[-1]
# NOTE(review): only the 'http' scheme and the empty scheme are handled in
# the visible branches; an 'https' location matches neither.
134 if url
.scheme
== 'http':
135 res
= requests
.get(url
.geturl(), stream
=True)
138 elif url
.scheme
== '':
# Absolute paths are used as given; relative paths are resolved against the
# directory that contains the metadata file.
140 if os
.path
.isabs(path
):
141 file_abs_path
= os
.path
.abspath(path
)
143 file_path
= os
.path
.join(abs_metadata_dir
, path
)
144 file_abs_path
= os
.path
.abspath(file_path
)
# NOTE(review): file() is a Python 2 builtin (removed in Python 3), and mode
# 'r' opens the media in text mode -- presumably open(..., 'rb') is wanted
# for media files; confirm.
146 media_file
= file(file_abs_path
, 'r')
149 FAIL: Local file {filename} could not be accessed.
150 {filename} will not be uploaded.""".format(filename
=filename
)))
156 submitted_file
=media_file
,
158 title
=maybe_unicodeify(title
),
159 description
=maybe_unicodeify(description
),
160 license
=maybe_unicodeify(license
),
161 metadata
=json_ld_metadata
,
163 upload_limit
=upload_limit
, max_file_size
=max_file_size
)
164 print(_(u
"""Successfully submitted {filename}!
165 Be sure to look at the Media Processing Panel on your website to be sure it
166 uploaded successfully.""".format(filename
=filename
)))
168 except FileUploadLimit
:
170 u
"FAIL: This file is larger than the upload limits for this site."))
171 except UserUploadLimit
:
173 "FAIL: This file will put this user past their upload limits."))
174 except UserPastUploadLimit
:
175 print(_("FAIL: This user is already past their upload limits."))
177 "{files_uploaded} out of {files_attempted} files successfully submitted".format(
178 files_uploaded
=files_uploaded
,
179 files_attempted
=files_attempted
)))
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    """Yield every CSV row of *unicode_csv_data* as a list of text cells.

    :param unicode_csv_data: iterable of text (unicode) lines.
    :param dialect: csv dialect handed to ``csv.reader``
        (default ``csv.excel``).
    :param kwargs: extra formatting parameters forwarded to ``csv.reader``.
    :returns: a generator of rows, each row being a list of text strings.
    """
    # Local import: only needed for the interpreter check below.
    import sys

    if sys.version_info[0] >= 3:
        # Python 3's csv module is text-native and rejects byte lines, so
        # the UTF-8 round trip used for Python 2 must be skipped here.
        for row in csv.reader(unicode_csv_data, dialect=dialect, **kwargs):
            yield row
        return

    # Python 2's csv.py doesn't do Unicode; encode temporarily as UTF-8:
    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
                            dialect=dialect, **kwargs)
    for row in csv_reader:
        # decode UTF-8 back to Unicode, cell by cell:
        yield [cell.decode('utf-8') for cell in row]
def utf_8_encoder(unicode_csv_data):
    """Lazily yield each item of *unicode_csv_data* encoded as UTF-8."""
    for text_line in unicode_csv_data:
        encoded_line = text_line.encode('utf-8')
        yield encoded_line
def parse_csv_file(file_contents):
    """
    The helper function which converts the csv file into a dictionary where each
    item's key is the provided value 'id' and each item's value is another
    dictionary.

    The first line of *file_contents* is split on ',' to obtain the column
    names. Every later line that is not blank is parsed as one csv record and
    turned into a ``{column name: cell}`` dictionary. A record whose 'id'
    cell is missing or empty is keyed by its zero-based position among the
    lines following the header instead.

    :param file_contents: the complete text of the csv file.
    :returns: ``{media id: {column name: cell value}}``.
    :raises IndexError: if a record has more cells than the header has
        column names.
    """
    list_of_contents = file_contents.split('\n')
    key, lines = list_of_contents[0].split(','), list_of_contents[1:]
    objects_dict = {}

    # Build a dictionary
    for index, line in enumerate(lines):
        # Skip empty and whitespace-only lines.
        if not line.strip():
            continue
        # The builtin next() works on Python 2 and 3; the iterator method
        # ``.next()`` only exists on Python 2.
        values = next(unicode_csv_reader([line]))
        line_dict = {key[i]: val for i, val in enumerate(values)}
        media_id = line_dict.get('id') or index
        objects_dict[media_id] = line_dict

    return objects_dict