1 # Vorbis comment support for Mutagen
2 # Copyright 2005-2006 Joe Wreschnig
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of version 2 of the GNU General Public License as
6 # published by the Free Software Foundation.
8 """Read and write Vorbis comment data.

Vorbis comments are freeform key/value pairs; keys are
case-insensitive ASCII and values are Unicode strings. A key may have
multiple values.

The specification is at http://www.xiph.org/vorbis/doc/v-comment.html.
"""
import sys

from cStringIO import StringIO

import mutagen
from mutagen._util import DictMixin, cdata

try:
    set
except NameError:
    # Interpreters without a builtin set type fall back to the sets module.
    from sets import Set as set
def is_valid_key(key):
    """Return true if a string is a valid Vorbis comment key.

    Valid Vorbis comment keys are printable ASCII between 0x20 (space)
    and 0x7D ('}'), excluding '='. The empty string is not a valid key.

    key -- the candidate key; it is iterated character by character
    """
    for char in key:
        if char < " " or char > "}" or char == "=":
            return False
    # Every character passed; only a non-empty key is acceptable.
    return bool(key)
class error(IOError):
    """Base class for the errors raised by this module."""
class VorbisUnsetFrameError(error):
    """Raised when a required framing bit is not set."""
class VorbisEncodingError(error):
    """Raised when a comment or its tag name cannot be interpreted."""
class VComment(mutagen.Metadata, list):
    """A Vorbis comment parser, accessor, and renderer.

    All comment ordering is preserved. A VComment is a list of
    key/value pairs, and so any Python list method can be used on it.

    Vorbis comments are always wrapped in something like an Ogg Vorbis
    bitstream or a FLAC metadata block, so this loads string data or a
    file-like object, not a filename.

    Attributes:
    vendor -- the stream 'vendor' (i.e. writer); default 'Mutagen'
    """

    vendor = u"Mutagen " + mutagen.version_string

    def __init__(self, data=None, *args, **kwargs):
        """Create a comment, optionally parsing existing data.

        data -- a byte string or an object with a 'read' method; when
                omitted the comment starts out empty
        Any further arguments are handed to load() unchanged.

        Raises TypeError if data is neither a string nor file-like.
        """
        # Collect the args to pass to load, this lets child classes
        # override just load and get equivalent magic for the
        # constructor.
        if data is not None:
            if isinstance(data, str):
                data = StringIO(data)
            elif not hasattr(data, 'read'):
                raise TypeError("VComment requires string data or a file-like")
            self.load(data, *args, **kwargs)

    def load(self, fileobj, errors='replace', framing=True):
        """Parse a Vorbis comment from a file-like object.

        Keyword arguments:
        errors:
          'strict', 'replace', or 'ignore'. This affects Unicode decoding
          and how other malformed content is interpreted.
        framing -- if true, fail if a framing bit is not present

        Framing bits are required by the Vorbis comment specification,
        but are not used in FLAC Vorbis comment blocks.

        Raises error (or one of its subclasses) if the data cannot be
        parsed.
        """
        try:
            vendor_length = cdata.uint_le(fileobj.read(4))
            self.vendor = fileobj.read(vendor_length).decode('utf-8', errors)
            count = cdata.uint_le(fileobj.read(4))
            # count comes straight from the file; xrange avoids building
            # a list of that many integers for a corrupt header.
            for i in xrange(count):
                length = cdata.uint_le(fileobj.read(4))
                try:
                    string = fileobj.read(length).decode('utf-8', errors)
                except (OverflowError, MemoryError):
                    raise error("cannot read %d bytes, too large" % length)
                try:
                    tag, value = string.split('=', 1)
                except ValueError:
                    # The comment has no '=' separator.
                    if errors == "ignore":
                        continue
                    elif errors == "replace":
                        tag, value = u"unknown%d" % i, string
                    else:
                        raise VorbisEncodingError(str(sys.exc_info()[1]))
                try:
                    tag = tag.encode('ascii', errors)
                except UnicodeEncodeError:
                    raise VorbisEncodingError("invalid tag name %r" % tag)
                else:
                    # Comments whose key is not valid are dropped silently.
                    if is_valid_key(tag):
                        self.append((tag, value))
            # ord() of an empty read raises TypeError, handled below.
            if framing and not ord(fileobj.read(1)) & 0x01:
                raise VorbisUnsetFrameError("framing bit was unset")
        except (cdata.error, TypeError):
            raise error("file is not a valid Vorbis comment")

    def validate(self):
        """Validate keys and values.

        Check to make sure every key used is a valid Vorbis key, and
        that every value used is a valid Unicode or UTF-8 string. If
        any invalid keys or values are found, a ValueError is raised.

        Returns True when everything is valid.
        """
        if not isinstance(self.vendor, unicode):
            try:
                self.vendor.decode('utf-8')
            except UnicodeDecodeError:
                raise ValueError(
                    "%r is not a valid vendor string" % self.vendor)

        for key, value in self:
            try:
                key_ok = is_valid_key(key)
            except Exception:
                # Keys that cannot even be inspected are invalid too.
                key_ok = False
            if not key_ok:
                raise ValueError("%r is not a valid key" % key)
            if not isinstance(value, unicode):
                # Byte strings must decode as UTF-8. Calling encode() here
                # would decode them as ASCII first and reject valid UTF-8.
                try:
                    value.decode("utf-8")
                except Exception:
                    raise ValueError("%r is not a valid value" % value)
        return True

    def clear(self):
        """Clear all keys from the comment."""
        del self[:]

    def write(self, framing=True):
        """Return a string representation of the data.

        Validation is always performed, so calling this function on
        invalid data may raise a ValueError.

        Keyword arguments:
        framing -- if true, append a framing bit (see load)
        """
        self.validate()

        # Unicode objects are encoded; byte strings were checked by
        # validate() to be UTF-8 already and are written unchanged.
        vendor = self.vendor
        if isinstance(vendor, unicode):
            vendor = vendor.encode('utf-8')

        chunks = [cdata.to_uint_le(len(vendor)), vendor,
                  cdata.to_uint_le(len(self))]
        for tag, value in self:
            if isinstance(tag, unicode):
                # validate() restricted the key to ASCII characters.
                tag = tag.encode('ascii')
            if isinstance(value, unicode):
                value = value.encode('utf-8')
            comment = "%s=%s" % (tag, value)
            chunks.append(cdata.to_uint_le(len(comment)))
            chunks.append(comment)
        if framing:
            chunks.append("\x01")
        return "".join(chunks)

    def pprint(self):
        """Return the comments as 'key=value' lines, keys lowercased."""
        return "\n".join(["%s=%s" % (k.lower(), v) for k, v in self])
class VCommentDict(VComment, DictMixin):
    """A VComment that looks like a dictionary.

    This object differs from a dictionary in two ways. First,
    len(comment) will still return the number of values, not the
    number of keys. Secondly, iterating through the object will
    iterate over (key, value) pairs, not keys. Since a key may have
    multiple values, the same value may appear multiple times while
    iterating.

    Since Vorbis comment keys are case-insensitive, all keys are
    normalized to lowercase ASCII.
    """

    def __getitem__(self, key):
        """A list of values for the key.

        This is a copy, so comment['title'].append('a title') will not
        work.

        Raises KeyError if the key has no values.
        """
        key = key.lower().encode('ascii')
        values = [value for (k, value) in self if k.lower() == key]
        if not values:
            raise KeyError(key)
        return values

    def __delitem__(self, key):
        """Delete all values associated with the key.

        Raises KeyError if the key has no values.
        """
        key = key.lower().encode('ascii')
        to_delete = [item for item in self if item[0].lower() == key]
        if not to_delete:
            raise KeyError(key)
        for item in to_delete:
            self.remove(item)

    def __contains__(self, key):
        """Return true if the key has any values."""
        key = key.lower().encode('ascii')
        for k, value in self:
            if k.lower() == key:
                return True
        return False

    def __setitem__(self, key, values):
        """Set a key's value or values.

        Setting a value overwrites all old ones. The value may be a
        list of Unicode or UTF-8 strings, or a single Unicode or UTF-8
        string.
        """
        key = key.lower().encode('ascii')
        if not isinstance(values, list):
            values = [values]
        # Drop any existing values; a missing key is not an error here.
        try:
            del self[key]
        except KeyError:
            pass
        for value in values:
            self.append((key, value))

    def keys(self):
        """Return all keys in the comment.

        The result is always a new list of the distinct lowercased
        keys, in no particular order; it is empty for an empty comment.
        """
        return list(set([k.lower() for k, v in self]))

    def as_dict(self):
        """Return a copy of the comment data in a real dict."""
        return dict([(key, self[key]) for key in self.keys()])