Tentative fix for LRUcache bug (with Python 2.7.11?), reported by
[pyTivo/wmcbrine.git] / mutagen / m4a.py
blobcfd310c757b604e113578280b0d05c90495fdee1
1 # Copyright 2006 Joe Wreschnig
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License version 2 as
5 # published by the Free Software Foundation.
7 # $Id: m4a.py 4231 2007-12-15 08:13:53Z luks $
9 """Read and write MPEG-4 audio files with iTunes metadata.
11 This module will read MPEG-4 audio information and metadata,
12 as found in Apple's M4A (aka MP4, M4B, M4P) files.
14 There is no official specification for this format. The source code
15 for TagLib, FAAD, and various MPEG specifications at
16 http://developer.apple.com/documentation/QuickTime/QTFF/,
17 http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt,
18 and http://wiki.multimedia.cx/index.php?title=Apple_QuickTime were all
19 consulted.
21 This module does not support 64 bit atom sizes, and so will not
22 work on metadata over 4GB.
23 """
25 import struct
26 import sys
28 from cStringIO import StringIO
30 from mutagen import FileType, Metadata
31 from mutagen._constants import GENRES
32 from mutagen._util import cdata, insert_bytes, delete_bytes, DictProxy
class error(IOError):
    """Base class of every exception defined by this module."""


class M4AMetadataError(error):
    """Raised when the iTunes metadata of a file cannot be read."""


class M4AStreamInfoError(error):
    """Raised when the audio stream information cannot be read."""


class M4AMetadataValueError(ValueError, M4AMetadataError):
    """Raised when a tag value cannot be rendered into an atom."""
import warnings

# Importing this module at all is deprecated, so warn right away.
warnings.warn(
    "mutagen.m4a is deprecated; use mutagen.mp4 instead.",
    DeprecationWarning)

# Names of the atoms whose children this module has to look at.  The
# list is deliberately incomplete: it only names what is needed here.
_CONTAINERS = [
    "moov", "udta", "trak", "mdia", "meta", "ilst", "stbl", "minf", "stsd",
]

# Extra bytes to skip, right after the 8 byte atom header, before the
# children of the named container start.
_SKIP_SIZE = {"meta": 4}

__all__ = ['M4A', 'Open', 'delete', 'M4ACover']
class M4ACover(str):
    """Cover artwork: the raw image data as a str, tagged with its format.

    Attributes:
    imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG)
    """

    FORMAT_JPEG = 0x0D
    FORMAT_PNG = 0x0E

    def __new__(cls, data, imageformat=None):
        """Create a cover from image data.

        data -- the image data
        imageformat -- FORMAT_JPEG or FORMAT_PNG; None means FORMAT_JPEG
        """
        if imageformat is None:
            imageformat = M4ACover.FORMAT_JPEG
        cover = str.__new__(cls, data)
        cover.imageformat = imageformat
        # Only provide a 'format' attribute when the str type does not
        # already have an attribute of that name.
        try:
            cover.format
        except AttributeError:
            cover.format = imageformat
        return cover
class Atom(object):
    """One atom of an MPEG-4 file, together with any child atoms.

    Attributes:
    children -- list child atoms (or None for non-container atoms)
    length -- length of this atom, including length and name
    name -- four byte name of the atom, as a str
    offset -- location in the constructor-given fileobj of this atom

    This structure should only be used internally by Mutagen.
    """

    children = None

    def __init__(self, fileobj):
        """Parse the atom that starts at fileobj's current position.

        Container atoms (see _CONTAINERS) are parsed recursively; for
        any other atom the file position is moved to the atom's end.
        An atom whose length field is below 8 is left as it is, with
        the file position just past its 8 byte header.

        Raises error if the atom uses a 64 bit size (length field 1).
        """
        self.offset = fileobj.tell()
        self.length, self.name = struct.unpack(">I4s", fileobj.read(8))
        if self.length == 1:
            raise error("64 bit atom sizes are not supported")
        if self.length < 8:
            return

        if self.name not in _CONTAINERS:
            # Not a container: jump straight over the payload.
            fileobj.seek(self.offset + self.length, 0)
            return

        self.children = []
        # Some containers carry extra bytes between header and children.
        fileobj.seek(_SKIP_SIZE.get(self.name, 0), 1)
        while fileobj.tell() < self.offset + self.length:
            self.children.append(Atom(fileobj))

    def render(name, data):
        """Render raw atom data."""
        # this raises OverflowError if Py_ssize_t can't handle the atom data
        total = len(data) + 8
        if total > 0xFFFFFFFF:
            # Too big for the 32 bit field: write the marker value 1
            # followed by a 64 bit size, which makes the header 8 bytes
            # longer.
            header = struct.pack(">I4sQ", 1, name, total + 8)
        else:
            header = struct.pack(">I4s", total, name)
        return header + data
    render = staticmethod(render)

    def __getitem__(self, remaining):
        """Look up a child atom, potentially recursively.

        e.g. atom['udta', 'meta'] => <Atom name='meta' ...>

        An empty 'remaining' returns this atom itself.  Raises KeyError
        if this atom is not a container or has no child of that name.
        """
        if not remaining:
            return self
        if self.children is None:
            raise KeyError("%r is not a container" % self.name)
        for child in self.children:
            if child.name == remaining[0]:
                return child[remaining[1:]]
        raise KeyError("%r not found" % remaining[0])

    def __repr__(self):
        head = "<%s name=%r length=%r offset=%r" % (
            self.__class__.__name__, self.name, self.length, self.offset)
        if self.children is None:
            return head + ">"
        # Show every line of every child one level further in.
        indented = []
        for child in self.children:
            for line in repr(child).splitlines():
                indented.append(" " + line)
        return "%s\n%s>" % (head, "\n".join(indented))
class Atoms(object):
    """The top-level atoms of one file.

    Attributes:
    atoms -- a list of top-level atoms as Atom objects

    This structure should only be used internally by Mutagen.
    """

    def __init__(self, fileobj):
        """Parse atoms from the start of fileobj up to its end."""
        self.atoms = []
        # Find the file size first, then parse from the beginning.
        fileobj.seek(0, 2)
        size = fileobj.tell()
        fileobj.seek(0)
        while fileobj.tell() < size:
            self.atoms.append(Atom(fileobj))

    def path(self, *names):
        """Look up and return the complete path of an atom.

        For example, atoms.path('moov', 'udta', 'meta') will return a
        list of three atoms, corresponding to the moov, udta, and meta
        atoms.
        """
        found = []
        node = self
        for name in names:
            node = node[name,]
            found.append(node)
        return found

    def __getitem__(self, names):
        """Look up a child atom.

        'names' may be a list of atoms (['moov', 'udta']) or a string
        specifying the complete path ('moov.udta').
        """
        if isinstance(names, basestring):
            names = names.split(".")
        first = names[0]
        for atom in self.atoms:
            if atom.name == first:
                return atom[names[1:]]
        raise KeyError("%s not found" % first)

    def __repr__(self):
        return "\n".join(map(repr, self.atoms))
180 class M4ATags(DictProxy, Metadata):
181 """Dictionary containing Apple iTunes metadata list key/values.
183 Keys are four byte identifiers, except for freeform ('----')
184 keys. Values are usually unicode strings, but some atoms have a
185 special structure:
186 cpil -- boolean
187 trkn, disk -- tuple of 16 bit ints (current, total)
188 tmpo -- 16 bit int
189 covr -- list of M4ACover objects (which are tagged strs)
190 gnre -- not supported. Use '\\xa9gen' instead.
192 The freeform '----' frames use a key in the format '----:mean:name'
193 where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique
194 identifier for this frame. The value is a str, but is probably
195 text that can be decoded as UTF-8.
197 M4A tag data cannot exist outside of the structure of an M4A file,
198 so this class should not be manually instantiated.
200 Unknown non-text tags are removed.
203 def load(self, atoms, fileobj):
204 try: ilst = atoms["moov.udta.meta.ilst"]
205 except KeyError, key:
206 raise M4AMetadataError(key)
207 for atom in ilst.children:
208 fileobj.seek(atom.offset + 8)
209 data = fileobj.read(atom.length - 8)
210 parse = self.__atoms.get(atom.name, (M4ATags.__parse_text,))[0]
211 parse(self, atom, data)
213 def __key_sort(item1, item2):
214 (key1, v1) = item1
215 (key2, v2) = item2
216 # iTunes always writes the tags in order of "relevance", try
217 # to copy it as closely as possible.
218 order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb",
219 "\xa9gen", "gnre", "trkn", "disk",
220 "\xa9day", "cpil", "tmpo", "\xa9too",
221 "----", "covr", "\xa9lyr"]
222 order = dict(zip(order, range(len(order))))
223 last = len(order)
224 # If there's no key-based way to distinguish, order by length.
225 # If there's still no way, go by string comparison on the
226 # values, so we at least have something determinstic.
227 return (cmp(order.get(key1[:4], last), order.get(key2[:4], last)) or
228 cmp(len(v1), len(v2)) or cmp(v1, v2))
229 __key_sort = staticmethod(__key_sort)
231 def save(self, filename):
232 """Save the metadata to the given filename."""
233 values = []
234 items = self.items()
235 items.sort(self.__key_sort)
236 for key, value in items:
237 render = self.__atoms.get(
238 key[:4], (None, M4ATags.__render_text))[1]
239 values.append(render(self, key, value))
240 data = Atom.render("ilst", "".join(values))
242 # Find the old atoms.
243 fileobj = open(filename, "rb+")
244 try:
245 atoms = Atoms(fileobj)
247 moov = atoms["moov"]
249 if moov != atoms.atoms[-1]:
250 # "Free" the old moov block. Something in the mdat
251 # block is not happy when its offset changes and it
252 # won't play back. So, rather than try to figure that
253 # out, just move the moov atom to the end of the file.
254 offset = self.__move_moov(fileobj, moov)
255 else:
256 offset = 0
258 try:
259 path = atoms.path("moov", "udta", "meta", "ilst")
260 except KeyError:
261 self.__save_new(fileobj, atoms, data, offset)
262 else:
263 self.__save_existing(fileobj, atoms, path, data, offset)
264 finally:
265 fileobj.close()
267 def __move_moov(self, fileobj, moov):
268 fileobj.seek(moov.offset)
269 data = fileobj.read(moov.length)
270 fileobj.seek(moov.offset)
271 free = Atom.render("free", "\x00" * (moov.length - 8))
272 fileobj.write(free)
273 fileobj.seek(0, 2)
274 # Figure out how far we have to shift all our successive
275 # seek calls, relative to what the atoms say.
276 old_end = fileobj.tell()
277 fileobj.write(data)
278 return old_end - moov.offset
280 def __save_new(self, fileobj, atoms, ilst, offset):
281 hdlr = Atom.render("hdlr", "\x00" * 8 + "mdirappl" + "\x00" * 9)
282 meta = Atom.render("meta", "\x00\x00\x00\x00" + hdlr + ilst)
283 moov, udta = atoms.path("moov", "udta")
284 insert_bytes(fileobj, len(meta), udta.offset + offset + 8)
285 fileobj.seek(udta.offset + offset + 8)
286 fileobj.write(meta)
287 self.__update_parents(fileobj, [moov, udta], len(meta), offset)
289 def __save_existing(self, fileobj, atoms, path, data, offset):
290 # Replace the old ilst atom.
291 ilst = path.pop()
292 delta = len(data) - ilst.length
293 fileobj.seek(ilst.offset + offset)
294 if delta > 0:
295 insert_bytes(fileobj, delta, ilst.offset + offset)
296 elif delta < 0:
297 delete_bytes(fileobj, -delta, ilst.offset + offset)
298 fileobj.seek(ilst.offset + offset)
299 fileobj.write(data)
300 self.__update_parents(fileobj, path, delta, offset)
302 def __update_parents(self, fileobj, path, delta, offset):
303 # Update all parent atoms with the new size.
304 for atom in path:
305 fileobj.seek(atom.offset + offset)
306 size = cdata.uint_be(fileobj.read(4)) + delta
307 fileobj.seek(atom.offset + offset)
308 fileobj.write(cdata.to_uint_be(size))
310 def __render_data(self, key, flags, data):
311 data = struct.pack(">2I", flags, 0) + data
312 return Atom.render(key, Atom.render("data", data))
314 def __parse_freeform(self, atom, data):
315 try:
316 fileobj = StringIO(data)
317 mean_length = cdata.uint_be(fileobj.read(4))
318 # skip over 8 bytes of atom name, flags
319 mean = fileobj.read(mean_length - 4)[8:]
320 name_length = cdata.uint_be(fileobj.read(4))
321 name = fileobj.read(name_length - 4)[8:]
322 value_length = cdata.uint_be(fileobj.read(4))
323 # Name, flags, and reserved bytes
324 value = fileobj.read(value_length - 4)[12:]
325 except struct.error:
326 # Some ---- atoms have no data atom, I have no clue why
327 # they actually end up in the file.
328 pass
329 else:
330 self["%s:%s:%s" % (atom.name, mean, name)] = value
331 def __render_freeform(self, key, value):
332 dummy, mean, name = key.split(":", 2)
333 mean = struct.pack(">I4sI", len(mean) + 12, "mean", 0) + mean
334 name = struct.pack(">I4sI", len(name) + 12, "name", 0) + name
335 value = struct.pack(">I4s2I", len(value) + 16, "data", 0x1, 0) + value
336 final = mean + name + value
337 return Atom.render("----", mean + name + value)
339 def __parse_pair(self, atom, data):
340 self[atom.name] = struct.unpack(">2H", data[18:22])
341 def __render_pair(self, key, value):
342 track, total = value
343 if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
344 data = struct.pack(">4H", 0, track, total, 0)
345 return self.__render_data(key, 0, data)
346 else:
347 raise M4AMetadataValueError("invalid numeric pair %r" % (value,))
349 def __render_pair_no_trailing(self, key, value):
350 track, total = value
351 if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
352 data = struct.pack(">3H", 0, track, total)
353 return self.__render_data(key, 0, data)
354 else:
355 raise M4AMetadataValueError("invalid numeric pair %r" % (value,))
357 def __parse_genre(self, atom, data):
358 # Translate to a freeform genre.
359 genre = cdata.short_be(data[16:18])
360 if "\xa9gen" not in self:
361 try: self["\xa9gen"] = GENRES[genre - 1]
362 except IndexError: pass
364 def __parse_tempo(self, atom, data):
365 self[atom.name] = cdata.short_be(data[16:18])
366 def __render_tempo(self, key, value):
367 if 0 <= value < 1 << 16:
368 return self.__render_data(key, 0x15, cdata.to_ushort_be(value))
369 else:
370 raise M4AMetadataValueError("invalid short integer %r" % value)
372 def __parse_compilation(self, atom, data):
373 try: self[atom.name] = bool(ord(data[16:17]))
374 except TypeError: self[atom.name] = False
376 def __render_compilation(self, key, value):
377 return self.__render_data(key, 0x15, chr(bool(value)))
379 def __parse_cover(self, atom, data):
380 length, name, imageformat = struct.unpack(">I4sI", data[:12])
381 if name != "data":
382 raise M4AMetadataError(
383 "unexpected atom %r inside 'covr'" % name)
384 if imageformat not in (M4ACover.FORMAT_JPEG, M4ACover.FORMAT_PNG):
385 imageformat = M4ACover.FORMAT_JPEG
386 self[atom.name]= M4ACover(data[16:length], imageformat)
387 def __render_cover(self, key, value):
388 try: imageformat = value.imageformat
389 except AttributeError: imageformat = M4ACover.FORMAT_JPEG
390 data = Atom.render("data", struct.pack(">2I", imageformat, 0) + value)
391 return Atom.render(key, data)
393 def __parse_text(self, atom, data):
394 flags = cdata.uint_be(data[8:12])
395 if flags == 1:
396 self[atom.name] = data[16:].decode('utf-8', 'replace')
397 def __render_text(self, key, value):
398 return self.__render_data(key, 0x1, value.encode('utf-8'))
400 def delete(self, filename):
401 self.clear()
402 self.save(filename)
404 __atoms = {
405 "----": (__parse_freeform, __render_freeform),
406 "trkn": (__parse_pair, __render_pair),
407 "disk": (__parse_pair, __render_pair_no_trailing),
408 "gnre": (__parse_genre, None),
409 "tmpo": (__parse_tempo, __render_tempo),
410 "cpil": (__parse_compilation, __render_compilation),
411 "covr": (__parse_cover, __render_cover),
414 def pprint(self):
415 values = []
416 for key, value in self.iteritems():
417 key = key.decode('latin1')
418 try: values.append("%s=%s" % (key, value))
419 except UnicodeDecodeError:
420 values.append("%s=[%d bytes of data]" % (key, len(value)))
421 return "\n".join(values)
class M4AInfo(object):
    """MPEG-4 stream information.

    Attributes:
    bitrate -- bitrate in bits per second, as an int
    length -- file length in seconds, as a float
    """

    # Used when the file provides no (readable) bitrate.
    bitrate = 0

    def __init__(self, atoms, fileobj):
        """Read the stream information of the first track.

        atoms -- the parsed Atoms of the file
        fileobj -- the open file those atoms were parsed from

        Raises M4AStreamInfoError if the track's hdlr atom does not
        contain 'soun', and KeyError if a required atom is missing.
        """
        hdlr = atoms["moov.trak.mdia.hdlr"]
        fileobj.seek(hdlr.offset)
        if "soun" not in fileobj.read(hdlr.length):
            raise M4AStreamInfoError("track has no audio data")

        mdhd = atoms["moov.trak.mdia.mdhd"]
        fileobj.seek(mdhd.offset)
        data = fileobj.read(mdhd.length)
        # The byte right after the 8 byte atom header selects the layout
        # (presumably the mdhd version -- TODO confirm): 0 means a 32 bit
        # length field, anything else a 64 bit one further into the atom.
        if ord(data[8]) == 0:
            offset = 20
            fmt = ">2I"
        else:
            offset = 28
            fmt = ">IQ"
        end = offset + struct.calcsize(fmt)
        unit, length = struct.unpack(fmt, data[offset:end])
        self.length = float(length) / unit

        try:
            atom = atoms["moov.trak.mdia.minf.stbl.stsd"]
            fileobj.seek(atom.offset)
            data = fileobj.read(atom.length)
            self.bitrate = cdata.uint_be(data[-17:-13])
        except (ValueError, KeyError, struct.error):
            # Bitrate values are optional.  struct.error is what the
            # cdata helpers raise when the atom is too short to hold
            # the field, which must not make the whole file unreadable.
            pass

    def pprint(self):
        """Return a one-line description of the stream."""
        return "MPEG-4 audio, %.2f seconds, %d bps" % (
            self.length, self.bitrate)
465 class M4A(FileType):
466 """An MPEG-4 audio file, probably containing AAC.
468 If more than one track is present in the file, the first is used.
469 Only audio ('soun') tracks will be read.
472 _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"]
474 def load(self, filename):
475 self.filename = filename
476 fileobj = open(filename, "rb")
477 try:
478 atoms = Atoms(fileobj)
479 try: self.info = M4AInfo(atoms, fileobj)
480 except StandardError, err:
481 raise M4AStreamInfoError, err, sys.exc_info()[2]
482 try: self.tags = M4ATags(atoms, fileobj)
483 except M4AMetadataError:
484 self.tags = None
485 except StandardError, err:
486 raise M4AMetadataError, err, sys.exc_info()[2]
487 finally:
488 fileobj.close()
490 def add_tags(self):
491 self.tags = M4ATags()
493 def score(filename, fileobj, header):
494 return ("ftyp" in header) + ("mp4" in header)
495 score = staticmethod(score)
# Second name for the M4A class; also listed in __all__.
Open = M4A


def delete(filename):
    """Remove tags from a file."""
    m4a_file = M4A(filename)
    m4a_file.delete()