mutagen/id3.py

   1 # id3 support for mutagen
   2 # Copyright (C) 2005  Michael Urman
   3 #
   4 # This program is free software; you can redistribute it and/or modify
   5 # it under the terms of version 2 of the GNU General Public License as
   6 # published by the Free Software Foundation.
   7 #
   8 # $Id: id3.py 4285 2008-09-06 08:01:31Z piman $
   9
  10 """ID3v2 reading and writing.
  11
  12 This is based off of the following references:
  13    http://www.id3.org/id3v2.4.0-structure.txt
  14    http://www.id3.org/id3v2.4.0-frames.txt
  15    http://www.id3.org/id3v2.3.0.html
  16    http://www.id3.org/id3v2-00.txt
  17    http://www.id3.org/id3v1.html
  18
  19 Its largest deviation from the above (versions 2.3 and 2.2) is that it
  20 will not interpret the / characters as a separator, and will almost
  21 always accept null separators to generate multi-valued text frames.
  22
  23 Because ID3 frame structure differs between frame types, each frame is
  24 implemented as a different class (e.g. TIT2 as mutagen.id3.TIT2). Each
  25 frame's documentation contains a list of its attributes.
  26
  27 Since this file's documentation is a little unwieldy, you are probably
  28 interested in the 'ID3' class to start with.
  29 """
  30
  31 __all__ = ['ID3', 'ID3FileType', 'Frames', 'Open', 'delete']
  32
  33 import struct
  34
  35 from struct import unpack, pack, error as StructError
  36 from zlib import error as zlibError
  37 from warnings import warn
  38
  39 import mutagen
  40 from mutagen._util import insert_bytes, delete_bytes, DictProxy
  41
  42 class error(Exception): pass
  43 class ID3NoHeaderError(error, ValueError): pass
  44 class ID3BadUnsynchData(error, ValueError): pass
  45 class ID3BadCompressedData(error, ValueError): pass
  46 class ID3TagError(error, ValueError): pass
  47 class ID3UnsupportedVersionError(error, NotImplementedError): pass
  48 class ID3EncryptionUnsupportedError(error, NotImplementedError): pass
  49 class ID3JunkFrameError(error, ValueError): pass
  50
  51 class ID3Warning(error, UserWarning): pass
  52
  53 def is_valid_frame_id(frame_id):
  54     return frame_id.isalnum() and frame_id.isupper()
  55
  56 class ID3(DictProxy, mutagen.Metadata):
  57     """A file with an ID3v2 tag.
  58
  59     Attributes:
  60     version -- ID3 tag version as a tuple
  61     unknown_frames -- raw frame data of any unknown frames found
  62     size -- the total size of the ID3 tag, including the header
  63     """
  64
  65     PEDANTIC = True
  66     version = (2, 4, 0)
  67
  68     filename = None
  69     size = 0
  70     __flags = 0
  71     __readbytes = 0
  72     __crc = None
  73
  74     def __init__(self, *args, **kwargs):
  75         self.unknown_frames = []
  76         super(ID3, self).__init__(*args, **kwargs)
  77
  78     def __fullread(self, size):
  79         try:
  80             if size < 0:
  81                 raise ValueError('Requested bytes (%s) less than zero' % size)
  82             if size > self.__filesize:
  83                 raise EOFError('Requested %#x of %#x (%s)' %
  84                         (long(size), long(self.__filesize), self.filename))
  85         except AttributeError: pass
  86         data = self.__fileobj.read(size)
  87         if len(data) != size: raise EOFError
  88         self.__readbytes += size
  89         return data
  90
  91     def load(self, filename, known_frames=None, translate=True):
  92         """Load tags from a filename.
  93
  94         Keyword arguments:
  95         filename -- filename to load tag data from
  96         known_frames -- dict mapping frame IDs to Frame objects
  97         translate -- Update all tags to ID3v2.4 internally. Mutagen is
  98                      only capable of writing ID3v2.4 tags, so if you
  99                      intend to save, this must be true.
 100
 101         Example of loading a custom frame:
 102             my_frames = dict(mutagen.id3.Frames)
 103             class XMYF(Frame): ...
 104             my_frames["XMYF"] = XMYF
 105             mutagen.id3.ID3(filename, known_frames=my_frames)
 106         """
 107
 108         from os.path import getsize
 109         self.filename = filename
 110         self.__known_frames = known_frames
 111         self.__fileobj = open(filename, 'rb')
 112         self.__filesize = getsize(filename)
 113         try:
 114             try:
 115                 self.__load_header()
 116             except EOFError:
 117                 self.size = 0
 118                 raise ID3NoHeaderError("%s: too small (%d bytes)" %(
 119                     filename, self.__filesize))
 120             except (ID3NoHeaderError, ID3UnsupportedVersionError), err:
 121                 self.size = 0
 122                 import sys
 123                 stack = sys.exc_info()[2]
 124                 try: self.__fileobj.seek(-128, 2)
 125                 except EnvironmentError: raise err, None, stack
 126                 else:
 127                     frames = ParseID3v1(self.__fileobj.read(128))
 128                     if frames is not None:
 129                         self.version = (1, 1)
 130                         map(self.add, frames.values())
 131                     else: raise err, None, stack
 132             else:
 133                 frames = self.__known_frames
 134                 if frames is None:
 135                     if (2,3,0) <= self.version: frames = Frames
 136                     elif (2,2,0) <= self.version: frames = Frames_2_2
 137                 data = self.__fullread(self.size - 10)
 138                 for frame in self.__read_frames(data, frames=frames):
 139                     if isinstance(frame, Frame): self.add(frame)
 140                     else: self.unknown_frames.append(frame)
 141         finally:
 142             self.__fileobj.close()
 143             del self.__fileobj
 144             del self.__filesize
 145             if translate:
 146                 self.update_to_v24()
 147
 148     def getall(self, key):
 149         """Return all frames with a given name (the list may be empty).
 150
 151         This is best explained by examples:
 152             id3.getall('TIT2') == [id3['TIT2']]
 153             id3.getall('TTTT') == []
 154             id3.getall('TXXX') == [TXXX(desc='woo', text='bar'),
 155                                    TXXX(desc='baz', text='quuuux'), ...]
 156
 157         Since this is based on the frame's HashKey, which is
 158         colon-separated, you can use it to do things like
 159         getall('COMM:MusicMatch') or getall('TXXX:QuodLibet:').
 160         """
 161         if key in self: return [self[key]]
 162         else:
 163             key = key + ":"
 164             return [v for s,v in self.items() if s.startswith(key)]
 165
 166     def delall(self, key):
 167         """Delete all tags of a given kind; see getall."""
 168         if key in self: del(self[key])
 169         else:
 170             key = key + ":"
 171             for k in filter(lambda s: s.startswith(key), self.keys()):
 172                 del(self[k])
 173
 174     def setall(self, key, values):
 175         """Delete frames of the given type and add frames in 'values'."""
 176         self.delall(key)
 177         for tag in values:
 178             self[tag.HashKey] = tag
 179
 180     def pprint(self):
 181         """Return tags in a human-readable format.
 182
 183         "Human-readable" is used loosely here. The format is intended
 184         to mirror that used for Vorbis or APEv2 output, e.g.
 185             TIT2=My Title
 186         However, ID3 frames can have multiple keys:
 187             POPM=user@example.org=3 128/255
 188         """
 189         frames = list(map(Frame.pprint, self.values()))
 190         frames.sort()
 191         return "\n".join(frames)
 192
 193     def loaded_frame(self, tag):
 194         """Deprecated; use the add method."""
 195         # turn 2.2 into 2.3/2.4 tags
 196         if len(type(tag).__name__) == 3: tag = type(tag).__base__(tag)
 197         self[tag.HashKey] = tag
 198
 199     # add = loaded_frame (and vice versa) break applications that
 200     # expect to be able to override loaded_frame (e.g. Quod Libet),
 201     # as does making loaded_frame call add.
 202     def add(self, frame):
 203         """Add a frame to the tag."""
 204         return self.loaded_frame(frame)
 205
 206     def __load_header(self):
 207         fn = self.filename
 208         data = self.__fullread(10)
 209         id3, vmaj, vrev, flags, size = unpack('>3sBBB4s', data)
 210         self.__flags = flags
 211         self.size = BitPaddedInt(size) + 10
 212         self.version = (2, vmaj, vrev)
 213
 214         if id3 != 'ID3':
 215             raise ID3NoHeaderError("'%s' doesn't start with an ID3 tag" % fn)
 216         if vmaj not in [2, 3, 4]:
 217             raise ID3UnsupportedVersionError("'%s' ID3v2.%d not supported"
 218                     % (fn, vmaj))
 219
 220         if self.PEDANTIC:
 221             if (2,4,0) <= self.version and (flags & 0x0f):
 222                 raise ValueError("'%s' has invalid flags %#02x" % (fn, flags))
 223             elif (2,3,0) <= self.version < (2,4,0) and (flags & 0x1f):
 224                 raise ValueError("'%s' has invalid flags %#02x" % (fn, flags))
 225
 226         if self.f_extended:
 227             extsize = self.__fullread(4)
 228             if extsize in Frames:
 229                 # Some tagger sets the extended header flag but
 230                 # doesn't write an extended header; in this case, the
 231                 # ID3 data follows immediately. Since no extended
 232                 # header is going to be long enough to actually match
 233                 # a frame, and if it's *not* a frame we're going to be
 234                 # completely lost anyway, this seems to be the most
 235                 # correct check.
 236                 # http://code.google.com/p/quodlibet/issues/detail?id=126
 237                 self.__flags ^= 0x40
 238                 self.__extsize = 0
 239                 self.__fileobj.seek(-4, 1)
 240                 self.__readbytes -= 4
 241             elif self.version >= (2,4,0):
 242                 # "Where the 'Extended header size' is the size of the whole
 243                 # extended header, stored as a 32 bit synchsafe integer."
 244                 self.__extsize = BitPaddedInt(extsize) - 4
 245             else:
 246                 # "Where the 'Extended header size', currently 6 or 10 bytes,
 247                 # excludes itself."
 248                 self.__extsize = unpack('>L', extsize)[0]
 249             if self.__extsize:
 250                 self.__extdata = self.__fullread(self.__extsize)
 251             else:
 252                 self.__extdata = ""
 253
 254     def __determine_bpi(self, data, frames, EMPTY="\x00" * 10):
 255         if self.version < (2, 4, 0):
 256             return int
 257         # have to special case whether to use bitpaddedints here
 258         # spec says to use them, but iTunes has it wrong
 259
 260         # count number of tags found as BitPaddedInt and how far past
 261         o = 0
 262         asbpi = 0
 263         while o < len(data) - 10:
 264             part = data[o:o + 10]
 265             if part == EMPTY:
 266                 bpioff = -((len(data) - o) % 10)
 267                 break
 268             name, size, flags = unpack('>4sLH', part)
 269             size = BitPaddedInt(size)
 270             o += 10 + size
 271             if name in frames:
 272                 asbpi += 1
 273         else:
 274             bpioff = o - len(data)
 275
 276         # count number of tags found as int and how far past
 277         o = 0
 278         asint = 0
 279         while o < len(data) - 10:
 280             part = data[o:o + 10]
 281             if part == EMPTY:
 282                 intoff = -((len(data) - o) % 10)
 283                 break
 284             name, size, flags = unpack('>4sLH', part)
 285             o += 10 + size
 286             if name in frames:
 287                 asint += 1
 288         else:
 289             intoff = o - len(data)
 290
 291         # if more tags as int, or equal and bpi is past and int is not
 292         if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)):
 293             return int
 294         return BitPaddedInt
 295
 296     def __read_frames(self, data, frames):
 297         if self.version < (2,4,0) and self.f_unsynch:
 298             try: data = unsynch.decode(data)
 299             except ValueError: pass
 300
 301         if (2,3,0) <= self.version:
 302             bpi = self.__determine_bpi(data, frames)
 303             while data:
 304                 header = data[:10]
 305                 try: name, size, flags = unpack('>4sLH', header)
 306                 except struct.error: return # not enough header
 307                 if name.strip('\x00') == '': return
 308                 size = bpi(size)
 309                 framedata = data[10:10+size]
 310                 data = data[10+size:]
 311                 if size == 0: continue # drop empty frames
 312                 try: tag = frames[name]
 313                 except KeyError:
 314                     if is_valid_frame_id(name): yield header + framedata
 315                 else:
 316                     try: yield self.__load_framedata(tag, flags, framedata)
 317                     except NotImplementedError: yield header + framedata
 318                     except ID3JunkFrameError: pass
 319
 320         elif (2,2,0) <= self.version:
 321             while data:
 322                 header = data[0:6]
 323                 try: name, size = unpack('>3s3s', header)
 324                 except struct.error: return # not enough header
 325                 size, = struct.unpack('>L', '\x00'+size)
 326                 if name.strip('\x00') == '': return
 327                 framedata = data[6:6+size]
 328                 data = data[6+size:]
 329                 if size == 0: continue # drop empty frames
 330                 try: tag = frames[name]
 331                 except KeyError:
 332                     if is_valid_frame_id(name): yield header + framedata
 333                 else:
 334                     try: yield self.__load_framedata(tag, 0, framedata)
 335                     except NotImplementedError: yield header + framedata
 336                     except ID3JunkFrameError: pass
 337
 338     def __load_framedata(self, tag, flags, framedata):
 339         return tag.fromData(self, flags, framedata)
 340
    # Read-only booleans, each testing one bit of the header flags byte
    # that __load_header stores in __flags.
    f_unsynch = property(lambda s: bool(s.__flags & 0x80))
    # __load_header clears this bit again when the flag is set but no
    # extended header is actually present.
    f_extended = property(lambda s: bool(s.__flags & 0x40))
    f_experimental = property(lambda s: bool(s.__flags & 0x20))
    f_footer = property(lambda s: bool(s.__flags & 0x10))
 345
 346     #f_crc = property(lambda s: bool(s.__extflags & 0x8000))
 347
 348     def save(self, filename=None, v1=1):
 349         """Save changes to a file.
 350
 351         If no filename is given, the one most recently loaded is used.
 352
 353         Keyword arguments:
 354         v1 -- if 0, ID3v1 tags will be removed
 355               if 1, ID3v1 tags will be updated but not added
 356               if 2, ID3v1 tags will be created and/or updated
 357
 358         The lack of a way to update only an ID3v1 tag is intentional.
 359         """
 360
 361         # Sort frames by 'importance'
 362         order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"]
 363         order = dict(zip(order, range(len(order))))
 364         last = len(order)
 365         frames = self.items()
 366         frames.sort(lambda a, b: cmp(order.get(a[0][:4], last),
 367                                      order.get(b[0][:4], last)))
 368
 369         framedata = [self.__save_frame(frame) for (key, frame) in frames]
 370         framedata.extend([data for data in self.unknown_frames
 371                 if len(data) > 10])
 372         if not framedata:
 373             try:
 374                 self.delete(filename)
 375             except EnvironmentError, err:
 376                 from errno import ENOENT
 377                 if err.errno != ENOENT: raise
 378             return
 379
 380         framedata = ''.join(framedata)
 381         framesize = len(framedata)
 382
 383         if filename is None: filename = self.filename
 384         try: f = open(filename, 'rb+')
 385         except IOError, err:
 386             from errno import ENOENT
 387             if err.errno != ENOENT: raise
 388             f = open(filename, 'ab') # create, then reopen
 389             f = open(filename, 'rb+')
 390         try:
 391             idata = f.read(10)
 392             try: id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata)
 393             except struct.error: id3, insize = '', 0
 394             insize = BitPaddedInt(insize)
 395             if id3 != 'ID3': insize = -10
 396
 397             if insize >= framesize: outsize = insize
 398             else: outsize = (framesize + 1023) & ~0x3FF
 399             framedata += '\x00' * (outsize - framesize)
 400
 401             framesize = BitPaddedInt.to_str(outsize, width=4)
 402             flags = 0
 403             header = pack('>3sBBB4s', 'ID3', 4, 0, flags, framesize)
 404             data = header + framedata
 405
 406             if (insize < outsize):
 407                 insert_bytes(f, outsize-insize, insize+10)
 408             f.seek(0)
 409             f.write(data)
 410
 411             try:
 412                 f.seek(-128, 2)
 413             except IOError, err:
 414                 # If the file is too small, that's OK - it just means
 415                 # we're certain it doesn't have a v1 tag.
 416                 from errno import EINVAL
 417                 if err.errno != EINVAL:
 418                     # If we failed to see for some other reason, bail out.
 419                     raise
 420                 # Since we're sure this isn't a v1 tag, don't read it.
 421                 f.seek(0, 2)
 422
 423             data = f.read(128)
 424             try:
 425                 idx = data.index("TAG")
 426             except ValueError:
 427                 offset = 0
 428                 has_v1 = False
 429             else:
 430                 offset = idx - len(data)
 431                 has_v1 = True
 432
 433             f.seek(offset, 2)
 434             if v1 == 1 and has_v1 or v1 == 2:
 435                 f.write(MakeID3v1(self))
 436             else:
 437                 f.truncate()
 438
 439         finally:
 440             f.close()
 441
 442     def delete(self, filename=None, delete_v1=True, delete_v2=True):
 443         """Remove tags from a file.
 444
 445         If no filename is given, the one most recently loaded is used.
 446
 447         Keyword arguments:
 448         delete_v1 -- delete any ID3v1 tag
 449         delete_v2 -- delete any ID3v2 tag
 450         """
 451         if filename is None:
 452             filename = self.filename
 453         delete(filename, delete_v1, delete_v2)
 454         self.clear()
 455
 456     def __save_frame(self, frame):
 457         flags = 0
 458         if self.PEDANTIC and isinstance(frame, TextFrame):
 459             if len(str(frame)) == 0: return ''
 460         framedata = frame._writeData()
 461         usize = len(framedata)
 462         if usize > 2048:
 463             # Disabled as this causes iTunes and other programs
 464             # to fail to find these frames, which usually includes
 465             # e.g. APIC.
 466             #framedata = BitPaddedInt.to_str(usize) + framedata.encode('zlib')
 467             #flags |= Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN
 468             pass
 469         datasize = BitPaddedInt.to_str(len(framedata), width=4)
 470         header = pack('>4s4sH', type(frame).__name__, datasize, flags)
 471         return header + framedata
 472
 473     def update_to_v24(self):
 474         """Convert older tags into an ID3v2.4 tag.
 475
 476         This updates old ID3v2 frames to ID3v2.4 ones (e.g. TYER to
 477         TDRC). If you intend to save tags, you must call this function
 478         at some point; it is called by default when loading the tag.
 479         """
 480
 481         if self.version < (2,3,0): del self.unknown_frames[:]
 482         # unsafe to write
 483
 484         # TDAT, TYER, and TIME have been turned into TDRC.
 485         try:
 486             if str(self.get("TYER", "")).strip("\x00"):
 487                 date = str(self.pop("TYER"))
 488                 if str(self.get("TDAT", "")).strip("\x00"):
 489                     dat = str(self.pop("TDAT"))
 490                     date = "%s-%s-%s" % (date, dat[2:], dat[:2])
 491                     if str(self.get("TIME", "")).strip("\x00"):
 492                         time = str(self.pop("TIME"))
 493                         date += "T%s:%s:00" % (time[:2], time[2:])
 494                 if "TDRC" not in self:
 495                     self.add(TDRC(encoding=0, text=date))
 496         except UnicodeDecodeError:
 497             # Old ID3 tags have *lots* of Unicode problems, so if TYER
 498             # is bad, just chuck the frames.
 499             pass
 500
 501         # TORY can be the first part of a TDOR.
 502         if "TORY" in self:
 503             f = self.pop("TORY")
 504             if "TDOR" not in self:
 505                 try:
 506                     self.add(TDOR(encoding=0, text=str(f)))
 507                 except UnicodeDecodeError:
 508                     pass
 509
 510         # IPLS is now TIPL.
 511         if "IPLS" in self:
 512             f = self.pop("IPLS")
 513             if "TIPL" not in self:
 514                 self.add(TIPL(encoding=f.encoding, people=f.people))
 515
 516         if "TCON" in self:
 517             # Get rid of "(xx)Foobr" format.
 518             self["TCON"].genres = self["TCON"].genres
 519
 520         if self.version < (2, 3):
 521             # ID3v2.2 PIC frames are slightly different.
 522             pics = self.getall("APIC")
 523             mimes = { "PNG": "image/png", "JPG": "image/jpeg" }
 524             self.delall("APIC")
 525             for pic in pics:
 526                 newpic = APIC(
 527                     encoding=pic.encoding, mime=mimes.get(pic.mime, pic.mime),
 528                     type=pic.type, desc=pic.desc, data=pic.data)
 529                 self.add(newpic)
 530
 531             # ID3v2.2 LNK frames are just way too different to upgrade.
 532             self.delall("LINK")
 533
 534         # These can't be trivially translated to any ID3v2.4 tags, or
 535         # should have been removed already.
 536         for key in ["RVAD", "EQUA", "TRDA", "TSIZ", "TDAT", "TIME", "CRM"]:
 537             if key in self: del(self[key])
 538
 539 def delete(filename, delete_v1=True, delete_v2=True):
 540     """Remove tags from a file.
 541
 542     Keyword arguments:
 543     delete_v1 -- delete any ID3v1 tag
 544     delete_v2 -- delete any ID3v2 tag
 545     """
 546
 547     f = open(filename, 'rb+')
 548
 549     if delete_v1:
 550         try:
 551             f.seek(-128, 2)
 552         except IOError: pass
 553         else:
 554             if f.read(3) == "TAG":
 555                 f.seek(-128, 2)
 556                 f.truncate()
 557
 558     # technically an insize=0 tag is invalid, but we delete it anyway
 559     # (primarily because we used to write it)
 560     if delete_v2:
 561         f.seek(0, 0)
 562         idata = f.read(10)
 563         try: id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata)
 564         except struct.error: id3, insize = '', -1
 565         insize = BitPaddedInt(insize)
 566         if id3 == 'ID3' and insize >= 0:
 567             delete_bytes(f, insize + 10, 0)
 568
 569 class BitPaddedInt(int):
 570     def __new__(cls, value, bits=7, bigendian=True):
 571         "Strips 8-bits bits out of every byte"
 572         mask = (1<<(bits))-1
 573         if isinstance(value, (int, long)):
 574             bytes = []
 575             while value:
 576                 bytes.append(value & ((1<<bits)-1))
 577                 value = value >> 8
 578         if isinstance(value, str):
 579             bytes = [ord(byte) & mask for byte in value]
 580             if bigendian: bytes.reverse()
 581         numeric_value = 0
 582         for shift, byte in zip(range(0, len(bytes)*bits, bits), bytes):
 583             numeric_value += byte << shift
 584         if isinstance(numeric_value, long):
 585             self = long.__new__(BitPaddedLong, numeric_value)
 586         else:
 587             self = int.__new__(BitPaddedInt, numeric_value)
 588         self.bits = bits
 589         self.bigendian = bigendian
 590         return self
 591
 592     def as_str(value, bits=7, bigendian=True, width=4):
 593         bits = getattr(value, 'bits', bits)
 594         bigendian = getattr(value, 'bigendian', bigendian)
 595         value = int(value)
 596         mask = (1<<bits)-1
 597         bytes = []
 598         while value:
 599             bytes.append(value & mask)
 600             value = value >> bits
 601         # PCNT and POPM use growing integers of at least 4 bytes as counters.
 602         if width == -1: width = max(4, len(bytes))
 603         if len(bytes) > width:
 604             raise ValueError, 'Value too wide (%d bytes)' % len(bytes)
 605         else: bytes.extend([0] * (width-len(bytes)))
 606         if bigendian: bytes.reverse()
 607         return ''.join(map(chr, bytes))
 608     to_str = staticmethod(as_str)
 609
 610 class BitPaddedLong(long):
 611     def as_str(value, bits=7, bigendian=True, width=4):
 612         return BitPaddedInt.to_str(value, bits, bigendian, width)
 613     to_str = staticmethod(as_str)
 614
 615 class unsynch(object):
 616     def decode(value):
 617         output = []
 618         safe = True
 619         append = output.append
 620         for val in value:
 621             if safe:
 622                 append(val)
 623                 safe = val != '\xFF'
 624             else:
 625                 if val >= '\xE0': raise ValueError('invalid sync-safe string')
 626                 elif val != '\x00': append(val)
 627                 safe = True
 628         if not safe: raise ValueError('string ended unsafe')
 629         return ''.join(output)
 630     decode = staticmethod(decode)
 631
 632     def encode(value):
 633         output = []
 634         safe = True
 635         append = output.append
 636         for val in value:
 637             if safe:
 638                 append(val)
 639                 if val == '\xFF': safe = False
 640             elif val == '\x00' or val >= '\xE0':
 641                 append('\x00')
 642                 append(val)
 643                 safe = val != '\xFF'
 644             else:
 645                 append(val)
 646                 safe = True
 647         if not safe: append('\x00')
 648         return ''.join(output)
 649     encode = staticmethod(encode)
 650
 651 class Spec(object):
 652     def __init__(self, name): self.name = name
 653     def __hash__(self): raise TypeError("Spec objects are unhashable")
 654
 655 class ByteSpec(Spec):
 656     def read(self, frame, data): return ord(data[0]), data[1:]
 657     def write(self, frame, value): return chr(value)
 658     def validate(self, frame, value): return value
 659
 660 class IntegerSpec(Spec):
 661     def read(self, frame, data):
 662         return int(BitPaddedInt(data, bits=8)), ''
 663     def write(self, frame, value):
 664         return BitPaddedInt.to_str(value, bits=8, width=-1)
 665     def validate(self, frame, value):
 666         return value
 667
 668 class SizedIntegerSpec(Spec):
 669     def __init__(self, name, size):
 670         self.name, self.__sz = name, size
 671     def read(self, frame, data):
 672         return int(BitPaddedInt(data[:self.__sz], bits=8)), data[self.__sz:]
 673     def write(self, frame, value):
 674         return BitPaddedInt.to_str(value, bits=8, width=self.__sz)
 675     def validate(self, frame, value):
 676         return value
 677
 678 class EncodingSpec(ByteSpec):
 679     def read(self, frame, data):
 680         enc, data = super(EncodingSpec, self).read(frame, data)
 681         if enc < 16: return enc, data
 682         else: return 0, chr(enc)+data
 683
 684     def validate(self, frame, value):
 685         if 0 <= value <= 3: return value
 686         if value is None: return None
 687         raise ValueError, 'Invalid Encoding: %r' % value
 688
 689 class StringSpec(Spec):
 690     def __init__(self, name, length):
 691         super(StringSpec, self).__init__(name)
 692         self.len = length
 693     def read(s, frame, data): return data[:s.len], data[s.len:]
 694     def write(s, frame, value):
 695         if value is None: return '\x00' * s.len
 696         else: return (str(value) + '\x00' * s.len)[:s.len]
 697     def validate(s, frame, value):
 698         if value is None: return None
 699         if isinstance(value, basestring) and len(value) == s.len: return value
 700         raise ValueError, 'Invalid StringSpec[%d] data: %r' % (s.len, value)
 701
 702 class BinaryDataSpec(Spec):
 703     def read(self, frame, data): return data, ''
 704     def write(self, frame, value): return str(value)
 705     def validate(self, frame, value): return str(value)
 706
 707 class EncodedTextSpec(Spec):
 708     # Okay, seriously. This is private and defined explicitly and
 709     # completely by the ID3 specification. You can't just add
 710     # encodings here however you want.
 711     _encodings = ( ('latin1', '\x00'), ('utf16', '\x00\x00'),
 712                    ('utf_16_be', '\x00\x00'), ('utf8', '\x00') )
 713
 714     def read(self, frame, data):
 715         enc, term = self._encodings[frame.encoding]
 716         ret = ''
 717         if len(term) == 1:
 718             if term in data:
 719                 data, ret = data.split(term, 1)
 720         else:
 721             offset = -1
 722             try:
 723                 while True:
 724                     offset = data.index(term, offset+1)
 725                     if offset & 1: continue
 726                     data, ret = data[0:offset], data[offset+2:]; break
 727             except ValueError: pass
 728
 729         if len(data) < len(term): return u'', ret
 730         return data.decode(enc), ret
 731
 732     def write(self, frame, value):
 733         enc, term = self._encodings[frame.encoding]
 734         return value.encode(enc) + term
 735
 736     def validate(self, frame, value): return unicode(value)
 737
 738 class MultiSpec(Spec):
 739     def __init__(self, name, *specs, **kw):
 740         super(MultiSpec, self).__init__(name)
 741         self.specs = specs
 742         self.sep = kw.get('sep')
 743
 744     def read(self, frame, data):
 745         values = []
 746         while data:
 747             record = []
 748             for spec in self.specs:
 749                 value, data = spec.read(frame, data)
 750                 record.append(value)
 751             if len(self.specs) != 1: values.append(record)
 752             else: values.append(record[0])
 753         return values, data
 754
 755     def write(self, frame, value):
 756         data = []
 757         if len(self.specs) == 1:
 758             for v in value:
 759                 data.append(self.specs[0].write(frame, v))
 760         else:
 761             for record in value:
 762                 for v, s in zip(record, self.specs):
 763                     data.append(s.write(frame, v))
 764         return ''.join(data)
 765
 766     def validate(self, frame, value):
 767         if value is None: return []
 768         if self.sep and isinstance(value, basestring):
 769             value = value.split(self.sep)
 770         if isinstance(value, list):
 771             if len(self.specs) == 1:
 772                 return [self.specs[0].validate(frame, v) for v in value]
 773             else:
 774                 return [
 775                     [s.validate(frame, v) for (v,s) in zip(val, self.specs)]
 776                     for val in value ]
 777         raise ValueError, 'Invalid MultiSpec data: %r' % value
 778
# Marker subclass of EncodedTextSpec with no behaviour of its own; it is the
# text spec used by NumericTextFrame.
class EncodedNumericTextSpec(EncodedTextSpec): pass
# Marker subclass of EncodedTextSpec with no behaviour of its own; it is the
# text spec used by NumericPartTextFrame ('X/Y' style values).
class EncodedNumericPartTextSpec(EncodedTextSpec): pass
 781
 782 class Latin1TextSpec(EncodedTextSpec):
 783     def read(self, frame, data):
 784         if '\x00' in data: data, ret = data.split('\x00',1)
 785         else: ret = ''
 786         return data.decode('latin1'), ret
 787
 788     def write(self, data, value):
 789         return value.encode('latin1') + '\x00'
 790
 791     def validate(self, frame, value): return unicode(value)
 792
 793 class ID3TimeStamp(object):
 794     """A time stamp in ID3v2 format.
 795
 796     This is a restricted form of the ISO 8601 standard; time stamps
 797     take the form of:
 798         YYYY-MM-DD HH:MM:SS
 799     Or some partial form (YYYY-MM-DD HH, YYYY, etc.).
 800
 801     The 'text' attribute contains the raw text data of the time stamp.
 802     """
 803
 804     import re
 805     def __init__(self, text):
 806         if isinstance(text, ID3TimeStamp): text = text.text
 807         self.text = text
 808
 809     __formats = ['%04d'] + ['%02d'] * 5
 810     __seps = ['-', '-', ' ', ':', ':', 'x']
 811     def get_text(self):
 812         parts = [self.year, self.month, self.day,
 813                 self.hour, self.minute, self.second]
 814         pieces = []
 815         for i, part in enumerate(iter(iter(parts).next, None)):
 816             pieces.append(self.__formats[i]%part + self.__seps[i])
 817         return u''.join(pieces)[:-1]
 818
 819     def set_text(self, text, splitre=re.compile('[-T:/.]|\s+')):
 820         year, month, day, hour, minute, second = \
 821                 splitre.split(text + ':::::')[:6]
 822         for a in 'year month day hour minute second'.split():
 823             try: v = int(locals()[a])
 824             except ValueError: v = None
 825             setattr(self, a, v)
 826
 827     text = property(get_text, set_text, doc="ID3v2.4 date and time.")
 828
 829     def __str__(self): return self.text
 830     def __repr__(self): return repr(self.text)
 831     def __cmp__(self, other): return cmp(self.text, other.text)
 832     __hash__ = object.__hash__
 833     def encode(self, *args): return self.text.encode(*args)
 834
 835 class TimeStampSpec(EncodedTextSpec):
 836     def read(self, frame, data):
 837         value, data = super(TimeStampSpec, self).read(frame, data)
 838         return self.validate(frame, value), data
 839
 840     def write(self, frame, data):
 841         return super(TimeStampSpec, self).write(frame,
 842                 data.text.replace(' ', 'T'))
 843
 844     def validate(self, frame, value):
 845         try: return ID3TimeStamp(value)
 846         except TypeError: raise ValueError, "Invalid ID3TimeStamp: %r" % value
 847
 848 class ChannelSpec(ByteSpec):
 849     (OTHER, MASTER, FRONTRIGHT, FRONTLEFT, BACKRIGHT, BACKLEFT, FRONTCENTRE,
 850      BACKCENTRE, SUBWOOFER) = range(9)
 851
 852 class VolumeAdjustmentSpec(Spec):
 853     def read(self, frame, data):
 854         value, = unpack('>h', data[0:2])
 855         return value/512.0, data[2:]
 856
 857     def write(self, frame, value):
 858         return pack('>h', int(round(value * 512)))
 859
 860     def validate(self, frame, value): return value
 861
 862 class VolumePeakSpec(Spec):
 863     def read(self, frame, data):
 864         # http://bugs.xmms.org/attachment.cgi?id=113&action=view
 865         peak = 0
 866         bits = ord(data[0])
 867         bytes = min(4, (bits + 7) >> 3)
 868         # not enough frame data
 869         if bytes + 1 > len(data): raise ID3JunkFrameError
 870         shift = ((8 - (bits & 7)) & 7) + (4 - bytes) * 8
 871         for i in range(1, bytes+1):
 872             peak *= 256
 873             peak += ord(data[i])
 874         peak *= 2**shift
 875         return (float(peak) / (2**31-1)), data[1+bytes:]
 876
 877     def write(self, frame, value):
 878         # always write as 16 bits for sanity.
 879         return "\x10" + pack('>H', int(round(value * 32768)))
 880
 881     def validate(self, frame, value): return value
 882
 883 class SynchronizedTextSpec(EncodedTextSpec):
 884     def read(self, frame, data):
 885         texts = []
 886         encoding, term = self._encodings[frame.encoding]
 887         while data:
 888             l = len(term)
 889             try:
 890                 value_idx = data.index(term)
 891             except ValueError:
 892                 raise ID3JunkFrameError
 893             value = data[:value_idx].decode(encoding)
 894             time, = struct.unpack(">I", data[value_idx+l:value_idx+l+4])
 895             texts.append((value, time))
 896             data = data[value_idx+l+4:]
 897         return texts, ""
 898
 899     def write(self, frame, value):
 900         data = []
 901         encoding, term = self._encodings[frame.encoding]
 902         for text, time in frame.text:
 903             text = text.encode(encoding) + term
 904             data.append(text + struct.pack(">I", time))
 905         return "".join(data)
 906
 907     def validate(self, frame, value):
 908         return value
 909
 910 class KeyEventSpec(Spec):
 911     def read(self, frame, data):
 912         events = []
 913         while len(data) >= 5:
 914             events.append(struct.unpack(">bI", data[:5]))
 915             data = data[5:]
 916         return events, data
 917
 918     def write(self, frame, value):
 919         return "".join([struct.pack(">bI", *event) for event in value])
 920
 921     def validate(self, frame, value):
 922         return value
 923
 924 class VolumeAdjustmentsSpec(Spec):
 925     # Not to be confused with VolumeAdjustmentSpec.
 926     def read(self, frame, data):
 927         adjustments = {}
 928         while len(data) >= 4:
 929             freq, adj = struct.unpack(">Hh", data[:4])
 930             data = data[4:]
 931             freq /= 2.0
 932             adj /= 512.0
 933             adjustments[freq] = adj
 934         adjustments = adjustments.items()
 935         adjustments.sort()
 936         return adjustments, data
 937
 938     def write(self, frame, value):
 939         value.sort()
 940         return "".join([struct.pack(">Hh", int(freq * 2), int(adj * 512))
 941                         for (freq, adj) in value])
 942
 943     def validate(self, frame, value):
 944         return value
 945
 946 class ASPIIndexSpec(Spec):
 947     def read(self, frame, data):
 948         if frame.b == 16:
 949             format = "H"
 950             size = 2
 951         elif frame.b == 8:
 952             format = "B"
 953             size = 1
 954         else:
 955             warn("invalid bit count in ASPI (%d)" % frame.b, ID3Warning)
 956             return [], data
 957
 958         indexes = data[:frame.N * size]
 959         data = data[frame.N * size:]
 960         return list(struct.unpack(">" + format * frame.N, indexes)), data
 961
 962     def write(self, frame, values):
 963         if frame.b == 16: format = "H"
 964         elif frame.b == 8: format = "B"
 965         else: raise ValueError("frame.b must be 8 or 16")
 966         return struct.pack(">" + format * frame.N, *values)
 967
 968     def validate(self, frame, values):
 969         return values
 970
 971 class Frame(object):
 972     """Fundamental unit of ID3 data.
 973
 974     ID3 tags are split into frames. Each frame has a potentially
 975     different structure, and so this base class is not very featureful.
 976     """
 977
 978     FLAG23_ALTERTAG     = 0x8000
 979     FLAG23_ALTERFILE    = 0x4000
 980     FLAG23_READONLY     = 0x2000
 981     FLAG23_COMPRESS     = 0x0080
 982     FLAG23_ENCRYPT      = 0x0040
 983     FLAG23_GROUP        = 0x0020
 984
 985     FLAG24_ALTERTAG     = 0x4000
 986     FLAG24_ALTERFILE    = 0x2000
 987     FLAG24_READONLY     = 0x1000
 988     FLAG24_GROUPID      = 0x0040
 989     FLAG24_COMPRESS     = 0x0008
 990     FLAG24_ENCRYPT      = 0x0004
 991     FLAG24_UNSYNCH      = 0x0002
 992     FLAG24_DATALEN      = 0x0001
 993
 994     _framespec = []
 995     def __init__(self, *args, **kwargs):
 996         if len(args)==1 and len(kwargs)==0 and isinstance(args[0], type(self)):
 997             other = args[0]
 998             for checker in self._framespec:
 999                 val = checker.validate(self, getattr(other, checker.name))
1000                 setattr(self, checker.name, val)
1001         else:
1002             for checker, val in zip(self._framespec, args):
1003                 setattr(self, checker.name, checker.validate(self, val))
1004             for checker in self._framespec[len(args):]:
1005                 validated = checker.validate(
1006                     self, kwargs.get(checker.name, None))
1007                 setattr(self, checker.name, validated)
1008
1009     HashKey = property(
1010         lambda s: s.FrameID,
1011         doc="an internal key used to ensure frame uniqueness in a tag")
1012     FrameID = property(
1013         lambda s: type(s).__name__,
1014         doc="ID3v2 three or four character frame ID")
1015
1016     def __repr__(self):
1017         """Python representation of a frame.
1018
1019         The string returned is a valid Python expression to construct
1020         a copy of this frame.
1021         """
1022         kw = []
1023         for attr in self._framespec:
1024             kw.append('%s=%r' % (attr.name, getattr(self, attr.name)))
1025         return '%s(%s)' % (type(self).__name__, ', '.join(kw))
1026
1027     def _readData(self, data):
1028         odata = data
1029         for reader in self._framespec:
1030             if len(data):
1031                 try: value, data = reader.read(self, data)
1032                 except UnicodeDecodeError:
1033                     raise ID3JunkFrameError
1034             else: raise ID3JunkFrameError
1035             setattr(self, reader.name, value)
1036         if data.strip('\x00'):
1037             warn('Leftover data: %s: %r (from %r)' % (
1038                     type(self).__name__, data, odata),
1039                     ID3Warning)
1040
1041     def _writeData(self):
1042         data = []
1043         for writer in self._framespec:
1044             data.append(writer.write(self, getattr(self, writer.name)))
1045         return ''.join(data)
1046
1047     def pprint(self):
1048         """Return a human-readable representation of the frame."""
1049         return "%s=%s" % (type(self).__name__, self._pprint())
1050
1051     def _pprint(self):
1052         return "[unrepresentable data]"
1053
1054     def fromData(cls, id3, tflags, data):
1055         """Construct this ID3 frame from raw string data."""
1056
1057         if (2,4,0) <= id3.version:
1058             if tflags & (Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN):
1059                 # The data length int is syncsafe in 2.4 (but not 2.3).
1060                 # However, we don't actually need the data length int,
1061                 # except to work around a QL 0.12 bug, and in that case
1062                 # all we need are the raw bytes.
1063                 datalen_bytes = data[:4]
1064                 data = data[4:]
1065             if tflags & Frame.FLAG24_UNSYNCH or id3.f_unsynch:
1066                 try: data = unsynch.decode(data)
1067                 except ValueError, err:
1068                     if id3.PEDANTIC:
1069                         raise ID3BadUnsynchData, '%s: %r' % (err, data)
1070             if tflags & Frame.FLAG24_ENCRYPT:
1071                 raise ID3EncryptionUnsupportedError
1072             if tflags & Frame.FLAG24_COMPRESS:
1073                 try: data = data.decode('zlib')
1074                 except zlibError, err:
1075                     # the initial mutagen that went out with QL 0.12 did not
1076                     # write the 4 bytes of uncompressed size. Compensate.
1077                     data = datalen_bytes + data
1078                     try: data = data.decode('zlib')
1079                     except zlibError, err:
1080                         if id3.PEDANTIC:
1081                             raise ID3BadCompressedData, '%s: %r' % (err, data)
1082
1083         elif (2,3,0) <= id3.version:
1084             if tflags & Frame.FLAG23_COMPRESS:
1085                 usize, = unpack('>L', data[:4])
1086                 data = data[4:]
1087             if tflags & Frame.FLAG23_ENCRYPT:
1088                 raise ID3EncryptionUnsupportedError
1089             if tflags & Frame.FLAG23_COMPRESS:
1090                 try: data = data.decode('zlib')
1091                 except zlibError, err:
1092                     if id3.PEDANTIC:
1093                         raise ID3BadCompressedData, '%s: %r' % (err, data)
1094
1095         frame = cls()
1096         frame._rawdata = data
1097         frame._flags = tflags
1098         frame._readData(data)
1099         return frame
1100     fromData = classmethod(fromData)
1101
1102     def __hash__(self):
1103         raise TypeError("Frame objects are unhashable")
1104
class FrameOpt(Frame):
    """A frame with optional parts.

    Some ID3 frames have optional data; this class extends Frame to
    provide support for those parts.

    The optional parts are described by '_optionalspec'.  They are
    handled in order and stop at the first one that is absent.
    """
    _optionalspec = []

    def __init__(self, *args, **kwargs):
        """Initialize the required specs, then any leading optional ones.

        Optional values are taken from kwargs only; the first optional
        spec missing from kwargs ends the scan.
        """
        super(FrameOpt, self).__init__(*args, **kwargs)
        for spec in self._optionalspec:
            if spec.name in kwargs:
                validated = spec.validate(self, kwargs[spec.name])
                setattr(self, spec.name, validated)
            else: break

    def _readData(self, data):
        """Read the required specs, then optional ones while data remains.

        Raises ID3JunkFrameError when data runs out before every
        required spec has been read, or when any spec fails with
        UnicodeDecodeError.  Leftover bytes other than null padding
        produce an ID3Warning.
        """
        odata = data
        for reader in self._framespec:
            if len(data):
                # Undecodable text marks the frame as junk, exactly as
                # Frame._readData does, instead of escaping as a raw
                # UnicodeDecodeError.
                try: value, data = reader.read(self, data)
                except UnicodeDecodeError:
                    raise ID3JunkFrameError
            else: raise ID3JunkFrameError
            setattr(self, reader.name, value)
        if data:
            for reader in self._optionalspec:
                if len(data):
                    try: value, data = reader.read(self, data)
                    except UnicodeDecodeError:
                        raise ID3JunkFrameError
                else: break
                setattr(self, reader.name, value)
        if data.strip('\x00'):
            warn('Leftover data: %s: %r (from %r)' % (
                    type(self).__name__, data, odata),
                    ID3Warning)

    def _writeData(self):
        """Serialize the required specs and the optional ones that are set."""
        data = []
        for writer in self._framespec:
            data.append(writer.write(self, getattr(self, writer.name)))
        for writer in self._optionalspec:
            # The first optional attribute that was never set ends output.
            try: data.append(writer.write(self, getattr(self, writer.name)))
            except AttributeError: break
        return ''.join(data)

    def __repr__(self):
        """Like Frame.__repr__, plus every optional attribute that is set."""
        kw = []
        for attr in self._framespec:
            kw.append('%s=%r' % (attr.name, getattr(self, attr.name)))
        for attr in self._optionalspec:
            if hasattr(self, attr.name):
                kw.append('%s=%r' % (attr.name, getattr(self, attr.name)))
        return '%s(%s)' % (type(self).__name__, ', '.join(kw))

1154
1155
class TextFrame(Frame):
    """Text strings.

    Text frames support casts to unicode or str objects, as well as
    list-like indexing, extend, and append.

    Iterating over a TextFrame iterates over its strings, not its
    characters.

    Text frames have a 'text' attribute which is the list of strings,
    and an 'encoding' attribute; 0 for ISO-8859 1, 1 UTF-16, 2 for
    UTF-16BE, and 3 for UTF-8. If you don't want to worry about
    encodings, just set it to 3.
    """

    _framespec = [
        EncodingSpec('encoding'),
        MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000'),
    ]

    def __str__(self):
        # UTF-8 bytes of the unicode form.
        return self.__unicode__().encode('utf-8')

    def __unicode__(self):
        # Multiple values are joined with a null character.
        return u'\u0000'.join(self.text)

    def __eq__(self, other):
        # Strings compare against the joined form, anything else
        # against the list of values.
        if isinstance(other, str):
            return str(self) == other
        if isinstance(other, unicode):
            return unicode(self) == other
        return self.text == other

    __hash__ = Frame.__hash__

    def __getitem__(self, item):
        return self.text[item]

    def __iter__(self):
        return iter(self.text)

    def append(self, value):
        return self.text.append(value)

    def extend(self, value):
        return self.text.extend(value)

    def _pprint(self):
        return " / ".join(self.text)
1185
class NumericTextFrame(TextFrame):
    """Numerical text strings.

    The numeric value of these frames can be gotten with unary plus, e.g.
        frame = TLEN('12345')
        length = +frame
    """

    _framespec = [
        EncodingSpec('encoding'),
        MultiSpec('text', EncodedNumericTextSpec('text'), sep=u'\u0000'),
    ]

    def __pos__(self):
        """Return the numerical value of the string."""
        first = self.text[0]
        return int(first)
1200
class NumericPartTextFrame(TextFrame):
    """Multivalue numerical text strings.

    These strings indicate 'part (e.g. track) X of Y', and unary plus
    returns the first value:
        frame = TRCK('4/15')
        track = +frame # track == 4
    """

    _framespec = [
        EncodingSpec('encoding'),
        MultiSpec('text', EncodedNumericPartTextSpec('text'), sep=u'\u0000'),
    ]

    def __pos__(self):
        """Return the number before the first slash of the first string."""
        first = self.text[0]
        part = first.split("/")[0]
        return int(part)
1214
class TimeStampTextFrame(TextFrame):
    """A list of time stamps.

    The 'text' attribute in this frame is a list of ID3TimeStamp
    objects, not a list of strings.
    """

    _framespec = [
        EncodingSpec('encoding'),
        MultiSpec('text', TimeStampSpec('stamp'), sep=u','),
    ]

    def __str__(self):
        return self.__unicode__().encode('utf-8')

    def __unicode__(self):
        # Stamps are joined with commas.
        stamps = [stamp.text for stamp in self.text]
        return ','.join(stamps)

    def _pprint(self):
        stamps = [stamp.text for stamp in self.text]
        return " / ".join(stamps)
1228
class UrlFrame(Frame):
    """A frame containing a URL string.

    The ID3 specification is silent about IRIs and normalized URL
    forms. Mutagen assumes all URLs in files are encoded as Latin 1,
    but string conversion of this frame returns a UTF-8 representation
    for compatibility with other string conversions.

    The only sane way to handle URLs in MP3s is to restrict them to
    ASCII.
    """

    _framespec = [Latin1TextSpec('url')]

    def __str__(self):
        return self.url.encode('utf-8')

    def __unicode__(self):
        return self.url

    def __eq__(self, other):
        # Equality looks at the URL only.
        return self.url == other

    __hash__ = Frame.__hash__

    def _pprint(self):
        return self.url
1247
class UrlFrameU(UrlFrame):
    # The URL is part of the uniqueness key.
    HashKey = property(
        lambda self: '%s:%s' % (self.FrameID, self.url))
1250
# Frames that take all of their behaviour from the base class. The class
# name is the frame ID (see Frame.FrameID); the docstring names the field.
class TALB(TextFrame): "Album"
class TBPM(NumericTextFrame): "Beats per minute"
class TCOM(TextFrame): "Composer"
1254
class TCON(TextFrame):
    """Content type (Genre)

    ID3 has several ways genres can be represented; for convenience,
    use the 'genres' property rather than the 'text' attribute.
    """

    from mutagen._constants import GENRES

    def __get_genres(self):
        # Turn every raw text value into zero or more genre names.
        import re
        genre_re = re.compile(r"((?:\((?P<id>[0-9]+|RX|CR)\))*)(?P<str>.+)?")
        special = {"CR": u"Cover", "RX": u"Remix"}
        genres = []
        for value in self.text:
            if value.isdigit():
                # A bare number is an index into the genre table.
                try: genres.append(self.GENRES[int(value)])
                except IndexError: genres.append(u"Unknown")
                continue
            if value in special:
                genres.append(special[value])
                continue
            if not value:
                continue

            # Anything else: "(id)(id)...name", every part optional.
            genreid, dummy, genrename = genre_re.match(value).groups()
            newgenres = []

            if genreid:
                for gid in genreid[1:-1].split(")("):
                    if gid.isdigit() and int(gid) < len(self.GENRES):
                        newgenres.append(unicode(self.GENRES[int(gid)]))
                    else:
                        newgenres.append(special.get(gid, u"Unknown"))

            if genrename:
                # "Unescaping" the first parenthesis
                if genrename.startswith("(("):
                    genrename = genrename[1:]
                if genrename not in newgenres:
                    newgenres.append(genrename)

            genres.extend(newgenres)

        return genres

    def __set_genres(self, genres):
        # A single string is treated as a one-item list.
        if isinstance(genres, basestring):
            genres = [genres]
        self.text = [self.__decode(genre) for genre in genres]

    def __decode(self, value):
        # Byte strings are decoded with the frame's encoding; anything
        # else is passed through untouched.
        if not isinstance(value, str):
            return value
        enc = EncodedTextSpec._encodings[self.encoding][0]
        return value.decode(enc)

    genres = property(__get_genres, __set_genres, None,
                      "A list of genres parsed from the raw text data.")

    def _pprint(self):
        return " / ".join(self.genres)

1311
# Text frames that take all of their behaviour from the base class. The
# class name is the frame ID (see Frame.FrameID); the docstring names the
# field.
class TCOP(TextFrame): "Copyright (c)"
class TCMP(NumericTextFrame): "iTunes Compilation Flag"
class TDAT(TextFrame): "Date of recording (DDMM)"
class TDEN(TimeStampTextFrame): "Encoding Time"
class TDOR(TimeStampTextFrame): "Original Release Time"
class TDLY(NumericTextFrame): "Audio Delay (ms)"
class TDRC(TimeStampTextFrame): "Recording Time"
class TDRL(TimeStampTextFrame): "Release Time"
class TDTG(TimeStampTextFrame): "Tagging Time"
class TENC(TextFrame): "Encoder"
class TEXT(TextFrame): "Lyricist"
class TFLT(TextFrame): "File type"
class TIME(TextFrame): "Time of recording (HHMM)"
class TIT1(TextFrame): "Content group description"
class TIT2(TextFrame): "Title"
class TIT3(TextFrame): "Subtitle/Description refinement"
class TKEY(TextFrame): "Starting Key"
class TLAN(TextFrame): "Audio Languages"
class TLEN(NumericTextFrame): "Audio Length (ms)"
class TMED(TextFrame): "Source Media Type"
class TMOO(TextFrame): "Mood"
class TOAL(TextFrame): "Original Album"
class TOFN(TextFrame): "Original Filename"
class TOLY(TextFrame): "Original Lyricist"
class TOPE(TextFrame): "Original Artist/Performer"
class TORY(NumericTextFrame): "Original Release Year"
class TOWN(TextFrame): "Owner/Licensee"
class TPE1(TextFrame): "Lead Artist/Performer/Soloist/Group"
class TPE2(TextFrame): "Band/Orchestra/Accompaniment"
class TPE3(TextFrame): "Conductor"
class TPE4(TextFrame): "Interpreter/Remixer/Modifier"
class TPOS(NumericPartTextFrame): "Part of set"
class TPRO(TextFrame): "Produced (P)"
class TPUB(TextFrame): "Publisher"
class TRCK(NumericPartTextFrame): "Track Number"
class TRDA(TextFrame): "Recording Dates"
class TRSN(TextFrame): "Internet Radio Station Name"
class TRSO(TextFrame): "Internet Radio Station Owner"
class TSIZ(NumericTextFrame): "Size of audio data (bytes)"
class TSO2(TextFrame): "iTunes Album Artist Sort"
class TSOA(TextFrame): "Album Sort Order key"
class TSOC(TextFrame): "iTunes Composer Sort"
class TSOP(TextFrame): "Performer Sort Order key"
class TSOT(TextFrame): "Title Sort Order key"
class TSRC(TextFrame): "International Standard Recording Code (ISRC)"
class TSSE(TextFrame): "Encoder settings"
class TSST(TextFrame): "Set Subtitle"
class TYER(NumericTextFrame): "Year of recording"
1360
class TXXX(TextFrame):
    """User-defined text data.

    TXXX frames have a 'desc' attribute which is set to any Unicode
    value (though the encoding of the text and the description must be
    the same). Many taggers use this frame to store freeform keys.
    """

    _framespec = [
        EncodingSpec('encoding'),
        EncodedTextSpec('desc'),
        MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000'),
    ]

    # The description is part of the uniqueness key.
    HashKey = property(lambda self: '%s:%s' % (self.FrameID, self.desc))

    def _pprint(self):
        joined = " / ".join(self.text)
        return "%s=%s" % (self.desc, joined)
1372
# URL frames that take all of their behaviour from the base class. WCOM and
# WOAR derive from UrlFrameU, whose HashKey also includes the URL.
class WCOM(UrlFrameU): "Commercial Information"
class WCOP(UrlFrame): "Copyright Information"
class WOAF(UrlFrame): "Official File Information"
class WOAR(UrlFrameU): "Official Artist/Performer Information"
class WOAS(UrlFrame): "Official Source Information"
class WORS(UrlFrame): "Official Internet Radio Information"
class WPAY(UrlFrame): "Payment Information"
class WPUB(UrlFrame): "Official Publisher Information"
1381
class WXXX(UrlFrame):
    """User-defined URL data.

    Like TXXX, this has a freeform description associated with it.
    """

    _framespec = [
        EncodingSpec('encoding'),
        EncodedTextSpec('desc'),
        Latin1TextSpec('url'),
    ]

    # The description is part of the uniqueness key.
    HashKey = property(lambda self: '%s:%s' % (self.FrameID, self.desc))
1390
class PairedTextFrame(Frame):
    """Paired text strings.

    Some ID3 frames pair text strings, to associate names with a more
    specific involvement in the song. The 'people' attribute of these
    frames contains a list of pairs:
        [['trumpet', 'Miles Davis'], ['bass', 'Paul Chambers']]

    Like text frames, these frames also have an encoding attribute.
    """

    _framespec = [
        EncodingSpec('encoding'),
        MultiSpec('people',
                  EncodedTextSpec('involvement'),
                  EncodedTextSpec('person')),
    ]

    def __eq__(self, other):
        # Equality looks at the list of pairs only.
        return self.people == other

    __hash__ = Frame.__hash__
1407
# Paired-text frames that take all of their behaviour from the base class.
# IPLS derives from TIPL and carries the same description.
class TIPL(PairedTextFrame): "Involved People List"
class TMCL(PairedTextFrame): "Musicians Credits List"
class IPLS(TIPL): "Involved People List"
1411
class MCDI(Frame):
    """Binary dump of CD's TOC.

    The 'data' attribute contains the raw byte string.
    """

    _framespec = [BinaryDataSpec('data')]

    def __eq__(self, other):
        # Equality looks at the raw data only.
        return self.data == other

    __hash__ = Frame.__hash__
1420
class ETCO(Frame):
    """Event timing codes."""

    _framespec = [
        ByteSpec("format"),
        KeyEventSpec("events"),
    ]

    def __eq__(self, other):
        # Equality looks at the event list only.
        return self.events == other

    __hash__ = Frame.__hash__
1426
class MLLT(Frame):
    """MPEG location lookup table.

    This frame's attributes may be changed in the future based on
    feedback from real-world use.
    """

    _framespec = [
        SizedIntegerSpec('frames', 2),
        SizedIntegerSpec('bytes', 3),
        SizedIntegerSpec('milliseconds', 3),
        ByteSpec('bits_for_bytes'),
        ByteSpec('bits_for_milliseconds'),
        BinaryDataSpec('data'),
    ]

    def __eq__(self, other):
        # Equality looks at the raw data only.
        return self.data == other

    __hash__ = Frame.__hash__
1441
class SYTC(Frame):
    """Synchronised tempo codes.

    This frame's attributes may be changed in the future based on
    feedback from real-world use.
    """

    _framespec = [
        ByteSpec("format"),
        BinaryDataSpec("data"),
    ]

    def __eq__(self, other):
        # Equality looks at the raw data only.
        return self.data == other

    __hash__ = Frame.__hash__
1451
class USLT(Frame):
    """Unsynchronised lyrics/text transcription.

    Lyrics have a three letter ISO language code ('lang'), a
    description ('desc'), and a block of plain text ('text').
    """

    _framespec = [
        EncodingSpec('encoding'),
        StringSpec('lang', 3),
        EncodedTextSpec('desc'),
        EncodedTextSpec('text'),
    ]

    # Description and language are part of the uniqueness key.
    HashKey = property(
        lambda self: '%s:%s:%r' % (self.FrameID, self.desc, self.lang))

    def __str__(self):
        return self.text.encode('utf-8')

    def __unicode__(self):
        return self.text

    def __eq__(self, other):
        # Equality looks at the text only.
        return self.text == other

    __hash__ = Frame.__hash__
1467
class SYLT(Frame):
    """Synchronised lyrics/text."""

    _framespec = [
        EncodingSpec('encoding'),
        StringSpec('lang', 3),
        ByteSpec('format'),
        ByteSpec('type'),
        EncodedTextSpec('desc'),
        SynchronizedTextSpec('text'),
    ]

    # Description and language are part of the uniqueness key.
    HashKey = property(
        lambda self: '%s:%s:%r' % (self.FrameID, self.desc, self.lang))

    def __str__(self):
        # All text pieces run together, without their times.
        pieces = [text for (text, time) in self.text]
        return "".join(pieces).encode('utf-8')

    def __eq__(self, other):
        return str(self) == other

    __hash__ = Frame.__hash__
1482
class COMM(TextFrame):
    """User comment.

    User comment frames have a description, like TXXX, and also a three
    letter ISO language code in the 'lang' attribute.
    """

    _framespec = [
        EncodingSpec('encoding'),
        StringSpec('lang', 3),
        EncodedTextSpec('desc'),
        MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000'),
    ]

    # Description and language are part of the uniqueness key.
    HashKey = property(
        lambda self: '%s:%s:%r' % (self.FrameID, self.desc, self.lang))

    def _pprint(self):
        joined = " / ".join(self.text)
        return "%s=%r=%s" % (self.desc, self.lang, joined)
1495
class RVA2(Frame):
    """Relative volume adjustment (2).

    This frame is used to implement volume scaling, and in
    particular, normalization using ReplayGain.

    Attributes:
    desc -- description or context of this adjustment
    channel -- audio channel to adjust (master is 1)
    gain -- a + or - dB gain relative to some reference level
    peak -- peak of the audio as a floating point number, [0, 1]

    When storing ReplayGain tags, use descriptions of 'album' and
    'track' on channel 1.
    """

    _framespec = [ Latin1TextSpec('desc'), ChannelSpec('channel'),
        VolumeAdjustmentSpec('gain'), VolumePeakSpec('peak') ]
    _channels = ["Other", "Master volume", "Front right", "Front left",
                 "Back right", "Back left", "Front centre", "Back centre",
                 "Subwoofer"]
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.desc))

    def __eq__(self, other):
        """Compare against a string form or another RVA2-like object.

        Equal when other equals str(self), or when other carries the
        same desc, channel, gain and peak.  Objects lacking those
        attributes compare unequal.
        """
        try:
            return ((str(self) == other) or
                    (self.desc == other.desc and
                     self.channel == other.channel and
                     self.gain == other.gain and
                     self.peak == other.peak))
        except AttributeError:
            # other is not frame-like (e.g. a non-matching string);
            # report inequality instead of letting == raise.
            return False
    __hash__ = Frame.__hash__

    def __str__(self):
        return "%s: %+0.4f dB/%0.4f" % (
            self._channels[self.channel], self.gain, self.peak)
1530
class EQU2(Frame):
    """Equalisation (2).

    Attributes:
    method -- interpolation method (0 = band, 1 = linear)
    desc -- identifying description
    adjustments -- list of (frequency, vol_adjustment) pairs
    """

    _framespec = [
        ByteSpec("method"),
        Latin1TextSpec("desc"),
        VolumeAdjustmentsSpec("adjustments"),
    ]

    # The description is part of the uniqueness key.
    HashKey = property(lambda self: '%s:%s' % (self.FrameID, self.desc))

    def __eq__(self, other):
        # Equality looks at the adjustment list only.
        return self.adjustments == other

    __hash__ = Frame.__hash__
1544
1545 # class RVAD: unsupported
1546 # class EQUA: unsupported
1547
class RVRB(Frame):
    """Reverb.

    Attributes: left, right, bounce_left, bounce_right, feedback_ltl,
    feedback_ltr, feedback_rtr, feedback_rtl, premix_ltr and premix_rtl.
    'left' and 'right' are two-byte integers, the rest single bytes.
    """

    _framespec = [
        SizedIntegerSpec('left', 2),
        SizedIntegerSpec('right', 2),
        ByteSpec('bounce_left'),
        ByteSpec('bounce_right'),
        ByteSpec('feedback_ltl'),
        ByteSpec('feedback_ltr'),
        ByteSpec('feedback_rtr'),
        ByteSpec('feedback_rtl'),
        ByteSpec('premix_ltr'),
        ByteSpec('premix_rtl'),
    ]

    def __eq__(self, other):
        """Compare the (left, right) pair against 'other'."""
        pair = (self.left, self.right)
        return pair == other
    __hash__ = Frame.__hash__
1558
class APIC(Frame):
    """Attached (or linked) Picture.

    Attributes:
    encoding -- text encoding for the description
    mime -- a MIME type (e.g. image/jpeg) or '-->' if the data is a URI
    type -- the source of the image (3 is the album front cover)
    desc -- a text description of the image
    data -- raw image data, as a byte string

    Mutagen will automatically compress large images when saving tags.
    """

    _framespec = [
        EncodingSpec('encoding'),
        Latin1TextSpec('mime'),
        ByteSpec('type'),
        EncodedTextSpec('desc'),
        BinaryDataSpec('data'),
    ]
    HashKey = property(lambda frame: '%s:%s' % (frame.FrameID, frame.desc))

    def __eq__(self, other):
        """Compare the raw picture data against 'other'."""
        return self.data == other
    __hash__ = Frame.__hash__

    def _pprint(self):
        """Describe the picture as 'desc (mime, N bytes)'."""
        size = len(self.data)
        return "%s (%s, %d bytes)" % (self.desc, self.mime, size)
1579
class PCNT(Frame):
    """Play counter.

    The 'count' attribute contains the (recorded) number of times this
    file has been played.

    This frame is basically obsoleted by POPM.
    """

    _framespec = [IntegerSpec('count')]

    def __eq__(self, other):
        """Compare the play count against 'other'."""
        return self.count == other
    __hash__ = Frame.__hash__

    def __pos__(self):
        """Unary plus yields the play count."""
        return self.count

    def _pprint(self):
        """Return the play count as unicode text."""
        return unicode(self.count)
1594
class POPM(FrameOpt):
    """Popularimeter.

    This frame keys a rating (out of 255) and a play count to an email
    address.

    Attributes:
    email -- email this POPM frame is for
    rating -- rating from 0 to 255
    count -- number of times the file has been played (optional)
    """

    _framespec = [
        Latin1TextSpec('email'),
        ByteSpec('rating'),
    ]
    _optionalspec = [IntegerSpec('count')]

    HashKey = property(lambda frame: '%s:%s' % (frame.FrameID, frame.email))

    def __eq__(self, other):
        """Compare the rating against 'other'."""
        return self.rating == other
    __hash__ = FrameOpt.__hash__

    def __pos__(self):
        """Unary plus yields the rating."""
        return self.rating

    def _pprint(self):
        """Return 'email=count rating/255'; count is None when unset."""
        count = getattr(self, 'count', None)
        return "%s=%r %r/255" % (self.email, count, self.rating)
1616
class GEOB(Frame):
    """General Encapsulated Object.

    A blob of binary data, that is not a picture (those go in APIC).

    Attributes:
    encoding -- encoding of the description
    mime -- MIME type of the data or '-->' if the data is a URI
    filename -- suggested filename if extracted
    desc -- text description of the data
    data -- raw data, as a byte string
    """

    _framespec = [
        EncodingSpec('encoding'),
        Latin1TextSpec('mime'),
        EncodedTextSpec('filename'),
        EncodedTextSpec('desc'),
        BinaryDataSpec('data'),
    ]
    HashKey = property(lambda frame: '%s:%s' % (frame.FrameID, frame.desc))

    def __eq__(self, other):
        """Compare the raw object data against 'other'."""
        return self.data == other
    __hash__ = Frame.__hash__
1636
class RBUF(FrameOpt):
    """Recommended buffer size.

    Attributes:
    size -- recommended buffer size in bytes
    info -- if ID3 tags may be elsewhere in the file (optional)
    offset -- the location of the next ID3 tag, if any

    Mutagen will not find the next tag itself.
    """

    _framespec = [SizedIntegerSpec('size', 3)]
    _optionalspec = [
        ByteSpec('info'),
        SizedIntegerSpec('offset', 4),
    ]

    def __eq__(self, other):
        """Compare the buffer size against 'other'."""
        return self.size == other
    __hash__ = FrameOpt.__hash__

    def __pos__(self):
        """Unary plus yields the buffer size."""
        return self.size
1653
class AENC(FrameOpt):
    """Audio encryption.

    Attributes:
    owner -- key identifying this encryption type
    preview_start -- unencrypted data block offset
    preview_length -- number of unencrypted blocks
    data -- data required for decryption (optional)

    Mutagen cannot decrypt files.
    """

    _framespec = [
        Latin1TextSpec('owner'),
        SizedIntegerSpec('preview_start', 2),
        SizedIntegerSpec('preview_length', 2),
    ]
    _optionalspec = [BinaryDataSpec('data')]
    HashKey = property(lambda frame: '%s:%s' % (frame.FrameID, frame.owner))

    def __str__(self):
        """Return the owner encoded as UTF-8."""
        return self.owner.encode('utf-8')

    def __unicode__(self):
        """Return the owner."""
        return self.owner

    def __eq__(self, other):
        """Compare the owner against 'other'."""
        return self.owner == other
    __hash__ = FrameOpt.__hash__
1675
class LINK(FrameOpt):
    """Linked information.

    Attributes:
    frameid -- the ID of the linked frame
    url -- the location of the linked frame
    data -- further ID information for the frame
    """

    _framespec = [
        StringSpec('frameid', 4),
        Latin1TextSpec('url'),
    ]
    _optionalspec = [BinaryDataSpec('data')]

    def __HashKey(self):
        """Build the hash key, including 'data' only when it is set."""
        try:
            parts = (self.FrameID, self.frameid, self.url, self.data)
            return "%s:%s:%s:%r" % parts
        except AttributeError:
            # The optional 'data' attribute is absent.
            return "%s:%s:%s" % (self.FrameID, self.frameid, self.url)
    HashKey = property(__HashKey)

    def __eq__(self, other):
        """Compare (frameid, url[, data]) against 'other'."""
        try:
            return (self.frameid, self.url, self.data) == other
        except AttributeError:
            # Fall back to the two mandatory fields.
            return (self.frameid, self.url) == other
    __hash__ = FrameOpt.__hash__
1698
class POSS(Frame):
    """Position synchronisation frame

    Attribute:
    format -- format of the position attribute (frames or milliseconds)
    position -- current position of the file
    """

    _framespec = [
        ByteSpec('format'),
        IntegerSpec('position'),
    ]

    def __eq__(self, other):
        """Compare the position against 'other'."""
        return self.position == other
    __hash__ = Frame.__hash__

    def __pos__(self):
        """Unary plus yields the position."""
        return self.position
1711
class UFID(Frame):
    """Unique file identifier.

    Attributes:
    owner -- format/type of identifier
    data -- identifier
    """

    _framespec = [ Latin1TextSpec('owner'), BinaryDataSpec('data') ]
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.owner))

    def __eq__(s, o):
        """Compare against another UFID-family frame or a raw identifier.

        Two frames are equal when both owner and data match; any other
        object is compared against the data alone.
        """
        # Test against UFID rather than its ID3v2.2 subclass UFI, so that
        # two ordinary UFID frames with different owners are not reported
        # equal merely because their data happens to match.
        if isinstance(o, UFID): return s.owner == o.owner and s.data == o.data
        else: return s.data == o
    __hash__ = Frame.__hash__

    def _pprint(self):
        """Return 'owner=data' for ASCII data, else 'owner (N bytes)'."""
        # max() of an empty string raises ValueError; treat empty data as
        # (trivially) ASCII instead of crashing.
        isascii = not self.data or ord(max(self.data)) < 128
        if isascii: return "%s=%s" % (self.owner, self.data)
        else: return "%s (%d bytes)" % (self.owner, len(self.data))
1730
class USER(Frame):
    """Terms of use.

    Attributes:
    encoding -- text encoding
    lang -- ISO three letter language code
    text -- licensing terms for the audio
    """

    _framespec = [
        EncodingSpec('encoding'),
        StringSpec('lang', 3),
        EncodedTextSpec('text'),
    ]
    HashKey = property(lambda frame: '%s:%r' % (frame.FrameID, frame.lang))

    def __str__(self):
        """Return the terms encoded as UTF-8."""
        return self.text.encode('utf-8')

    def __unicode__(self):
        """Return the terms."""
        return self.text

    def __eq__(self, other):
        """Compare the terms text against 'other'."""
        return self.text == other
    __hash__ = Frame.__hash__

    def _pprint(self):
        """Return 'lang=text', with lang shown as its repr."""
        return "%r=%s" % (self.lang, self.text)
1748
class OWNE(Frame):
    """Ownership frame.

    Attributes: encoding, price, date (an eight-character string) and
    seller.
    """

    _framespec = [
        EncodingSpec('encoding'),
        Latin1TextSpec('price'),
        StringSpec('date', 8),
        EncodedTextSpec('seller'),
    ]

    def __str__(self):
        """Return the seller encoded as UTF-8."""
        return self.seller.encode('utf-8')

    def __unicode__(self):
        """Return the seller."""
        return self.seller

    def __eq__(self, other):
        """Compare the seller against 'other'."""
        return self.seller == other
    __hash__ = Frame.__hash__
1758
class COMR(FrameOpt):
    """Commercial frame.

    Attributes: encoding, price, valid_until (an eight-character
    string), contact, format, seller and desc, plus the optional mime
    and logo.
    """

    _framespec = [
        EncodingSpec('encoding'),
        Latin1TextSpec('price'),
        StringSpec('valid_until', 8),
        Latin1TextSpec('contact'),
        ByteSpec('format'),
        EncodedTextSpec('seller'),
        EncodedTextSpec('desc'),
    ]
    _optionalspec = [
        Latin1TextSpec('mime'),
        BinaryDataSpec('logo'),
    ]
    # The key is the frame ID followed by the frame's written data.
    HashKey = property(
        lambda frame: '%s:%s' % (frame.FrameID, frame._writeData()))

    def __eq__(self, other):
        """Frames are equal when their written data is identical."""
        return self._writeData() == other._writeData()
    __hash__ = FrameOpt.__hash__
1769
class ENCR(Frame):
    """Encryption method registration.

    The standard does not allow multiple ENCR frames with the same owner
    or the same method. Mutagen only verifies that the owner is unique.
    """

    _framespec = [
        Latin1TextSpec('owner'),
        ByteSpec('method'),
        BinaryDataSpec('data'),
    ]
    HashKey = property(lambda frame: "%s:%s" % (frame.FrameID, frame.owner))

    def __str__(self):
        """Return the raw data."""
        return self.data

    def __eq__(self, other):
        """Compare the raw data against 'other'."""
        return self.data == other
    __hash__ = Frame.__hash__
1782
class GRID(FrameOpt):
    """Group identification registration.

    Attributes: owner and group, plus the optional data.
    """

    _framespec = [
        Latin1TextSpec('owner'),
        ByteSpec('group'),
    ]
    _optionalspec = [BinaryDataSpec('data')]
    HashKey = property(lambda frame: '%s:%s' % (frame.FrameID, frame.group))

    def __pos__(self):
        """Unary plus yields the group."""
        return self.group

    def __str__(self):
        """Return the owner encoded as UTF-8."""
        return self.owner.encode('utf-8')

    def __unicode__(self):
        """Return the owner."""
        return self.owner

    def __eq__(self, other):
        """Equal when 'other' matches either the owner or the group."""
        if self.owner == other:
            return True
        return self.group == other
    __hash__ = FrameOpt.__hash__
1793
1794
class PRIV(Frame):
    """Private frame.

    Attributes:
    owner -- Latin-1 text identifying the owner of the data
    data -- the raw private data
    """
    _framespec = [ Latin1TextSpec('owner'), BinaryDataSpec('data') ]
    # The data is part of the key, so frames sharing an owner but
    # carrying different data get distinct keys.
    HashKey = property(lambda s: '%s:%s:%s' % (
        s.FrameID, s.owner, s.data.decode('latin1')))

    def __str__(self): return self.data
    def __eq__(self, other): return self.data == other

    def _pprint(self):
        """Return 'owner=data' for ASCII data, else 'owner (N bytes)'."""
        # max() of an empty string raises ValueError; treat empty data as
        # (trivially) ASCII instead of crashing.
        isascii = not self.data or ord(max(self.data)) < 128
        if isascii: return "%s=%s" % (self.owner, self.data)
        else: return "%s (%d bytes)" % (self.owner, len(self.data))
    __hash__ = Frame.__hash__
1807
class SIGN(Frame):
    """Signature frame.

    Attributes: group (a byte) and sig (the raw signature data).
    """

    _framespec = [
        ByteSpec('group'),
        BinaryDataSpec('sig'),
    ]
    HashKey = property(
        lambda frame: '%s:%c:%s' % (frame.FrameID, frame.group, frame.sig))

    def __str__(self):
        """Return the raw signature."""
        return self.sig

    def __eq__(self, other):
        """Compare the raw signature against 'other'."""
        return self.sig == other
    __hash__ = Frame.__hash__
1815
class SEEK(Frame):
    """Seek frame.

    Mutagen does not find tags at seek offsets.
    """

    _framespec = [IntegerSpec('offset')]

    def __eq__(self, other):
        """Compare the offset against 'other'."""
        return self.offset == other
    __hash__ = Frame.__hash__

    def __pos__(self):
        """Unary plus yields the offset."""
        return self.offset
1825
class ASPI(Frame):
    """Audio seek point index.

    Attributes: S, L, N, b, and Fi. For the meaning of these, see
    the ID3v2.4 specification. Fi is a list of integers.
    """

    _framespec = [
        SizedIntegerSpec("S", 4),
        SizedIntegerSpec("L", 4),
        SizedIntegerSpec("N", 2),
        ByteSpec("b"),
        ASPIIndexSpec("Fi"),
    ]

    def __eq__(self, other):
        """Compare the Fi index list against 'other'."""
        return self.Fi == other
    __hash__ = Frame.__hash__
1837
# Build the frame-ID -> class table by scanning the module globals bound
# so far: every four-character name whose value is a Frame subclass.
Frames = dict([(k,v) for (k,v) in globals().items()
        if len(k)==4 and isinstance(v, type) and issubclass(v, Frame)])
"""All supported ID3v2 frames, keyed by frame name."""
# Remove the loop variables so they do not linger as module-level names
# (a Python 2 list comprehension leaves them bound in this scope).
del(k); del(v)
1842
# ID3v2.2 frames
#
# Each three-character class below subclasses its four-character
# counterpart unchanged; only the class name and docstring differ.
class UFI(UFID): "Unique File Identifier"

# Text frames.
class TT1(TIT1): "Content group description"
class TT2(TIT2): "Title"
class TT3(TIT3): "Subtitle/Description refinement"
class TP1(TPE1): "Lead Artist/Performer/Soloist/Group"
class TP2(TPE2): "Band/Orchestra/Accompaniment"
class TP3(TPE3): "Conductor"
class TP4(TPE4): "Interpreter/Remixer/Modifier"
class TCM(TCOM): "Composer"
class TXT(TEXT): "Lyricist"
class TLA(TLAN): "Audio Language(s)"
class TCO(TCON): "Content Type (Genre)"
class TAL(TALB): "Album"
class TPA(TPOS): "Part of set"
class TRK(TRCK): "Track Number"
class TRC(TSRC): "International Standard Recording Code (ISRC)"
class TYE(TYER): "Year of recording"
class TDA(TDAT): "Date of recording (DDMM)"
class TIM(TIME): "Time of recording (HHMM)"
class TRD(TRDA): "Recording Dates"
class TMT(TMED): "Source Media Type"
class TFT(TFLT): "File Type"
class TBP(TBPM): "Beats per minute"
class TCP(TCMP): "iTunes Compilation Flag"
class TCR(TCOP): "Copyright (C)"
class TPB(TPUB): "Publisher"
class TEN(TENC): "Encoder"
class TSS(TSSE): "Encoder settings"
class TOF(TOFN): "Original Filename"
class TLE(TLEN): "Audio Length (ms)"
class TSI(TSIZ): "Audio Data size (bytes)"
class TDY(TDLY): "Audio Delay (ms)"
class TKE(TKEY): "Starting Key"
class TOT(TOAL): "Original Album"
class TOA(TOPE): "Original Artist/Performer"
class TOL(TOLY): "Original Lyricist"
class TOR(TORY): "Original Release Year"

class TXX(TXXX): "User-defined Text"

# URL frames.
class WAF(WOAF): "Official File Information"
class WAR(WOAR): "Official Artist/Performer Information"
class WAS(WOAS): "Official Source Information"
class WCM(WCOM): "Commercial Information"
class WCP(WCOP): "Copyright Information"
class WPB(WPUB): "Official Publisher Information"

class WXX(WXXX): "User-defined URL"

# Remaining frames.
class IPL(IPLS): "Involved people list"
class MCI(MCDI): "Binary dump of CD's TOC"
class ETC(ETCO): "Event timing codes"
class MLL(MLLT): "MPEG location lookup table"
class STC(SYTC): "Synced tempo codes"
class ULT(USLT): "Unsychronised lyrics/text transcription"
class SLT(SYLT): "Synchronised lyrics/text"
class COM(COMM): "Comment"
# RVA and EQU have no class: their four-character counterparts (RVAD,
# EQUA) are marked unsupported earlier in this file.
#class RVA(RVAD)
#class EQU(EQUA)
class REV(RVRB): "Reverb"
class PIC(APIC):
    """Attached Picture.

    The 'mime' attribute of an ID3v2.2 attached picture must be either
    'PNG' or 'JPG'.
    """

    # Same fields as APIC, except that 'mime' is a fixed three-character
    # string.
    _framespec = [
        EncodingSpec('encoding'),
        StringSpec('mime', 3),
        ByteSpec('type'),
        EncodedTextSpec('desc'),
        BinaryDataSpec('data'),
    ]
# More ID3v2.2 names that subclass their four-character frames unchanged.
class GEO(GEOB): "General Encapsulated Object"
class CNT(PCNT): "Play counter"
class POP(POPM): "Popularimeter"
class BUF(RBUF): "Recommended buffer size"
1917
class CRM(Frame):
    """Encrypted meta frame

    Attributes: owner, desc and data (the raw frame data).
    """

    _framespec = [
        Latin1TextSpec('owner'),
        Latin1TextSpec('desc'),
        BinaryDataSpec('data'),
    ]

    def __eq__(self, other):
        """Compare the raw data against 'other'."""
        return self.data == other
    __hash__ = Frame.__hash__
1924
class CRA(AENC):
    """Audio encryption"""
1926
class LNK(LINK):
    """Linked information"""

    # Same fields as LINK, except that 'frameid' is three characters.
    _framespec = [
        StringSpec('frameid', 3),
        Latin1TextSpec('url'),
    ]
    _optionalspec = [BinaryDataSpec('data')]
1931
# ID3v2.2 frame table: every three-character global bound so far whose
# value is a Frame subclass, keyed by its name.
Frames_2_2 = dict([(k,v) for (k,v) in globals().items()
        if len(k)==3 and isinstance(v, type) and issubclass(v, Frame)])

# support open(filename) as interface: Open is an alias for the ID3 class
Open = ID3
1937
1938 # ID3v1.1 support.
def ParseID3v1(string):
    """Parse an ID3v1 tag, returning a dict of ID3v2.4 frames.

    The dict is keyed by frame ID ("TIT2", "TPE1", ...) and only holds
    entries for non-empty fields. None is returned when 'string' has no
    "TAG" marker, when the data from the marker onwards is not between
    124 and 128 bytes long, or when it cannot be unpacked.
    """

    start = string.find("TAG")
    if start < 0:
        return None
    string = string[start:]

    size = len(string)
    if not (124 <= size <= 128):
        return None

    # Issue #69 - Previous versions of Mutagen, when encountering
    # out-of-spec TDRC and TYER frames of less than four characters,
    # wrote only the characters available - e.g. "1" or "" - into the
    # year field. To parse those, shrink the year field to whatever is
    # left over; a zero-width "0s" is a valid struct format.
    year_width = size - 124
    unpack_fmt = "3s30s30s30s%ds29sBB" % year_width

    try:
        fields = unpack(unpack_fmt, string)
    except StructError:
        return None
    tag, title, artist, album, year, comment, track, genre = fields

    if tag != "TAG":
        return None

    def clean(raw):
        # Keep what precedes the first NUL, trimmed, as Latin-1 text.
        return raw.split("\x00")[0].strip().decode('latin1')

    title, artist, album, year, comment = [
        clean(raw) for raw in (title, artist, album, year, comment)]

    frames = {}
    if title:
        frames["TIT2"] = TIT2(encoding=0, text=title)
    if artist:
        frames["TPE1"] = TPE1(encoding=0, text=[artist])
    if album:
        frames["TALB"] = TALB(encoding=0, text=album)
    if year:
        frames["TDRC"] = TDRC(encoding=0, text=year)
    if comment:
        frames["COMM"] = COMM(
            encoding=0, lang="eng", desc="ID3v1 Comment", text=comment)

    # Don't read a track number if it looks like the comment was
    # padded with spaces instead of nulls (thanks, WinAmp).
    space_padded = (track == 32 and string[-3] != '\x00')
    if track and not space_padded:
        frames["TRCK"] = TRCK(encoding=0, text=str(track))
    if genre != 255:
        frames["TCON"] = TCON(encoding=0, text=str(genre))
    return frames
1984
def MakeID3v1(id3):
    """Return an ID3v1.1 tag string from a dict of ID3v2.4 frames.

    'id3' maps frame IDs to frames. Title, artist, album and comment use
    the first value of the TIT2, TPE1, TALB and COMM frames, encoded as
    Latin-1 (unencodable characters replaced), truncated and NUL-padded
    to the fixed field width. A missing frame, or a frame with no
    values, yields an all-NUL field. The year comes from TDRC, falling
    back to TYER. The result is always 128 characters long.
    """

    def first_text(frame_id, limit):
        # First text value of the frame, Latin-1 encoded and truncated;
        # "" when the frame is absent or holds no values (indexing an
        # empty list would otherwise raise IndexError).
        if frame_id not in id3:
            return ""
        values = id3[frame_id].text
        if not values:
            return ""
        return values[0].encode('latin1', 'replace')[:limit]

    v1 = {}

    for v2id, name in (("TIT2", "title"), ("TPE1", "artist"),
                       ("TALB", "album")):
        text = first_text(v2id, 30)
        v1[name] = text + ("\x00" * (30 - len(text)))

    # The comment keeps at most 28 characters and is padded to 29, so at
    # least one NUL always separates it from the track byte that follows.
    cmnt = first_text("COMM", 28)
    v1["comment"] = cmnt + ("\x00" * (29 - len(cmnt)))

    if "TRCK" in id3:
        # Unary plus asks the frame for its numeric value.
        try: v1["track"] = chr(+id3["TRCK"])
        except ValueError: v1["track"] = "\x00"
    else: v1["track"] = "\x00"

    if "TCON" in id3:
        try: genre = id3["TCON"].genres[0]
        except IndexError: pass
        else:
            if genre in TCON.GENRES:
                v1["genre"] = chr(TCON.GENRES.index(genre))
    if "genre" not in v1:
        # No usable genre: 0xFF is written in the genre byte.
        v1["genre"] = "\xff"

    if "TDRC" in id3:
        year = str(id3["TDRC"])
    elif "TYER" in id3:
        year = str(id3["TYER"])
    else:
        year = ""
    # Exactly four characters: NUL-pad short years, truncate long ones.
    v1["year"] = (year + "\x00\x00\x00\x00")[:4]

    return ("TAG%(title)s%(artist)s%(album)s%(year)s%(comment)s"
            "%(track)s%(genre)s") % v1
2028
class ID3FileType(mutagen.FileType):
    """An unknown type of file with ID3 tags."""

    # Tag reader class used by add_tags() and load() unless overridden.
    ID3 = ID3

    class _Info(object):
        """Placeholder stream info for a file of unknown format."""
        length = 0
        def __init__(self, fileobj, offset): pass
        pprint = staticmethod(lambda: "Unknown format with ID3 tag")

    def score(filename, fileobj, header):
        """Return whether 'header' starts with the "ID3" marker."""
        return header.startswith("ID3")
    score = staticmethod(score)

    def add_tags(self, ID3=None):
        """Add an empty ID3 tag to the file.

        A custom tag reader may be used instead of the default
        mutagen.id3.ID3 object, e.g. an EasyID3 reader.

        Raises error if the file already has a tag.
        """
        if ID3 is None:
            ID3 = self.ID3
        if self.tags is None:
            self.ID3 = ID3
            self.tags = ID3()
        else:
            raise error("an ID3 tag already exists")

    def load(self, filename, ID3=None, **kwargs):
        """Load stream and tag information from a file.

        A custom tag reader may be used instead of the default
        mutagen.id3.ID3 object, e.g. an EasyID3 reader. Extra keyword
        arguments are passed on to the tag reader.

        A tag that cannot be read (the reader raises error) leaves
        self.tags as None. A failure to open the file propagates as
        raised by open().
        """
        if ID3 is None:
            ID3 = self.ID3
        else:
            # If this was initialized with EasyID3, remember that for
            # when tags are auto-instantiated in add_tags.
            self.ID3 = ID3
        self.filename = filename
        try: self.tags = ID3(filename, **kwargs)
        except error: self.tags = None
        if self.tags is not None:
            # Not every tag reader exposes a size.
            try: offset = self.tags.size
            except AttributeError: offset = None
        else: offset = None
        # Open before entering try/finally: if open() itself fails there
        # is nothing to close, and closing an unbound name would replace
        # the real error with an UnboundLocalError.
        fileobj = open(filename, "rb")
        try:
            self.info = self._Info(fileobj, offset)
        finally:
            fileobj.close()