qtfaststart.py

   1 """
   2     Quicktime/MP4 Fast Start
   3     ------------------------
   4     Enable streaming and pseudo-streaming of Quicktime and MP4 files by
   5     moving metadata and offset information to the front of the file.
   6
   7     This program is based on qt-faststart.c from the ffmpeg project, which is
   8     released into the public domain, as well as ISO 14496-12:2005 (the official
   9     spec for MP4), which can be obtained from the ISO or found online.
  10
  11     The goals of this project are to run anywhere without compilation (in
  12     particular, many Windows and Mac OS X users have trouble getting
  13     qt-faststart.c compiled), to run about as fast as the C version, to be more
  14     user friendly, and to use fewer lines of code doing so.
  15
  16     Features
  17     --------
  18
  19         * Works everywhere Python can be installed
  20         * Handles both 32-bit (stco) and 64-bit (co64) atoms
  21         * Handles any file where the mdat atom is before the moov atom
  22         * Preserves the order of other atoms
  23         * Can replace the original file (if given no output file)
  24
  25     History
  26     -------
  27      * 2015-08-26: Remove "free" atom stripping - wjm3
  28      * 2013-01-28: Support strange zero-name, zero-length atoms, re-license
  29                    under the MIT license, version bump to 1.7
  30      * 2010-02-21: Add support for final mdat atom with zero size, patch by
  31                    Dmitry Simakov <basilio AT j-vista DOT ru>, version bump
  32                    to 1.4.
  33      * 2009-11-05: Add --sample option. Version bump to 1.3.
  34      * 2009-03-13: Update to be more library-friendly by using logging module,
  35                    rename fast_start => process, version bump to 1.2
  36      * 2008-10-04: Bug fixes, support multiple atoms of the same type,
  37                    version bump to 1.1
  38      * 2008-09-02: Initial release
  39
  40     License
  41     -------
  42
  43     Copyright (C) 2008 - 2013  Daniel G. Taylor <dan@programmer-art.org>
  44
  45     Permission is hereby granted, free of charge, to any person
  46     obtaining a copy of this software and associated documentation files
  47     (the "Software"), to deal in the Software without restriction,
  48     including without limitation the rights to use, copy, modify, merge,
  49     publish, distribute, sublicense, and/or sell copies of the Software,
  50     and to permit persons to whom the Software is furnished to do so,
  51     subject to the following conditions:
  52
  53     The above copyright notice and this permission notice shall be
  54     included in all copies or substantial portions of the Software.
  55
  56     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  57     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  58     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  59     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  60     BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  61     ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  62     CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  63     SOFTWARE.
  64 """
  65
  66 import logging
  67 import os
  68 import struct
  69
  70 from StringIO import StringIO
  71
# Version string of this module.
VERSION = "1.7.1wjm3"
# Number of bytes requested per read when process() copies data.
CHUNK_SIZE = 8192

# Module-level logger used by every function in this file.
log = logging.getLogger('pyTivo.video.qt-faststart')

# Running total of bytes handed to output(); reset to 0 by process().
count = 0
  78
  79 class FastStartException(Exception):
  80     pass
  81
  82 def read_atom(datastream):
  83     """
  84         Read an atom and return a tuple of (size, type) where size is the size
  85         in bytes (including the 8 bytes already read) and type is a "fourcc"
  86         like "ftyp" or "moov".
  87     """
  88     return struct.unpack(">L4s", datastream.read(8))
  89
  90 def get_index(datastream):
  91     """
  92         Return an index of top level atoms, their absolute byte-position in the
  93         file and their size in a list:
  94
  95         index = [
  96             ("ftyp", 0, 24),
  97             ("moov", 25, 2658),
  98             ("free", 2683, 8),
  99             ...
 100         ]
 101
 102         The tuple elements will be in the order that they appear in the file.
 103     """
 104     index = []
 105
 106     log.debug("Getting index of top level atoms...")
 107
 108     # Read atoms until we catch an error
 109     while(datastream):
 110         try:
 111             skip = 8
 112             atom_size, atom_type = read_atom(datastream)
 113             if atom_size == 1:
 114                 atom_size = struct.unpack(">Q", datastream.read(8))[0]
 115                 skip = 16
 116             log.debug("%s: %s" % (atom_type, atom_size))
 117         except:
 118             break
 119
 120         index.append((atom_type, datastream.tell() - skip, atom_size))
 121
 122         if atom_size == 0:
 123             if atom_type == "mdat":
 124                 # Some files may end in mdat with no size set, which
 125                 # generally means to seek to the end of the file. We can
 126                 # just stop indexing as no more entries will be found!
 127                 break
 128             else:
 129                 # Weird, but just continue to try to find more atoms
 130                 atom_size = skip
 131
 132         datastream.seek(atom_size - skip, os.SEEK_CUR)
 133
 134     # Make sure the atoms we need exist
 135     top_level_atoms = set([item[0] for item in index])
 136     for key in ["moov", "mdat"]:
 137         if key not in top_level_atoms:
 138             log.error("%s atom not found, is this a valid MOV/MP4 file?" % key)
 139             raise FastStartException()
 140
 141     return index
 142
 143 def find_atoms(size, datastream):
 144     """
 145         This function is a generator that will yield either "stco" or "co64"
 146         when either atom is found. datastream can be assumed to be 8 bytes
 147         into the stco or co64 atom when the value is yielded.
 148
 149         It is assumed that datastream will be at the end of the atom after
 150         the value has been yielded and processed.
 151
 152         size is the number of bytes to the end of the atom in the datastream.
 153     """
 154     stop = datastream.tell() + size
 155
 156     while datastream.tell() < stop:
 157         try:
 158             atom_size, atom_type = read_atom(datastream)
 159         except:
 160             log.exception("Error reading next atom!")
 161             raise FastStartException()
 162
 163         if atom_type in ["trak", "mdia", "minf", "stbl"]:
 164             # Known ancestor atom of stco or co64, search within it!
 165             for atype in find_atoms(atom_size - 8, datastream):
 166                 yield atype
 167         elif atom_type in ["stco", "co64"]:
 168             yield atom_type
 169         else:
 170             # Ignore this atom, seek to the end of it.
 171             datastream.seek(atom_size - 8, os.SEEK_CUR)
 172
 173 def output(outfile, skip, data):
 174     global count
 175     length = len(data)
 176     if count + length > skip:
 177         if skip > count:
 178             data = data[skip - count:]
 179         outfile.write(data)
 180     count += length
 181
 182 def process(datastream, outfile, skip=0):
 183     """
 184         Convert a Quicktime/MP4 file for streaming by moving the metadata to
 185         the front of the file. This method writes a new file.
 186     """
 187
 188     global count
 189     count = 0
 190
 191     # Get the top level atom index
 192     index = get_index(datastream)
 193
 194     mdat_pos = 999999
 195
 196     # Make sure moov occurs AFTER mdat, otherwise no need to run!
 197     for atom, pos, size in index:
 198         # The atoms are guaranteed to exist from get_index above!
 199         if atom == "moov":
 200             moov_pos = pos
 201             moov_size = size
 202         elif atom == "mdat":
 203             mdat_pos = pos
 204
 205     if moov_pos < mdat_pos:
 206         log.debug('mp4 already streamable -- copying')
 207         datastream.seek(skip)
 208         while True:
 209             block = datastream.read(CHUNK_SIZE)
 210             if not block:
 211                 break
 212             output(outfile, 0, block)
 213         return count
 214
 215     # Read and fix moov
 216     datastream.seek(moov_pos)
 217     moov = StringIO(datastream.read(moov_size))
 218
 219     # Ignore moov identifier and size, start reading children
 220     moov.seek(8)
 221
 222     for atom_type in find_atoms(moov_size - 8, moov):
 223         # Read either 32-bit or 64-bit offsets
 224         ctype, csize = atom_type == "stco" and ("L", 4) or ("Q", 8)
 225
 226         # Get number of entries
 227         version, entry_count = struct.unpack(">2L", moov.read(8))
 228
 229         log.info("Patching %s with %d entries" % (atom_type, entry_count))
 230
 231         # Read entries
 232         entries = struct.unpack(">" + ctype * entry_count,
 233                                 moov.read(csize * entry_count))
 234
 235         # Patch and write entries
 236         moov.seek(-csize * entry_count, os.SEEK_CUR)
 237         moov.write(struct.pack(">" + ctype * entry_count,
 238                                *[entry + moov_size for entry in entries]))
 239
 240     log.info("Writing output...")
 241
 242     # Write ftype
 243     for atom, pos, size in index:
 244         if atom == "ftyp":
 245             datastream.seek(pos)
 246             output(outfile, skip, datastream.read(size))
 247
 248     # Write moov
 249     moov.seek(0)
 250     output(outfile, skip, moov.read())
 251
 252     # Write the rest
 253     atoms = [item for item in index if item[0] not in ["ftyp", "moov"]]
 254     for atom, pos, size in atoms:
 255         datastream.seek(pos)
 256
 257         # Write in chunks to not use too much memory
 258         for x in range(size / CHUNK_SIZE):
 259             output(outfile, skip, datastream.read(CHUNK_SIZE))
 260
 261         if size % CHUNK_SIZE:
 262             output(outfile, skip, datastream.read(size % CHUNK_SIZE))
 263
 264     return count - skip