Quick update to the README file. For intros and books we now point to
[python/dscho.git] / Lib / sndhdr.py
blob b2cdd307f4afd69d1159da50b838675f74afa8a6
"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
  (the AU recognizer reports '?' for encodings it does not decode)

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, IOError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""
30 # The file structure is top-down except that the test program and its
31 # subroutine come last.
def what(filename):
    """Guess the type of a sound file.

    Thin wrapper: hands *filename* to whathdr() and returns its result
    unchanged (a 5-item tuple, or None when nothing matched).
    """
    return whathdr(filename)
def whathdr(filename):
    """Recognize sound headers.

    Read the first 512 bytes of *filename* and offer them, together with
    the open file object, to each recognizer in the module-level ``tests``
    list, in list order.

    Returns the first true value a recognizer returns, or None if no
    recognizer accepts the header.  IOError raised by open() is not
    caught and propagates to the caller.
    """
    # Binary mode: the header is raw data and must not go through newline
    # translation on platforms that distinguish text from binary files.
    f = open(filename, 'rb')
    try:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
        return None
    finally:
        # Close the handle on every exit path, including a recognizer
        # raising, instead of leaving it to garbage collection.
        f.close()
51 #-----------------------------------#
52 # Subroutines per sound header type #
53 #-----------------------------------#
# Registry of recognizer functions.  whathdr() calls each one as tf(h, f)
# in list order and returns the first true result, so the order of the
# tests.append() calls in this file is significant.
tests = []
57 def test_aifc(h, f):
58 import aifc
59 if h[:4] <> 'FORM':
60 return None
61 if h[8:12] == 'AIFC':
62 fmt = 'aifc'
63 elif h[8:12] == 'AIFF':
64 fmt = 'aiff'
65 else:
66 return None
67 f.seek(0)
68 try:
69 a = aifc.openfp(f, 'r')
70 except (EOFError, aifc.Error):
71 return None
72 return (fmt, a.getframerate(), a.getnchannels(), \
73 a.getnframes(), 8*a.getsampwidth())
75 tests.append(test_aifc)
78 def test_au(h, f):
79 if h[:4] == '.snd':
80 f = get_long_be
81 elif h[:4] in ('\0ds.', 'dns.'):
82 f = get_long_le
83 else:
84 return None
85 type = 'au'
86 hdr_size = f(h[4:8])
87 data_size = f(h[8:12])
88 encoding = f(h[12:16])
89 rate = f(h[16:20])
90 nchannels = f(h[20:24])
91 sample_size = 1 # default
92 if encoding == 1:
93 sample_bits = 'U'
94 elif encoding == 2:
95 sample_bits = 8
96 elif encoding == 3:
97 sample_bits = 16
98 sample_size = 2
99 else:
100 sample_bits = '?'
101 frame_size = sample_size * nchannels
102 return type, rate, nchannels, data_size/frame_size, sample_bits
104 tests.append(test_au)
107 def test_hcom(h, f):
108 if h[65:69] <> 'FSSD' or h[128:132] <> 'HCOM':
109 return None
110 divisor = get_long_be(h[128+16:128+20])
111 return 'hcom', 22050/divisor, 1, -1, 8
113 tests.append(test_hcom)
116 def test_voc(h, f):
117 if h[:20] <> 'Creative Voice File\032':
118 return None
119 sbseek = get_short_le(h[20:22])
120 rate = 0
121 if 0 <= sbseek < 500 and h[sbseek] == '\1':
122 ratecode = ord(h[sbseek+4])
123 rate = int(1000000.0 / (256 - ratecode))
124 return 'voc', rate, 1, -1, 8
126 tests.append(test_voc)
129 def test_wav(h, f):
130 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
131 if h[:4] <> 'RIFF' or h[8:12] <> 'WAVE' or h[12:16] <> 'fmt ':
132 return None
133 style = get_short_le(h[20:22])
134 nchannels = get_short_le(h[22:24])
135 rate = get_long_le(h[24:28])
136 sample_bits = get_short_le(h[34:36])
137 return 'wav', rate, nchannels, -1, sample_bits
139 tests.append(test_wav)
142 def test_8svx(h, f):
143 if h[:4] <> 'FORM' or h[8:12] <> '8SVX':
144 return None
145 # Should decode it to get #channels -- assume always 1
146 return '8svx', 0, 1, 0, 8
148 tests.append(test_8svx)
151 def test_sndt(h, f):
152 if h[:5] == 'SOUND':
153 nsamples = get_long_le(h[8:12])
154 rate = get_short_le(h[20:22])
155 return 'sndt', rate, 1, nsamples, 8
157 tests.append(test_sndt)
160 def test_sndr(h, f):
161 if h[:2] == '\0\0':
162 rate = get_short_le(h[2:4])
163 if 4000 <= rate <= 25000:
164 return 'sndr', rate, 1, -1, 8
166 tests.append(test_sndr)
169 #---------------------------------------------#
170 # Subroutines to extract numbers from strings #
171 #---------------------------------------------#
def get_long_be(s):
    """Combine s[0]..s[3] into one integer, most significant byte first.

    Characters beyond the fourth are ignored; fewer than four raise
    IndexError.
    """
    b0, b1, b2, b3 = ord(s[0]), ord(s[1]), ord(s[2]), ord(s[3])
    # Shift-and-merge one byte at a time, starting from the high byte.
    return (((((b0 << 8) | b1) << 8) | b2) << 8) | b3
def get_long_le(s):
    """Combine s[0]..s[3] into one integer, least significant byte first.

    Characters beyond the fourth are ignored; fewer than four raise
    IndexError.
    """
    b3, b2, b1, b0 = ord(s[3]), ord(s[2]), ord(s[1]), ord(s[0])
    # s[3] is the high byte; fold downwards to s[0].
    return (((((b3 << 8) | b2) << 8) | b1) << 8) | b0
def get_short_be(s):
    """Combine s[0] and s[1] into one integer, high byte first.

    Characters beyond the second are ignored; fewer than two raise
    IndexError.
    """
    high = ord(s[0])
    low = ord(s[1])
    return (high << 8) | low
def get_short_le(s):
    """Combine s[0] and s[1] into one integer, low byte first.

    Characters beyond the second are ignored; fewer than two raise
    IndexError.
    """
    high = ord(s[1])
    low = ord(s[0])
    return (high << 8) | low
186 #--------------------#
187 # Small test program #
188 #--------------------#
def test():
    """Command-line driver: run testall() over the script arguments.

    A leading '-r' argument is removed from sys.argv and turns on
    recursion into subdirectories.  With no remaining arguments the
    current directory '.' is tested.  On KeyboardInterrupt a notice is
    written to stderr and the process exits with status 1.
    """
    import sys
    recursive = 0
    if sys.argv[1:2] == ['-r']:
        # Consume the option so only file/directory names remain.
        del sys.argv[1:2]
        recursive = 1
    try:
        targets = sys.argv[1:]
        if not targets:
            targets = ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
205 def testall(list, recursive, toplevel):
206 import sys
207 import os
208 for filename in list:
209 if os.path.isdir(filename):
210 print filename + '/:',
211 if recursive or toplevel:
212 print 'recursing down:'
213 import glob
214 names = glob.glob(os.path.join(filename, '*'))
215 testall(names, recursive, 0)
216 else:
217 print '*** directory (use -r) ***'
218 else:
219 print filename + ':',
220 sys.stdout.flush()
221 try:
222 print what(filename)
223 except IOError:
224 print '*** not found ***'
226 if __name__ == '__main__':
227 test()