# This module contains several routines that help recognize sound
# files.
#
# Function whathdr() recognizes various types of sound file headers.
# It understands almost all headers that SOX can decode.
#
# The return tuple contains the following items, in this order:
# - file type (as SOX understands it)
# - sampling rate (0 if unknown or hard to decode)
# - number of channels (0 if unknown or hard to decode)
# - number of frames in the file (-1 if unknown or hard to decode)
# - number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
#
# If the file doesn't have a recognizable type, it returns None.
# If the file can't be opened, IOError is raised.
#
# To compute the total time, divide the number of frames by the
# sampling rate (a frame contains a sample for each channel).
#
# Function whatraw() calls the "whatsound" program and interprets its
# output. You'll have to guess the sampling rate by listening, though!
#
# Function what() calls whathdr() and, if that doesn't recognize the
# file, then calls whatraw().
#
# Finally, the function test() is a simple main program that calls
# what() for all files mentioned on the argument list. For directory
# arguments it calls what() for all files in that directory. The
# default argument is "." (testing all files in the current
# directory). The option -r tells it to recurse down directories
# found inside explicitly given directories.
#
# The file structure is top-down except that the test program and its
# subroutine come last.
37 #------------------------------------------------------#
38 # Guess the type of any sound file, raw or with header #
39 #------------------------------------------------------#
def what(filename):
    """Guess the type of a sound file, with or without a header.

    The header-based recognizer whathdr() is tried first; only when it
    does not recognize the file is the raw-sound guesser whatraw()
    consulted.

    Returns the result of whichever routine was used last: a tuple
    (type, sampling rate, channels, frames, bits/sample), or None if
    neither routine recognizes the file.  Exceptions raised by the two
    routines (e.g. IOError from whathdr() for an unopenable file) are
    not caught here.
    """
    res = whathdr(filename)
    if not res:
        # No recognizable header: fall back to guessing raw sound.
        res = whatraw(filename)
    return res
48 #-----------------------------#
49 # Guess the type of raw sound #
50 #-----------------------------#
52 def whatraw(filename
):
53 # Assume it's always 1 channel, byte-sized samples
54 # Don't assume anything about the rate
56 from stat
import ST_SIZE
57 # XXX "whatsound" should be part of the distribution somehow...
58 cmd
= 'whatsound ' + filename
+ ' 2>/dev/null'
59 cmd
= 'PATH=$PATH:/ufs/guido/bin/sgi\n' + cmd
60 pipe
= os
.popen(cmd
, 'r')
65 if data
[:13] == '-t raw -b -s ':
68 elif data
[:13] == '-t raw -b -u ':
71 elif data
[:13] == '-t raw -b -U ':
77 frame_count
= os
.stat(filename
)[ST_SIZE
]
80 return type, 0, 1, frame_count
, sample_size
83 #-------------------------#
84 # Recognize sound headers #
85 #-------------------------#
87 def whathdr(filename
):
88 f
= open(filename
, 'r')
97 #-----------------------------------#
98 # Subroutines per sound header type #
99 #-----------------------------------#
107 if h
[8:12] == 'AIFC':
109 elif h
[8:12] == 'AIFF':
115 a
= aifc
.openfp(f
, 'r')
116 except (EOFError, aifc
.Error
):
118 return (fmt
, a
.getframerate(), a
.getnchannels(), \
119 a
.getnframes(), 8*a
.getsampwidth())
121 tests
.append(test_aifc
)
127 elif h
[:4] in ('\0ds.', 'dns.'):
133 data_size
= f(h
[8:12])
134 encoding
= f(h
[12:16])
136 nchannels
= f(h
[20:24])
137 sample_size
= 1 # default
147 frame_size
= sample_size
* nchannels
148 return type, rate
, nchannels
, data_size
/frame_size
, sample_bits
150 tests
.append(test_au
)
154 if h
[65:69] <> 'FSSD' or h
[128:132] <> 'HCOM':
156 divisor
= get_long_be(h
[128+16:128+20])
157 return 'hcom', 22050/divisor
, 1, -1, 8
159 tests
.append(test_hcom
)
163 if h
[:20] <> 'Creative Voice File\032':
165 sbseek
= get_short_le(h
[20:22])
167 if 0 <= sbseek
< 500 and h
[sbseek
] == '\1':
168 ratecode
= ord(h
[sbseek
+4])
169 rate
= int(1000000.0 / (256 - ratecode
))
170 return 'voc', rate
, 1, -1, 8
172 tests
.append(test_voc
)
176 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
177 if h
[:4] <> 'RIFF' or h
[8:12] <> 'WAVE' or h
[12:16] <> 'fmt ':
179 style
= get_short_le(h
[20:22])
180 nchannels
= get_short_le(h
[22:24])
181 rate
= get_long_le(h
[24:28])
182 sample_bits
= get_short_le(h
[34:36])
183 return 'wav', rate
, nchannels
, -1, sample_bits
185 tests
.append(test_wav
)
189 if h
[:4] <> 'FORM' or h
[8:12] <> '8SVX':
191 # Should decode it to get #channels -- assume always 1
192 return '8svx', 0, 1, 0, 8
194 tests
.append(test_8svx
)
199 nsamples
= get_long_le(h
[8:12])
200 rate
= get_short_le(h
[20:22])
201 return 'sndt', rate
, 1, nsamples
, 8
203 tests
.append(test_sndt
)
208 rate
= get_short_le(h
[2:4])
209 if 4000 <= rate
<= 25000:
210 return 'sndr', rate
, 1, -1, 8
212 tests
.append(test_sndr
)
215 #---------------------------------------------#
216 # Subroutines to extract numbers from strings #
217 #---------------------------------------------#
def get_long_be(s):
    """Decode the first four characters of s as a big-endian integer.

    s[0] is the most significant byte.  Raises IndexError if s has
    fewer than four characters.
    """
    return ((ord(s[0]) << 24) | (ord(s[1]) << 16) |
            (ord(s[2]) << 8) | ord(s[3]))
def get_long_le(s):
    """Decode the first four characters of s as a little-endian integer.

    s[0] is the least significant byte.  Raises IndexError if s has
    fewer than four characters.
    """
    return ((ord(s[3]) << 24) | (ord(s[2]) << 16) |
            (ord(s[1]) << 8) | ord(s[0]))
def get_short_be(s):
    """Decode the first two characters of s as a big-endian integer.

    s[0] is the most significant byte.  Raises IndexError if s has
    fewer than two characters.
    """
    return (ord(s[0]) << 8) | ord(s[1])
def get_short_le(s):
    """Decode the first two characters of s as a little-endian integer.

    s[0] is the least significant byte.  Raises IndexError if s has
    fewer than two characters.
    """
    return (ord(s[1]) << 8) | ord(s[0])
232 #--------------------#
233 # Small test program #
234 #--------------------#
239 if sys
.argv
[1:] and sys
.argv
[1] == '-r':
244 testall(sys
.argv
[1:], recursive
, 1)
246 testall(['.'], recursive
, 1)
247 except KeyboardInterrupt:
248 sys
.stderr
.write('\n[Interrupted]\n')
251 def testall(list, recursive
, toplevel
):
254 for filename
in list:
255 if os
.path
.isdir(filename
):
256 print filename
+ '/:',
257 if recursive
or toplevel
:
258 print 'recursing down:'
260 names
= glob
.glob(os
.path
.join(filename
, '*'))
261 testall(names
, recursive
, 0)
263 print '*** directory (use -r) ***'
265 print filename
+ ':',
270 print '*** not found ***'