1 # A parallelized "find(1)" using the thread module.
3 # This demonstrates the use of a work queue and worker threads.
4 # It really does do more stats/sec when using multiple threads,
5 # although the improvement is only about 20-30 percent.
7 # I'm too lazy to write a command line parser for the full find(1)
8 # command line syntax, so the predicate it searches for is wired-in,
9 # see function selector() below. (It currently searches for files with
10 # group or world write permission.)
12 # Usage: parfind.py [-w nworkers] [directory] ...
13 # Default nworkers is 4, maximum appears to be 8 (on Irix 4.0.2)
# Work queue class.  Usage (wq is a work queue instance):
#   wq.addwork(func, (arg1, arg2, ...))  # one or more calls
#   wq.run(nworkers)                     # then run the queued work
# The work is done when wq.run() completes.
# The function calls executed by the workers may add more work.
# Don't use keyboard interrupts!
37 # - busy and work are only modified when mutex is locked
38 # - len(work) is the number of jobs ready to be taken
39 # - busy is the number of jobs being done
40 # - todo is locked iff there is no work and somebody is busy
43 self
.mutex
= thread
.allocate()
44 self
.todo
= thread
.allocate()
49 def addwork(self
, func
, args
):
54 if len(self
.work
) == 1:
60 if self
.busy
== 0 and len(self
.work
) == 0:
66 self
.busy
= self
.busy
+ 1
68 if len(self
.work
) > 0:
74 self
.busy
= self
.busy
- 1
75 if self
.busy
== 0 and len(self
.work
) == 0:
80 time
.sleep(0.00001) # Let other threads run
89 def run(self
, nworkers
):
91 return # Nothing to do
92 for i
in range(nworkers
-1):
93 thread
.start_new(self
._worker
, ())
101 sys
.argv
.append("/tmp")
103 opts
, args
= getopt
.getopt(sys
.argv
[1:], '-w:')
104 for opt
, arg
in opts
:
106 nworkers
= string
.atoi(arg
)
112 wq
.addwork(find
, (dir, selector
, wq
))
118 sys
.stderr
.write('Total time ' + `t2
-t1`
+ ' sec.\n')
121 # The predicate -- defines what files we look for.
122 # Feel free to change this to suit your purpose
def selector(dir, name, fullname, stat):
    """Return true if the file is group- or world-writable.

    This is the wired-in search predicate.  Only ``stat`` is examined;
    ``dir``, ``name`` and ``fullname`` are accepted but unused here.

    stat -- stat result sequence; it is indexed with ST_MODE to get the
            mode bits.
    """
    # 0o022 selects the group-write (0o020) and other-write (0o002) bits.
    # The 0o prefix is used because the legacy spelling 0022 is a syntax
    # error on Python 3, while Python 2.6+ accepts both forms.
    return (stat[ST_MODE] & 0o022) != 0
129 # The find procedure -- calls wq.addwork() for subdirectories
131 def find(dir, pred
, wq
):
133 names
= os
.listdir(dir)
134 except os
.error
, msg
:
135 print `
dir`
, ':', msg
138 if name
not in (os
.curdir
, os
.pardir
):
139 fullname
= os
.path
.join(dir, name
)
141 stat
= os
.lstat(fullname
)
142 except os
.error
, msg
:
143 print `fullname`
, ':', msg
145 if pred(dir, name
, fullname
, stat
):
147 if S_ISDIR(stat
[ST_MODE
]):
148 if not os
.path
.ismount(fullname
):
149 wq
.addwork(find
, (fullname
, pred
, wq
))
152 # Call the main program