1 commit 4e68d9bfd12de51d8e6dc8ac622317f7755f1080
2 Author: Palmer Cox <p@lmercox.com>
3 Date: Tue May 14 23:07:48 2024 -0400
5 python312Packages.pybrowserid: Fix on Darwin
7 The WorkerPoolVerifier works by spawning one or more child processes in
8 order to run the verification process. This is fine on Linux when using
9 the default "fork" method of the multiprocessing module. However, this
10 breaks when using the "spawn" or "forkserver" method since those methods
11 require that the arguments passed into the Process object be pickleable.
12 Specifically we're passing threading.Lock objects to the child process
13 which are not pickleable.
15 Passing a Lock to a child process spawned via fork mostly does nothing -
16 the child process ends up with its own copy of the Lock which its free
17 to lock or unlock as it pleases and it has no effect on the parent
18 process. So, we don't actually need to pass the Lock to the child
19 process since it has never done anything. All we need to do is _not_
20 pass it since it causes errors when its passed because its not
21 pickleable. We don't need to re-create its functionality.
23 Anyway, there are two places where we are passing locks to the child
24 process. The first is the WorkerPoolVerifier class. This lock isn't
25 actually being used - its just passed because we're passing the "self"
26 object to the worker thread. So, we can fix this by making the target
27 method to run in the worker thread a free-function and only passing the
28 object we need - thus avoiding passing the unused Lock that triggers the
31 The other place that a Lock is passed ia via the FIFOCache. We can work
32 around this by making the Lock a global variable instead of an instance
33 variable. This shouldn't cause any significant performance issues
34 because the FIFOCache only holds this lock for brief periods when its
35 updating itself - its not doing any network call or long running
36 operations. Anyway, because its a global variable now, its not passed to
37 the process and we avoid the pickle error.
39 The other problem we have to fix are the tests on Darwin. There are two
40 problems - both due to Darwin defaulting to the "spawn" start method
43 1. When we spawn a new Python process, it inherits the same __main__
44 module as its parent. When running tests, this __main__ module is the
45 unittest module. And, it start trying to run tests when its loaded.
46 This spawns more processes which also try to run tests, and so on.
48 2. The test code tries to mock out the network access. However, when we
49 spawn a new Python process these mocks are lost.
51 Anyway, we fix this issues by creating a temporary replacement for the
52 __main__ module which we install before running the WorkerPoolVerifier
53 tests. This module avoids trying to run the unit tests and also applies
54 the necessary mock to avoid network access.
56 diff --git a/browserid/supportdoc.py b/browserid/supportdoc.py
57 index d995fed..249b37e 100644
58 --- a/browserid/supportdoc.py
59 +++ b/browserid/supportdoc.py
60 @@ -26,6 +26,9 @@ DEFAULT_TRUSTED_SECONDARIES = ("browserid.org", "diresworb.org",
64 +FIFO_CACHE_LOCK = threading.Lock()
67 class SupportDocumentManager(object):
68 """Manager for mapping hostnames to their BrowserID support documents.
70 @@ -131,7 +134,6 @@ class FIFOCache(object):
71 self.max_size = max_size
73 self.items_queue = collections.deque()
74 - self._lock = threading.Lock()
76 def __getitem__(self, key):
77 """Lookup the given key in the cache.
78 @@ -147,7 +149,7 @@ class FIFOCache(object):
79 # it hasn't been updated by another thread in the meantime.
80 # This is a little more work during eviction, but it means we
81 # can avoid locking in the common case of non-expired items.
82 - self._lock.acquire()
83 + FIFO_CACHE_LOCK.acquire()
85 if self.items_map[key][0] == timestamp:
86 # Just delete it from items_map. Trying to find
87 @@ -157,7 +159,7 @@ class FIFOCache(object):
91 - self._lock.release()
92 + FIFO_CACHE_LOCK.release()
96 @@ -168,7 +170,7 @@ class FIFOCache(object):
97 there's enough room in the cache and evicting items if necessary.
101 + with FIFO_CACHE_LOCK:
102 # First we need to make sure there's enough room.
103 # This is a great opportunity to evict any expired items,
104 # helping to keep memory small for sparse caches.
105 diff --git a/browserid/tests/dummy_main_module_for_unit_test_worker_processes.py b/browserid/tests/dummy_main_module_for_unit_test_worker_processes.py
107 index 0000000..388f86f
109 +++ b/browserid/tests/dummy_main_module_for_unit_test_worker_processes.py
112 +This module is necessary as a hack to get tests to pass on Darwin.
114 +The reason is that the tests attempt to start up a multiprocessing.Process.
115 +Doing that spawns a new Python process with the same __main__ module as the
116 +parent process. The first problem is that the default start mode for Processes
117 +on Darwin with "spawn" as opposed to "fork" as on Linux. This means that a
118 +fully new Python interpretter is started with the same __main__ module. When
119 +running tests, that __main__ module is the standard library unittest module.
120 +Unfortunately, that module unconditionally runs unit tests without checking
121 +that __name__ == "__main__". The end result is that every time a worker process
122 +is spawned it immediately attemps to run the tests which spawn more processes
125 +So, we work around that by replacing the __main__ module with this one before
126 +any processes are spawned. This prevents the infinite spawning of processes
127 +since this module doesn't try to run any unit tests.
129 +Additionally, this also patches the supportdoc fetching methods - which is
130 +necessary to actually get the tests to pass.
133 +import multiprocessing
135 +from browserid.tests.support import patched_supportdoc_fetching
137 +if multiprocessing.current_process().name != "MainProcess":
138 + patched_supportdoc_fetching().__enter__()
140 diff --git a/browserid/tests/test_verifiers.py b/browserid/tests/test_verifiers.py
141 index d95ff8f..e8b867f 100644
142 --- a/browserid/tests/test_verifiers.py
143 +++ b/browserid/tests/test_verifiers.py
144 @@ -454,15 +454,27 @@ class TestDummyVerifier(unittest.TestCase, VerifierTestCases):
145 class TestWorkerPoolVerifier(TestDummyVerifier):
148 + from browserid.tests import dummy_main_module_for_unit_test_worker_processes
151 super(TestWorkerPoolVerifier, self).setUp()
153 + self.__old_main = sys.modules["__main__"]
154 + sys.modules["__main__"] = dummy_main_module_for_unit_test_worker_processes
156 self.verifier = WorkerPoolVerifier(
157 verifier=LocalVerifier(["*"])
163 super(TestWorkerPoolVerifier, self).tearDown()
165 self.verifier.close()
167 + sys.modules["__main__"] = self.__old_main
170 class TestShortcutFunction(unittest.TestCase):
172 diff --git a/browserid/verifiers/workerpool.py b/browserid/verifiers/workerpool.py
173 index c669107..d250222 100644
174 --- a/browserid/verifiers/workerpool.py
175 +++ b/browserid/verifiers/workerpool.py
176 @@ -32,7 +32,6 @@ class WorkerPoolVerifier(object):
178 verifier = LocalVerifier()
179 self.num_procs = num_procs
180 - self.verifier = verifier
181 # Create the various communication channels.
182 # Yes, this duplicates a lot of the logic from multprocessing.Pool.
183 # I don't want to have to constantly pickle the verifier object
184 @@ -53,7 +52,7 @@ class WorkerPoolVerifier(object):
185 self._result_thread.start()
187 for n in range(num_procs):
188 - proc = multiprocessing.Process(target=self._run_worker)
189 + proc = multiprocessing.Process(target=_run_worker, args=(self._work_queue, verifier, self._result_queue))
190 self._processes.append(proc)
193 @@ -128,21 +127,21 @@ class WorkerPoolVerifier(object):
194 self._waiting_conds[job_id] = (ok, result)
197 - def _run_worker(self):
198 - """Method to run for the background worker processes.
199 +def _run_worker(work_queue, verifier, result_queue):
200 + """Method to run for the background worker processes.
202 - This method loops through jobs from the work queue, executing them
203 - with the verifier object and pushing the result back via the result
207 - job_id, args, kwds = self._work_queue.get()
211 - result = self.verifier.verify(*args, **kwds)
213 - except Exception as e:
216 - self._result_queue.put((job_id, ok, result))
217 + This method loops through jobs from the work queue, executing them
218 + with the verifier object and pushing the result back via the result
222 + job_id, args, kwds = work_queue.get()
226 + result = verifier.verify(*args, **kwds)
228 + except Exception as e:
231 + result_queue.put((job_id, ok, result))