-
Notifications
You must be signed in to change notification settings - Fork 0
Replace multiprocessing with threading #10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8be9729
ac39f60
26a940e
06189a4
2ecc94d
b038b65
b8aea39
31d5d63
3a0a58e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,3 +9,5 @@ | |
| # emacs, vim | ||
| *~ | ||
| .*.swp | ||
|
|
||
| .claude/ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,8 @@ | ||
| import inspect | ||
| import queue | ||
| import sys | ||
| import textwrap | ||
| import threading | ||
|
|
||
| from fabric import state | ||
| from fabric.utils import abort, warn, error | ||
|
|
@@ -182,17 +184,16 @@ def _is_network_error_ignored(): | |
| return not state.env.use_exceptions_for['network'] and state.env.skip_bad_hosts | ||
|
|
||
|
|
||
| def _parallel_wrap(task, args, kwargs, queue, name, env): | ||
| def _parallel_wrap(task, args, kwargs, task_queue, name, env): | ||
| # Wrap in another callable that: | ||
| # * expands the env it's given to ensure parallel, linewise, etc are | ||
| # all set correctly and explicitly | ||
| # * nukes the connection cache to prevent shared-access problems | ||
| # * installs a thread-local copy of env for isolation | ||
| # * gives this thread its own connection cache | ||
| # * knows how to send the tasks' return value back over a Queue | ||
| # * captures exceptions raised by the task | ||
| state.env.update(env) | ||
| state.env._set_thread_local(env) | ||
rafecolton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| state.connections._set_thread_local() | ||
| try: | ||
| state.connections.clear() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is no longer needed?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, because the connections object is now a thread-local object. This was reset to prevent shared-access problems because state was copied between processes. |
||
| queue.put({'name': name, 'result': task.run(*args, **kwargs)}) | ||
| task_queue.put({'name': name, 'result': task.run(*args, **kwargs)}) | ||
| except BaseException as e: # We really do want to capture everything | ||
| # SystemExit implies use of abort(), which prints its own | ||
| # traceback, host info etc -- so we don't want to double up | ||
|
|
@@ -202,14 +203,16 @@ def _parallel_wrap(task, args, kwargs, queue, name, env): | |
| if type(e) is not SystemExit: | ||
| if not (isinstance(e, NetworkError) and _is_network_error_ignored()): | ||
| sys.stderr.write("!!! Parallel execution exception under host %r:\n" % name) | ||
| queue.put({'name': name, 'result': e}) | ||
| # Here, anything -- unexpected exceptions, or abort() | ||
| # driven SystemExits -- will bubble up and terminate the | ||
| # child process. | ||
| if not (isinstance(e, NetworkError) and _is_network_error_ignored()): | ||
| raise | ||
|
|
||
| def _execute(task, host, my_env, args, kwargs, jobs, queue, multiprocessing): | ||
| task_queue.put({'name': name, 'result': e}) | ||
| # NOTE: We intentionally do NOT re-raise here. With threads (unlike | ||
| # processes), re-raising would trigger threading.excepthook and | ||
| # duplicate the traceback on stderr. | ||
| finally: | ||
| disconnect_all() | ||
| state.connections._clear_thread_local() | ||
| state.env._clear_thread_local() | ||
|
|
||
| def _execute(task, host, my_env, args, kwargs, jobs, task_queue): | ||
| """ | ||
| Primary single-host work body of execute() | ||
| """ | ||
|
|
@@ -220,22 +223,23 @@ def _execute(task, host, my_env, args, kwargs, jobs, queue, multiprocessing): | |
| local_env = to_dict(host) | ||
| local_env.update(my_env) | ||
| # Set a few more env flags for parallelism | ||
| if queue is not None: | ||
| if task_queue is not None: | ||
rafecolton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| local_env.update({'parallel': True, 'linewise': True}) | ||
| # Handle parallel execution | ||
| if queue is not None: # Since queue is only set for parallel | ||
| if task_queue is not None: # Since queue is only set for parallel | ||
| name = local_env['host_string'] | ||
|
|
||
| # Stuff into Process wrapper | ||
| # Stuff into Thread wrapper | ||
| kwarg_dict = { | ||
| 'task': task, | ||
| 'args': args, | ||
| 'kwargs': kwargs, | ||
| 'queue': queue, | ||
| 'task_queue': task_queue, | ||
| 'name': name, | ||
| 'env': local_env, | ||
| } | ||
| p = multiprocessing.Process(target=_parallel_wrap, kwargs=kwarg_dict) | ||
| p = threading.Thread(target=_parallel_wrap, kwargs=kwarg_dict) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The name |
||
| p.daemon = True | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hm, I think this can result in background threads sticking around even if the main process is killed, is that right? Are you sure we want this?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Interesting point, I'll think about this a little more.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the main process waits for all the threads to finish, so I don't think there is a concern about it killing threads prematurely. If somebody ^C's the main process, we actively want it to kill threads.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In daemon mode the main process does not wait for all threads to finish; it kills them all immediately if the main process terminates, so orphan threads cannot be created, which is what we want. The only difference between the current [truncated]. This can be fixed by using stop events, but that may slow shutdown for interrupts if a thread is blocked on I/O, and would require a little more rewriting here. That may be preferred, though.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Interesting 🤔 do we actually do any signal handling in anything we expect to run in parallel? I can't think of anything. If we do, then we should probably handle it in this PR. If not, then I agree events may still be better, but we can defer it to a follow-up.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Signal handling would only be for
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Right, do we do any for ^C for tasks we run in parallel? The only thing I can think of is deploy prep, which runs in serial.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, we don't do any today. |
||
| # Name/id is host string | ||
| p.name = name | ||
| # Add to queue | ||
|
|
@@ -324,17 +328,13 @@ def execute(task, *args, **kwargs): | |
|
|
||
| parallel = requires_parallel(task) | ||
| if parallel: | ||
| import multiprocessing | ||
| ctx = multiprocessing.get_context('fork') | ||
| # Set up job queue for parallel cases | ||
| queue = ctx.Queue() | ||
| task_queue = queue.Queue() | ||
| else: | ||
| ctx = None | ||
| queue = None | ||
| task_queue = None | ||
|
|
||
| # Get pool size for this task | ||
| pool_size = task.get_pool_size(my_env['all_hosts'], state.env.pool_size) | ||
| jobs = JobQueue(pool_size, queue) | ||
| jobs = JobQueue(pool_size, task_queue) | ||
| if state.output.debug: | ||
| jobs._debug = True | ||
|
|
||
|
|
@@ -344,7 +344,7 @@ def execute(task, *args, **kwargs): | |
| for host in my_env['all_hosts']: | ||
| try: | ||
| results[host] = _execute( | ||
| task, host, my_env, args, new_kwargs, jobs, queue, ctx, | ||
| task, host, my_env, args, new_kwargs, jobs, task_queue, | ||
| ) | ||
| except NetworkError as e: | ||
| results[host] = e | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Signature change seems extraneous, so unless it's needed, I'd rather see it reverted to reduce the diff and chances of bugs. Seems reasonable to update the comment, though would like to see it be a little more informative (i.e. consistency with what)
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Updating the comment without changing the code doesn't really make sense. The only reason this exists is a hack for pickling to work in a multiprocessing queue, which we removed entirely. I can clarify the comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍 I'm inclined to leave the code as it was then, but if you're confident this works, then we can move forward with an updated comment