|
16 | 16 | import org.elasticsearch.action.ActionListener;
|
17 | 17 | import org.elasticsearch.action.search.SearchRequest;
|
18 | 18 | import org.elasticsearch.action.search.TransportSearchAction;
|
| 19 | +import org.elasticsearch.action.support.ListenerTimeouts; |
19 | 20 | import org.elasticsearch.action.support.master.AcknowledgedResponse;
|
20 | 21 | import org.elasticsearch.client.internal.Client;
|
21 | 22 | import org.elasticsearch.common.Strings;
|
@@ -79,6 +80,7 @@ public class DeploymentManager {
|
79 | 80 | private static final Logger logger = LogManager.getLogger(DeploymentManager.class);
|
80 | 81 | private static final AtomicLong requestIdCounter = new AtomicLong(1);
|
81 | 82 | public static final int NUM_RESTART_ATTEMPTS = 3;
|
| 83 | + private static final TimeValue WORKER_QUEUE_COMPLETION_TIMEOUT = TimeValue.timeValueMinutes(5); |
82 | 84 |
|
83 | 85 | private final Client client;
|
84 | 86 | private final NamedXContentRegistry xContentRegistry;
|
@@ -674,25 +676,38 @@ private synchronized void stopProcessAfterCompletingPendingWork(ActionListener<A
|
674 | 676 | prepareInternalStateForShutdown();
|
675 | 677 |
|
676 | 678 | // Waiting for the process worker to finish the pending work could
|
677 |
| - // take a long time. Best not to block the thread so register |
678 |
| - // a function with the process worker that is called when the |
679 |
| - // work is finished. Then proceed to closing the native process |
| 679 | + // take a long time. To avoid blocking the calling thread register |
| 680 | + // a function with the process worker queue that is called when the |
| 681 | + // worker queue is finished. Then proceed to closing the native process |
680 | 682 | // and wait for all results to be processed, the second part can be
|
681 | 683 | // done synchronously as it is not expected to take long.
|
682 |
| - // The ShutdownTracker will handle this. |
683 |
| - |
684 |
| - // Shutdown tracker will stop the process work and start a race with |
685 |
| - // a timeout condition. |
686 |
| - new ShutdownTracker(() -> { |
687 |
| - // Stopping the process worker timed out, kill the process |
688 |
| - logger.warn(format("[%s] Timed out waiting for process worker to complete, forcing a shutdown", task.getDeploymentId())); |
689 |
| - forcefullyStopProcess(); |
690 |
| - }, () -> { |
| 684 | + |
| 685 | + // This listener closes the native process and waits for the results |
| 686 | + // after the worker queue has finished |
| 687 | + var closeProcessListener = listener.delegateResponse((l, r) -> { |
691 | 688 | // process worker stopped within allotted time, close process
|
692 | 689 | closeProcessAndWaitForResultProcessor();
|
693 | 690 | closeNlpTaskProcessor();
|
694 |
| - }, threadPool, priorityProcessWorker, listener); |
| 691 | + l.onResponse(AcknowledgedResponse.TRUE); |
| 692 | + }); |
| 693 | + |
| 694 | + // Timeout listener waits |
| 695 | + var listenWithTimeout = ListenerTimeouts.wrapWithTimeout( |
| 696 | + threadPool, |
| 697 | + WORKER_QUEUE_COMPLETION_TIMEOUT, |
| 698 | + threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME), |
| 699 | + closeProcessListener, |
| 700 | + (l) -> { |
| 701 | + // Stopping the process worker timed out, kill the process |
| 702 | + logger.warn( |
| 703 | + format("[%s] Timed out waiting for process worker to complete, forcing a shutdown", task.getDeploymentId()) |
| 704 | + ); |
| 705 | + forcefullyStopProcess(); |
| 706 | + l.onResponse(AcknowledgedResponse.FALSE); |
| 707 | + } |
| 708 | + ); |
695 | 709 |
|
| 710 | + priorityProcessWorker.shutdownWithCallback(() -> listenWithTimeout.onResponse(AcknowledgedResponse.TRUE)); |
696 | 711 | }
|
697 | 712 |
|
698 | 713 | private void closeProcessAndWaitForResultProcessor() {
|
|
0 commit comments