neuralmagic
diff --git a/‎src/deepsparse/v2/operators/__init__.py
+2 b/‎src/deepsparse/v2/operators/__init__.py
+2
diff --git a/‎src/deepsparse/v2/operators/engine_operator.py
+32 b/‎src/deepsparse/v2/operators/engine_operator.py
+32
diff --git a/‎src/deepsparse/v2/schedulers/__init__.py
+2 b/‎src/deepsparse/v2/schedulers/__init__.py
+2
diff --git a/‎src/deepsparse/v2/schedulers/continuous_batching_scheduler.py
+175 b/‎src/deepsparse/v2/schedulers/continuous_batching_scheduler.py
+175
diff --git a/‎src/deepsparse/v2/schedulers/utils/__init__.py
+19 b/‎src/deepsparse/v2/schedulers/utils/__init__.py
+19
diff --git a/‎src/deepsparse/v2/schedulers/utils/continuous_batching_executor.py
+79 b/‎src/deepsparse/v2/schedulers/utils/continuous_batching_executor.py
+79
@@ -1,4 +1,5 @@
 # flake8: noqa
+# isort: skip_file
 
 # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
 #
@@ -14,3 +15,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .operator import *
+from .engine_operator import *
@@ -39,13 +39,45 @@ class EngineOperatorInputs(BaseModel):
         default=None,
     )
 
+    @classmethod
+    def join(cls, inputs: List["EngineOperatorInputs"]) -> "EngineOperatorInputs":
+        """
+        Combine several single-sample inputs into one batched input
+
+        :param inputs: list of separate EngineOperatorInputs, batch size must be 1
+        :return: a new instance of this class whose engine_inputs is the result
+            of joining every entry's engine_inputs
+        :raises RuntimeError: if the first array of any entry has a leading
+            dimension other than 1
+        """
+        # gather every entry's arrays up front, then validate them in a second pass
+        per_request_arrays = [single.engine_inputs for single in inputs]
+
+        for arrays in per_request_arrays:
+            leading_dim = arrays[0].shape[0]
+            if leading_dim == 1:
+                continue
+            raise RuntimeError(
+                "join requires all inputs to have batch size 1, found input with "
+                f"batch size {leading_dim}"
+            )
+
+        # join_engine_outputs is reused for inputs since both sides share the
+        # same data layout; second argument is the number of entries joined
+        num_requests = len(per_request_arrays)
+        batched_arrays = join_engine_outputs(per_request_arrays, num_requests)
+
+        return cls(engine_inputs=batched_arrays)
+
+    # pydantic model configuration; arbitrary_types_allowed lets fields hold
+    # types pydantic cannot validate itself (presumably the engine / array
+    # fields of this schema - TODO confirm)
     class Config:
         arbitrary_types_allowed = True
 
 
 class EngineOperatorOutputs(BaseModel):
     engine_outputs: List = Field(description="engine outputs")
 
+    def split(self) -> List["EngineOperatorOutputs"]:
+        """
+        Break these (possibly batched) outputs into per-sample outputs
+
+        :return: list of new instances of this class, one per chunk returned
+            by splitting engine_outputs with a batch size of 1
+        """
+        # split_engine_inputs is reused for outputs since inputs and outputs
+        # share the same layout (List[ndarray]); its second return value
+        # is not needed here
+        per_sample_outputs, _ = split_engine_inputs(self.engine_outputs, batch_size=1)
+
+        output_cls = self.__class__
+        separated = []
+        for sample_outputs in per_sample_outputs:
+            separated.append(output_cls(engine_outputs=sample_outputs))
+        return separated
+
 
 class EngineOperator(Operator):
     input_schema = EngineOperatorInputs
 
@@ -1,4 +1,5 @@
 # flake8: noqa
+# isort: skip_file
 
 # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
 #
@@ -16,3 +17,4 @@
 
 from .scheduler import *
 from .scheduler_group import *
+from .continuous_batching_scheduler import *
@@ -0,0 +1,175 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from concurrent.futures import Future
+from threading import Lock
+from typing import List
+
+from deepsparse.v2.operators import EngineOperator, Operator
+from deepsparse.v2.schedulers.scheduler import OperatorScheduler
+from deepsparse.v2.schedulers.utils import (
+    ContinuousBatchingExecutorThread,
+    ContinuousBatchingQueues,
+)
+
+
+__all__ = ["ContinuousBatchingScheduler"]
+
+
+# module-level slot for the shared ContinuousBatchingScheduler instance;
+# starts out empty and is read / assigned by
+# ContinuousBatchingScheduler.get_instance()
+_GLOBAL_SCHEDULER = None
+
+
+class ContinuousBatchingScheduler(OperatorScheduler):
+    """
+    Manages EngineOperator jobs that should be run with continuous batching.
+    Groups requests for the same engine into larger batches and returns
+    the result to the respective request threads after scheduled completion
+
+    Example code for getting or creating a shared instance for scheduling
+    between pipelines and adding an engine operator to the scheduler
+    within a pipeline
+
+    ```python
+
+    class MyPipeline(Pipeline):
+
+        def __init__(self):
+            ...
+            engine_operator = EngineOperator(...)
+            ...
+            continuous_batching_scheduler = ContinuousBatchingScheduler.get_instance()
+            continuous_batching_scheduler.add_engine_operator(
+                engine_operator, batch_sizes=[1, 2, 4]
+            )
+
+            super().__init__(...)
+    ```
+
+    :param max_workers: maximum number of threads to execute at once, default 1
+    """
+
+    def __init__(self, max_workers: int = 1):
+        self._max_workers = max_workers
+
+        # guards registration of operators in add_engine_operator
+        self._mutex = Lock()
+
+        # Dict[EngineOperator, Dict[batch_size, Engine]]
+        self._operators_to_engines = {}
+        self._queues = ContinuousBatchingQueues()
+
+        # create and start max number of worker threads; every worker shares
+        # the same queues object and the same operator -> engines mapping
+        self._threads = [
+            ContinuousBatchingExecutorThread(self._queues, self._operators_to_engines)
+            for _ in range(self.max_workers)
+        ]
+        for worker_thread in self._threads:
+            worker_thread.start()
+
+    @classmethod
+    def get_instance(cls) -> "ContinuousBatchingScheduler":
+        """
+        :return: global instance of the continuous batching scheduler. If one
+            does not exist yet, a scheduler with a single worker thread to
+            schedule all jobs is created and started
+        """
+        # the name is assigned below, so without this declaration Python would
+        # treat it as a local and the read would raise UnboundLocalError
+        global _GLOBAL_SCHEDULER
+
+        if _GLOBAL_SCHEDULER is None:
+            _GLOBAL_SCHEDULER = cls(max_workers=1)
+        return _GLOBAL_SCHEDULER
+
+    @property
+    def max_workers(self) -> int:
+        """
+        :return: maximum number of threads to execute at once
+        """
+        return self._max_workers
+
+    def submit(self, *args, operator: Operator, **kwargs) -> Future:
+        """
+        :param args: positional arguments, the first one must be an instance
+            of the input schema of the given operator
+        :param operator: operator to run
+        :return: future referencing the asynchronously run output of the operator
+        :raises ValueError: if the first positional argument is not an instance
+            of operator.input_schema
+        """
+        inputs = args[0]
+        if not isinstance(inputs, operator.input_schema):
+            raise ValueError(
+                "Inputs to ContinuousBatchingScheduler must be the specific "
+                f"input schema to the given operator. Expected {operator.input_schema} "
+                f"found {type(inputs)}"
+            )
+
+        # the future is handed to the queues together with the inputs; this
+        # method itself never resolves it
+        future = Future()
+        self._queues.add_queue_item(key=operator, item=inputs, future=future)
+
+        return future
+
+    def can_process(self, *args, operator: Operator, **kwargs) -> bool:
+        """
+        :param args: unused
+        :param operator: operator to check
+        :return: True if the operator is tracked by this scheduler, i.e. it is
+            present in both the operator to engines mapping and the queues
+        """
+        return operator in self._operators_to_engines and operator in self._queues
+
+    def add_engine_operator(
+        self, engine_operator: EngineOperator, batch_sizes: List[int]
+    ):
+        """
+        Adds tracking for an engine operator to this scheduler
+        with continuous batching for the given sizes. Adding an operator
+        that is already tracked is a no-op
+
+        :param engine_operator: an EngineOperator, must be compiled with
+            batch_size=1
+        :param batch_sizes: batch sizes to use for continuous batching
+        :raises ValueError: if engine_operator is not an EngineOperator or
+            its batch_size is not 1
+        """
+        # lock updates to _operators_to_engines while updating; the context
+        # manager releases the lock on the early return and on the validation
+        # errors below as well as on normal completion
+        with self._mutex:
+            if engine_operator in self._operators_to_engines:
+                # operator already added
+                return
+
+            if not isinstance(engine_operator, EngineOperator):
+                raise ValueError(
+                    f"Expected an EngineOperator instance, found {type(engine_operator)}"
+                )
+            if engine_operator.batch_size != 1:
+                raise ValueError(
+                    "For continuous batching, EngineOperator must have batch_size=1. "
+                    f"found batch_size={engine_operator.batch_size}"
+                )
+
+            # build batch_size -> engine dict for this operator, starting with
+            # the base engine (batch size validated to be 1 above)
+            operator_engines = {engine_operator.batch_size: engine_operator.engine}
+
+            # compile auxiliary engines for continuous batching
+            for batch_size in batch_sizes:
+                if batch_size in operator_engines:
+                    continue  # base engine or duplicate entry, already added
+                # NOTE(review): assumes EngineOperator exposes
+                # create_engine(batch_size=...) - confirm against its definition
+                operator_engines[batch_size] = engine_operator.create_engine(
+                    batch_size=batch_size
+                )
+
+            self._operators_to_engines[engine_operator] = operator_engines
+            self._queues.add_queue(
+                key=engine_operator,
+                batch_sizes=list(operator_engines.keys()),
+            )
@@ -0,0 +1,19 @@
+# flake8: noqa
+# isort: skip_file
+
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .continuous_batching_queues import *
+from .continuous_batching_executor import *
@@ -0,0 +1,79 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from threading import Thread
+from typing import Dict
+
+from deepsparse import Engine
+from deepsparse.v2.operators import EngineOperator
+from deepsparse.v2.schedulers.utils.continuous_batching_queues import (
+    ContinuousBatchingQueues,
+)
+
+
+__all__ = [
+    "ContinuousBatchingExecutorThread",
+]
+
+
+class ContinuousBatchingExecutorThread(Thread):
+    """
+    Thread that when started runs indefinitely, grabbing a valid batch from
+    the queues when possible and running them in the correct engine.
+    If running a batch fails, the error is set on every future of that
+    batch and the thread keeps serving subsequent batches
+
+    :param queues: ContinuousBatchingQueues object containing a queue for
+        each valid engine
+    :param operators_to_engines: dictionary mapping valid engine operators
+        to a dictionary of its valid batch sizes mapped to an engine compiled
+        for that batch size
+    """
+
+    def __init__(
+        self,
+        queues: ContinuousBatchingQueues,
+        operators_to_engines: Dict[EngineOperator, Dict[int, Engine]],
+    ):
+        self._queues = queues
+        self._operators_to_engines = operators_to_engines
+        self._should_stop = False  # not consulted by the working loop below
+
+        super().__init__(target=self._working_loop)
+        self.daemon = True  # worker thread should exit when main thread exits
+
+    def _working_loop(self):
+        # indefinitely wait for batch, run batch, split and resolve futures
+        while True:
+            # wait for next batch to be available
+            engine_operator, batch = self._queues.pop_batch(block=True)
+
+            # unpack batch of QueueEntry objects, each entry unpacks into
+            # three values of which the first two are the inputs and the future
+            engine_inputs, futures, _ = list(zip(*batch))
+
+            try:
+                batch_size = len(engine_inputs)
+
+                # type is EngineOperatorInputs
+                joined_inputs = engine_operator.input_schema.join(engine_inputs)
+
+                # get engine for this operator compiled to the popped batch size
+                # and set the inputs to execute with it
+                joined_inputs.engine = self._operators_to_engines[engine_operator][
+                    batch_size
+                ]
+
+                # run the engine operator with the given engine at the joined
+                # batch size
+                joined_outputs = engine_operator(joined_inputs)
+
+                split_outputs = joined_outputs.split()
+            except Exception as error:
+                # without this the thread would die and the callers waiting on
+                # these futures would block forever; hand them the error instead
+                for future in futures:
+                    future.set_exception(error)
+                continue
+
+            # return the split results to their respective futures
+            for output, future in zip(split_outputs, futures):
+                future.set_result(output)
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`# flake8: noqa`
	`2`	`+# isort: skip_file`
`2`	`3`
`3`	`4`	`# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.`
`4`	`5`	`#`
`@@ -14,3 +15,4 @@`
`14`	`15`	`# See the License for the specific language governing permissions and`
`15`	`16`	`# limitations under the License.`
`16`	`17`	`from .operator import *`
	`18`	`+from .engine_operator import *`