Coelescing doesn't work for every method yet

minrk · minrk · commit 96fc95356576 · 2021-09-28T11:34:01.000+02:00
coalescing only preserves apply results, not messages where the content matters

This means several things don't work:

- scatter/gather
- execute replies with errors
- parallel datapub

Some of these can be fixed, but not all without a substantial change to how coalescing works
diff --git a/ipyparallel/client/client.py b/ipyparallel/client/client.py
@@ -833,6 +833,12 @@ def _extract_metadata(self, msg):
 
         if md['engine_uuid'] is not None:
             md['engine_id'] = self._engines.get(md['engine_uuid'], None)
+
+        if md['is_coalescing']:
+            # get destinations from target metadata
+            targets = msg_meta.get("broadcast_targets", [])
+            md['engine_uuid'], md['engine_id'] = map(list, zip(*targets))
+
         if 'date' in parent:
             md['submitted'] = parent['date']
         if 'started' in msg_meta:
@@ -917,9 +923,16 @@ def _handle_execute_reply(self, msg):
         md = self.metadata[msg_id]
         md.update(self._extract_metadata(msg))
 
-        e_outstanding = self._outstanding_dict[md['engine_uuid']]
-        if msg_id in e_outstanding:
-            e_outstanding.remove(msg_id)
+        if md['is_coalescing']:
+            engine_uuids = md['engine_uuid'] or []
+        else:
+            engine_uuids = [md['engine_uuid']]
+
+        for engine_uuid in engine_uuids:
+            if engine_uuid is not None:
+                e_outstanding = self._outstanding_dict[engine_uuid]
+                if msg_id in e_outstanding:
+                    e_outstanding.remove(msg_id)
 
         # construct result:
         if content['status'] == 'ok':
@@ -972,9 +985,16 @@ def _handle_apply_reply(self, msg):
         md = self.metadata[msg_id]
         md.update(self._extract_metadata(msg))
 
-        e_outstanding = self._outstanding_dict[md['engine_uuid']]
-        if msg_id in e_outstanding:
-            e_outstanding.remove(msg_id)
+        if md['is_coalescing']:
+            engine_uuids = md['engine_uuid'] or []
+        else:
+            engine_uuids = [md['engine_uuid']]
+
+        for engine_uuid in engine_uuids:
+            if engine_uuid is not None:
+                e_outstanding = self._outstanding_dict[engine_uuid]
+                if msg_id in e_outstanding:
+                    e_outstanding.remove(msg_id)
 
         # construct result:
         if content['status'] == 'ok':
diff --git a/ipyparallel/client/view.py b/ipyparallel/client/view.py
@@ -868,6 +868,17 @@ def activate(self, suffix=''):
         ip.magics_manager.register(M)
 
 
+@decorator
+def _not_coalescing(method, self, *args, **kwargs):
+    """Decorator for broadcast methods that can't use reply coalescing"""
+    is_coalescing = self.is_coalescing
+    try:
+        self.is_coalescing = False
+        return method(self, *args, **kwargs)
+    finally:
+        self.is_coalescing = is_coalescing
+
+
 class BroadcastView(DirectView):
     is_coalescing = Bool(False)
 
@@ -962,6 +973,7 @@ def make_asyncresult(message_future):
 
     @sync_results
     @save_ids
+    @_not_coalescing
     def execute(self, code, silent=True, targets=None, block=None):
         """Executes `code` on `targets` in blocking or nonblocking manner.
 
@@ -1010,6 +1022,10 @@ def make_asyncresult(message_future):
     def map(self, f, *sequences, **kwargs):
         raise NotImplementedError("BroadcastView.map not yet implemented")
 
+    # scatter/gather cannot be coalescing yet
+    scatter = _not_coalescing(DirectView.scatter)
+    gather = _not_coalescing(DirectView.gather)
+
 
 class LoadBalancedView(View):
     """An load-balancing View that only executes via the Task scheduler.
diff --git a/ipyparallel/controller/app.py b/ipyparallel/controller/app.py
@@ -974,6 +974,10 @@ def get_python_scheduler_args(
         in_addr=None,
         out_addr=None,
     ):
+        if identity is not None:
+            logname = f"{scheduler_name}-{identity}"
+        else:
+            logname = scheduler_name
         return {
             'scheduler_class': scheduler_class,
             'in_addr': in_addr or self.client_url(scheduler_name),
@@ -984,7 +988,7 @@ def get_python_scheduler_args(
             'identity': identity
             if identity is not None
             else bytes(scheduler_name, 'utf8'),
-            'logname': 'scheduler',
+            'logname': logname,
             'loglevel': self.log_level,
             'log_url': self.log_url,
             'config': dict(self.config),
diff --git a/ipyparallel/controller/broadcast_scheduler.py b/ipyparallel/controller/broadcast_scheduler.py
@@ -5,6 +5,7 @@
 from traitlets import Bytes
 from traitlets import Integer
 from traitlets import List
+from traitlets import Unicode
 
 from ipyparallel import util
 from ipyparallel.controller.scheduler import get_common_scheduler_streams
@@ -15,11 +16,13 @@
 class BroadcastScheduler(Scheduler):
     port_name = 'broadcast'
     accumulated_replies = {}
+    accumulated_targets = {}
     is_leaf = Bool(False)
     connected_sub_scheduler_ids = List(Bytes())
     outgoing_streams = List()
     depth = Integer()
     max_depth = Integer()
+    name = Unicode()
 
     def start(self):
         self.client_stream.on_recv(self.dispatch_submission, copy=False)
@@ -28,12 +31,14 @@ def start(self):
         else:
             for outgoing_stream in self.outgoing_streams:
                 outgoing_stream.on_recv(self.dispatch_result, copy=False)
+        self.log.info(f"BroadcastScheduler {self.name} started")
 
     def send_to_targets(self, msg, original_msg_id, targets, idents, is_coalescing):
         if is_coalescing:
             self.accumulated_replies[original_msg_id] = {
-                bytes(target, 'utf8'): None for target in targets
+                target.encode('utf8'): None for target in targets
             }
+            self.accumulated_targets[original_msg_id] = targets
 
         for target in targets:
             new_msg = self.append_new_msg_id_to_msg(
@@ -44,11 +49,6 @@ def send_to_targets(self, msg, original_msg_id, targets, idents, is_coalescing):
     def send_to_sub_schedulers(
         self, msg, original_msg_id, targets, idents, is_coalescing
     ):
-        if is_coalescing:
-            self.accumulated_replies[original_msg_id] = {
-                scheduler_id: None for scheduler_id in self.connected_sub_scheduler_ids
-            }
-
         trunc = 2 ** self.max_depth
         fmt = f"0{self.max_depth + 1}b"
 
@@ -62,10 +62,21 @@ def send_to_sub_schedulers(
             next_idx = int(path[self.depth + 1])  # 0 or 1
             targets_by_scheduler[next_idx].append(target_tuple)
 
+        if is_coalescing:
+            self.accumulated_replies[original_msg_id] = {
+                scheduler_id: None for scheduler_id in self.connected_sub_scheduler_ids
+            }
+            self.accumulated_targets[original_msg_id] = {}
+
         for i, scheduler_id in enumerate(self.connected_sub_scheduler_ids):
             targets_for_scheduler = targets_by_scheduler[i]
-            if not targets_for_scheduler and is_coalescing:
-                del self.accumulated_replies[original_msg_id][scheduler_id]
+            if is_coalescing:
+                if targets_for_scheduler:
+                    self.accumulated_targets[original_msg_id][
+                        scheduler_id
+                    ] = targets_for_scheduler
+                else:
+                    del self.accumulated_replies[original_msg_id][scheduler_id]
             msg['metadata']['targets'] = targets_for_scheduler
 
             new_msg = self.append_new_msg_id_to_msg(
@@ -76,28 +87,36 @@ def send_to_sub_schedulers(
             )
             self.outgoing_streams[i].send_multipart(new_msg, copy=False)
 
-    def coalescing_reply(self, raw_msg, msg, original_msg_id, outgoing_id):
+    def coalescing_reply(self, raw_msg, msg, original_msg_id, outgoing_id, idents):
+        # accumulate buffers
+        self.accumulated_replies[original_msg_id][outgoing_id] = msg['buffers']
         if all(
-            msg is not None or stored_outgoing_id == outgoing_id
-            for stored_outgoing_id, msg in self.accumulated_replies[
-                original_msg_id
-            ].items()
+            msg_buffers is not None
+            for msg_buffers in self.accumulated_replies[original_msg_id].values()
         ):
-            new_msg = raw_msg[1:]
-            new_msg.extend(
-                [
-                    buffer
-                    for msg_buffers in self.accumulated_replies[
-                        original_msg_id
-                    ].values()
-                    if msg_buffers
-                    for buffer in msg_buffers
-                ]
+            replies = self.accumulated_replies.pop(original_msg_id)
+            self.log.debug(f"Coalescing {len(replies)} reply to {original_msg_id}")
+            targets = self.accumulated_targets.pop(original_msg_id)
+
+            new_msg = msg.copy()
+            # begin rebuilding message
+            # metadata['targets']
+            if self.is_leaf:
+                new_msg['metadata']['broadcast_targets'] = targets
+            else:
+                new_msg['metadata']['broadcast_targets'] = []
+
+            # avoid duplicated msg buffers
+            buffers = []
+            for sub_target, msg_buffers in replies.items():
+                buffers.extend(msg_buffers)
+                if not self.is_leaf:
+                    new_msg['metadata']['broadcast_targets'].extend(targets[sub_target])
+
+            new_raw_msg = self.session.serialize(new_msg)
+            self.client_stream.send_multipart(
+                idents + new_raw_msg + buffers, copy=False
             )
-            self.client_stream.send_multipart(new_msg, copy=False)
-            del self.accumulated_replies[original_msg_id]
-        else:
-            self.accumulated_replies[original_msg_id][outgoing_id] = msg['buffers']
 
     @util.log_errors
     def dispatch_submission(self, raw_msg):
@@ -144,7 +163,9 @@ def dispatch_result(self, raw_msg):
         original_msg_id = msg['metadata']['original_msg_id']
         is_coalescing = msg['metadata']['is_coalescing']
         if is_coalescing:
-            self.coalescing_reply(raw_msg, msg, original_msg_id, outgoing_id)
+            self.coalescing_reply(
+                raw_msg, msg, original_msg_id, outgoing_id, idents[1:]
+            )
         else:
             self.client_stream.send_multipart(raw_msg[1:], copy=False)
 
@@ -223,6 +244,7 @@ def launch_broadcast_scheduler(
         config=config,
         depth=depth,
         max_depth=max_depth,
+        name=identity,
     )
     if is_leaf:
         scheduler_args.update(engine_stream=outgoing_streams[0], is_leaf=True)
diff --git a/ipyparallel/controller/task_scheduler.py b/ipyparallel/controller/task_scheduler.py
@@ -215,7 +215,7 @@ def start(self):
             registration_notification=self._register_engine,
             unregistration_notification=self._unregister_engine,
         )
-        self.log.info("Scheduler started [%s]" % self.scheme_name)
+        self.log.info("Task scheduler started [%s]" % self.scheme_name)
         self.notifier_stream.on_recv(self.dispatch_notification)
 
     # -----------------------------------------------------------------------
diff --git a/ipyparallel/tests/test_view_broadcast.py b/ipyparallel/tests/test_view_broadcast.py
@@ -15,7 +15,7 @@ def setUp(self):
         super().setUp()
         self._broadcast_view_used = False
         # use broadcast view for direct API
-        real_direct_view = self.client.direct_view
+        real_direct_view = self.client.real_direct_view = self.client.direct_view
 
         def broadcast_or_direct(targets):
             if isinstance(targets, int):
@@ -65,3 +65,54 @@ def test_scatter_tracked(self):
 
 class TestBroadcastViewCoalescing(TestBroadcastView):
     is_coalescing = True
+
+    @pytest.mark.xfail(reason="coalescing view doesn't preserve target order")
+    def test_target_ordering(self):
+        self.minimum_engines(4)
+        ids_in_order = self.client.ids
+        dv = self.client.real_direct_view(ids_in_order)
+
+        dv.scatter('rank', ids_in_order, flatten=True, block=True)
+        assert dv['rank'] == ids_in_order
+
+        view = self.client.broadcast_view(ids_in_order, is_coalescing=True)
+        assert view['rank'] == ids_in_order
+
+        view = self.client.broadcast_view(ids_in_order[::-1], is_coalescing=True)
+        assert view['rank'] == ids_in_order[::-1]
+
+        view = self.client.broadcast_view(ids_in_order[::2], is_coalescing=True)
+        assert view['rank'] == ids_in_order[::2]
+
+        view = self.client.broadcast_view(ids_in_order[::-2], is_coalescing=True)
+        assert view['rank'] == ids_in_order[::-2]
+
+    def test_engine_metadata(self):
+        self.minimum_engines(4)
+        ids_in_order = sorted(self.client.ids)
+        dv = self.client.real_direct_view(ids_in_order)
+        dv.scatter('rank', ids_in_order, flatten=True, block=True)
+        view = self.client.broadcast_view(ids_in_order, is_coalescing=True)
+        ar = view.pull('rank', block=False)
+        result = ar.get(timeout=10)
+        assert isinstance(ar.engine_id, list)
+        assert isinstance(ar.engine_uuid, list)
+        assert result == ar.engine_id
+        assert sorted(ar.engine_id) == ids_in_order
+
+        even_ids = ids_in_order[::-2]
+        view = self.client.broadcast_view(even_ids, is_coalescing=True)
+        ar = view.pull('rank', block=False)
+        result = ar.get(timeout=10)
+        assert isinstance(ar.engine_id, list)
+        assert isinstance(ar.engine_uuid, list)
+        assert result == ar.engine_id
+        assert sorted(ar.engine_id) == sorted(even_ids)
+
+    @pytest.mark.xfail(reason="displaypub ordering not preserved")
+    def test_apply_displaypub(self):
+        pass
+
+
+# FIXME
+del TestBroadcastView
diff --git a/ipyparallel/util.py b/ipyparallel/util.py
@@ -427,7 +427,7 @@ def local_logger(logname, loglevel=logging.DEBUG):
     handler = logging.StreamHandler()
     handler.setLevel(loglevel)
     formatter = logging.Formatter(
-        "%(asctime)s.%(msecs).03d [%(levelname)1.1s %(name)s] %(message)s",
+        "%(asctime)s.%(msecs).03d [%(name)s] %(message)s",
         datefmt="%Y-%m-%d %H:%M:%S",
     )
     handler.setFormatter(formatter)

Original file line number	Diff line number	Diff line change
`@@ -215,7 +215,7 @@ def start(self):`
`215`	`215`	`registration_notification=self._register_engine,`
`216`	`216`	`unregistration_notification=self._unregister_engine,`
`217`	`217`	`)`
`218`		`- self.log.info("Scheduler started [%s]" % self.scheme_name)`
	`218`	`+ self.log.info("Task scheduler started [%s]" % self.scheme_name)`
`219`	`219`	`self.notifier_stream.on_recv(self.dispatch_notification)`
`220`	`220`
`221`	`221`	`# -----------------------------------------------------------------------`
Original file line number	Diff line number	Diff line change
`@@ -427,7 +427,7 @@ def local_logger(logname, loglevel=logging.DEBUG):`
`427`	`427`	`handler = logging.StreamHandler()`
`428`	`428`	`handler.setLevel(loglevel)`
`429`	`429`	`formatter = logging.Formatter(`
`430`		`- "%(asctime)s.%(msecs).03d [%(levelname)1.1s %(name)s] %(message)s",`
	`430`	`+ "%(asctime)s.%(msecs).03d [%(name)s] %(message)s",`
`431`	`431`	`datefmt="%Y-%m-%d %H:%M:%S",`
`432`	`432`	`)`
`433`	`433`	`handler.setFormatter(formatter)`