Skip to content

Commit 83023ce

Browse files
Be able to shutdown homeserver that failed to start (#19232)
For example, a homeserver can fail to `start` if the port is already in use or the port number is invalid (not 0-65535) Fix #19189 Follow-up to #18828 ### Background As part of Element's plan to support a light form of vhosting (virtual host) (multiple instances of Synapse in the same Python process) (c.f [Synapse Pro for small hosts](https://docs.element.io/latest/element-server-suite-pro/synapse-pro-for-small-hosts/overview/)), we're currently diving into the details and implications of running multiple instances of Synapse in the same Python process. "Clean tenant deprovisioning" tracked internally by element-hq/synapse-small-hosts#50
1 parent 3931667 commit 83023ce

File tree

3 files changed

+91
-31
lines changed

3 files changed

+91
-31
lines changed

changelog.d/19232.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix `HomeServer.shutdown()` failing if the homeserver failed to `start`.

synapse/app/_base.py

Lines changed: 83 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from wsgiref.simple_server import WSGIServer
4242

4343
from cryptography.utils import CryptographyDeprecationWarning
44-
from typing_extensions import ParamSpec
44+
from typing_extensions import ParamSpec, assert_never
4545

4646
import twisted
4747
from twisted.internet import defer, error, reactor as _reactor
@@ -64,7 +64,12 @@
6464
from synapse.config import ConfigError
6565
from synapse.config._base import format_config_error
6666
from synapse.config.homeserver import HomeServerConfig
67-
from synapse.config.server import ListenerConfig, ManholeConfig, TCPListenerConfig
67+
from synapse.config.server import (
68+
ListenerConfig,
69+
ManholeConfig,
70+
TCPListenerConfig,
71+
UnixListenerConfig,
72+
)
6873
from synapse.crypto import context_factory
6974
from synapse.events.auto_accept_invites import InviteAutoAccepter
7075
from synapse.events.presence_router import load_legacy_presence_router
@@ -413,6 +418,37 @@ def listen_unix(
413418
]
414419

415420

421+
class ListenerException(RuntimeError):
422+
"""
423+
An exception raised when we fail to listen with the given `ListenerConfig`.
424+
425+
Attributes:
426+
listener_config: The listener config that caused the exception.
427+
"""
428+
429+
def __init__(
430+
self,
431+
listener_config: ListenerConfig,
432+
):
433+
listener_human_name = ""
434+
port = ""
435+
if isinstance(listener_config, TCPListenerConfig):
436+
listener_human_name = "TCP port"
437+
port = str(listener_config.port)
438+
elif isinstance(listener_config, UnixListenerConfig):
439+
listener_human_name = "unix socket"
440+
port = listener_config.path
441+
else:
442+
assert_never(listener_config)
443+
444+
super().__init__(
445+
"Failed to listen on %s (%s) with the given listener config: %s"
446+
% (listener_human_name, port, listener_config)
447+
)
448+
449+
self.listener_config = listener_config
450+
451+
416452
def listen_http(
417453
hs: "HomeServer",
418454
listener_config: ListenerConfig,
@@ -447,39 +483,55 @@ def listen_http(
447483
hs=hs,
448484
)
449485

450-
if isinstance(listener_config, TCPListenerConfig):
451-
if listener_config.is_tls():
452-
# refresh_certificate should have been called before this.
453-
assert context_factory is not None
454-
ports = listen_ssl(
455-
listener_config.bind_addresses,
456-
listener_config.port,
457-
site,
458-
context_factory,
459-
reactor=reactor,
486+
try:
487+
if isinstance(listener_config, TCPListenerConfig):
488+
if listener_config.is_tls():
489+
# refresh_certificate should have been called before this.
490+
assert context_factory is not None
491+
ports = listen_ssl(
492+
listener_config.bind_addresses,
493+
listener_config.port,
494+
site,
495+
context_factory,
496+
reactor=reactor,
497+
)
498+
logger.info(
499+
"Synapse now listening on TCP port %d (TLS)", listener_config.port
500+
)
501+
else:
502+
ports = listen_tcp(
503+
listener_config.bind_addresses,
504+
listener_config.port,
505+
site,
506+
reactor=reactor,
507+
)
508+
logger.info(
509+
"Synapse now listening on TCP port %d", listener_config.port
510+
)
511+
512+
elif isinstance(listener_config, UnixListenerConfig):
513+
ports = listen_unix(
514+
listener_config.path, listener_config.mode, site, reactor=reactor
460515
)
516+
# getHost() returns a UNIXAddress which contains an instance variable of 'name'
517+
# encoded as a byte string. Decode as utf-8 so pretty.
461518
logger.info(
462-
"Synapse now listening on TCP port %d (TLS)", listener_config.port
519+
"Synapse now listening on Unix Socket at: %s",
520+
ports[0].getHost().name.decode("utf-8"),
463521
)
464522
else:
465-
ports = listen_tcp(
466-
listener_config.bind_addresses,
467-
listener_config.port,
468-
site,
469-
reactor=reactor,
470-
)
471-
logger.info("Synapse now listening on TCP port %d", listener_config.port)
472-
473-
else:
474-
ports = listen_unix(
475-
listener_config.path, listener_config.mode, site, reactor=reactor
476-
)
477-
# getHost() returns a UNIXAddress which contains an instance variable of 'name'
478-
# encoded as a byte string. Decode as utf-8 so pretty.
479-
logger.info(
480-
"Synapse now listening on Unix Socket at: %s",
481-
ports[0].getHost().name.decode("utf-8"),
482-
)
523+
assert_never(listener_config)
524+
except Exception as exc:
525+
# The Twisted interface says that "Users should not call this function
526+
# themselves!" but this appears to be the correct/only way handle proper cleanup
527+
# of the site when things go wrong. In the normal case, a `Port` is created
528+
# which we can call `Port.stopListening()` on to do the same thing (but no
529+
# `Port` is created when an error occurs).
530+
#
531+
# We use `site.stopFactory()` instead of `site.doStop()` as the latter assumes
532+
# that `site.doStart()` was called (which won't be the case if an error occurs).
533+
site.stopFactory()
534+
raise ListenerException(listener_config) from exc
483535

484536
return ports
485537

synapse/http/site.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,13 @@ def stopFactory(self) -> None:
815815
protocol.transport.loseConnection()
816816
self.connections.clear()
817817

818+
# Replace the resource tree with an empty resource to break circular references
819+
# to the resource tree which holds a bunch of homeserver references. This is
820+
# important if we try to call `hs.shutdown()` after `start` fails. For some
821+
# reason, this doesn't seem to be necessary in the normal case where `start`
822+
# succeeds and we call `hs.shutdown()` later.
823+
self.resource = Resource()
824+
818825
def log(self, request: SynapseRequest) -> None: # type: ignore[override]
819826
pass
820827

0 commit comments

Comments
 (0)