Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/19232.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix `HomeServer.shutdown()` failing if the homeserver failed to `start`.
74 changes: 44 additions & 30 deletions synapse/app/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,39 +447,53 @@ def listen_http(
hs=hs,
)

if isinstance(listener_config, TCPListenerConfig):
if listener_config.is_tls():
# refresh_certificate should have been called before this.
assert context_factory is not None
ports = listen_ssl(
listener_config.bind_addresses,
listener_config.port,
site,
context_factory,
reactor=reactor,
try:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Best to review this part with the "Hide whitespace" option when viewing the diff

if isinstance(listener_config, TCPListenerConfig):
if listener_config.is_tls():
# refresh_certificate should have been called before this.
assert context_factory is not None
ports = listen_ssl(
listener_config.bind_addresses,
listener_config.port,
site,
context_factory,
reactor=reactor,
)
logger.info(
"Synapse now listening on TCP port %d (TLS)", listener_config.port
)
else:
ports = listen_tcp(
listener_config.bind_addresses,
listener_config.port,
site,
reactor=reactor,
)
logger.info(
"Synapse now listening on TCP port %d", listener_config.port
)

else:
ports = listen_unix(
listener_config.path, listener_config.mode, site, reactor=reactor
)
# getHost() returns a UNIXAddress which contains an instance variable of 'name'
# encoded as a byte string. Decode as utf-8 so pretty.
logger.info(
"Synapse now listening on TCP port %d (TLS)", listener_config.port
)
else:
ports = listen_tcp(
listener_config.bind_addresses,
listener_config.port,
site,
reactor=reactor,
"Synapse now listening on Unix Socket at: %s",
ports[0].getHost().name.decode("utf-8"),
)
logger.info("Synapse now listening on TCP port %d", listener_config.port)

else:
ports = listen_unix(
listener_config.path, listener_config.mode, site, reactor=reactor
)
# getHost() returns a UNIXAddress which contains an instance variable of 'name'
# encoded as a byte string. Decode as utf-8 so pretty.
logger.info(
"Synapse now listening on Unix Socket at: %s",
ports[0].getHost().name.decode("utf-8"),
)
except Exception as exc:
# The Twisted interface says that "Users should not call this function
# themselves!" but this appears to be the correct way to handle proper cleanup of
# the site when things go wrong. In the normal case, a `Port` is created which
# we can call `Port.stopListening()` on to do the same thing (but no `Port` is
# created when an error occurs).
#
# We use `site.stopFactory()` instead of `site.doStop()` as the latter assumes
# that `site.doStart()` was called (which won't be the case if an error occurs).
site.stopFactory()
raise Exception("asdf failed to listen") from exc

return ports

Expand Down
7 changes: 7 additions & 0 deletions synapse/http/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,13 @@ def stopFactory(self) -> None:
protocol.transport.loseConnection()
self.connections.clear()

# Replace the resource tree with an empty resource to break circular references
# to the resource tree which holds a bunch of homeserver references. This is
# important if we try to call `hs.shutdown()` after `start` fails. For some
# reason, this doesn't seem to be necessary in the normal case where `start`
# succeeds and we call `hs.shutdown()` later.
self.resource = Resource()
Comment on lines +818 to +823
Copy link
Contributor Author

@MadLittleMods MadLittleMods Nov 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've spent too long trying to figure out why this works exactly. Specifically, why the normal case works without this change but the error case requires it.

The internal references to hs should be the same between the normal and error cases. And we have even fewer external references to SynapseSite since it just gets orphaned in this function context in the error case and drops away as soon as we handle the traceback/exception in the layers above.

In the normal case, Port holds a reference to the site and then when we call Port.stopListening(), it calls site.stopFactory() down the line just like we're doing in the error case now. But in the error case, it doesn't work without this additional change to sever the circular references.

I've looked through the Twisted internals to try to spot the difference but was unsuccessful. I also tried throwing an LLM at the problem, but it was also unable to spot anything.

In any case, clearing circular references in these kinds of callbacks is pretty normal. For example, it's even called out in the HTTPChannel.connectionLost docstring. twisted.web.server.Site.stopFactory describes it as:

This can be overridden to perform 'shutdown' tasks such as disconnecting database connections, closing files, etc.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to have this + the comment saying we don't understand why it's necessary only sometimes.

Replacing your inners with empty values on shutdown/destruction is totally fine as a practice to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I'm assuming the current comment is sufficient)


def log(self, request: SynapseRequest) -> None: # type: ignore[override]
pass

Expand Down
14 changes: 14 additions & 0 deletions tests/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,20 @@ class ThreadPool:
See twisted.python.threadpool.ThreadPool
"""

min = 1
"""
"minimum number of threads in the pool"

This is a "threadless" thread pool, so we always have one thread.
"""

max = 1
"""
"maximum number of threads in the pool"

This is a "threadless" thread pool, so we always have one thread.
"""

def __init__(self, reactor: IReactorTime):
self._reactor = reactor

Expand Down
Loading