diff --git a/beacon_chain/beacon_node.nim b/beacon_chain/beacon_node.nim index bb989f2d36..c1fcfd8e37 100644 --- a/beacon_chain/beacon_node.nim +++ b/beacon_chain/beacon_node.nim @@ -14,6 +14,7 @@ import # Nimble packages chronos, presto, bearssl/rand, + metrics, metrics/chronos_httpserver, # Local modules "."/[beacon_clock, beacon_chain_db, conf, light_client], @@ -86,6 +87,7 @@ type elManager*: ELManager restServer*: RestServerRef keymanagerHost*: ref KeymanagerHost + metricsServer*: Opt[MetricsHttpServerRef] keymanagerServer*: RestServerRef keystoreCache*: KeystoreCacheRef eventBus*: EventBus diff --git a/beacon_chain/nimbus_beacon_node.nim b/beacon_chain/nimbus_beacon_node.nim index c6a97c6d24..ac749c849b 100644 --- a/beacon_chain/nimbus_beacon_node.nim +++ b/beacon_chain/nimbus_beacon_node.nim @@ -8,7 +8,7 @@ {.push raises: [].} import - std/[os, random, terminal, times], + std/[os, random, terminal, times, exitprocs], chronos, chronicles, metrics, metrics/chronos_httpserver, stew/[byteutils, io2], @@ -2111,6 +2111,8 @@ proc stop(node: BeaconNode) = except CatchableError as exc: warn "Couldn't stop network", msg = exc.msg + waitFor node.metricsServer.stopMetricsServer() + node.attachedValidators[].slashingProtection.close() node.attachedValidators[].close() node.db.close() @@ -2166,7 +2168,7 @@ var gPidFile: string proc createPidFile(filename: string) {.raises: [IOError].} = writeFile filename, $os.getCurrentProcessId() gPidFile = filename - addQuitProc proc {.noconv.} = discard io2.removeFile(gPidFile) + addExitProc proc {.noconv.} = discard io2.removeFile(gPidFile) proc initializeNetworking(node: BeaconNode) {.async.} = node.installMessageValidators() @@ -2378,16 +2380,8 @@ proc doRunBeaconNode(config: var BeaconNodeConf, rng: ref HmacDrbgContext) {.rai config.createDumpDirs() - if config.metricsEnabled: - let metricsAddress = config.metricsAddress - notice "Starting metrics HTTP server", - url = "http://" & $metricsAddress & ":" & $config.metricsPort & 
"/metrics" - try: - startMetricsHttpServer($metricsAddress, config.metricsPort) - except CatchableError as exc: - raise exc - except Exception as exc: - raiseAssert exc.msg # TODO fix metrics + let metricsServer = (waitFor config.initMetricsServer()).valueOr: + return # Nim GC metrics (for the main thread) will be collected in onSecond(), but # we disable piggy-backing on other metrics here. @@ -2435,6 +2429,8 @@ proc doRunBeaconNode(config: var BeaconNodeConf, rng: ref HmacDrbgContext) {.rai let node = waitFor BeaconNode.init(rng, config, metadata) + node.metricsServer = metricsServer + if bnStatus == BeaconNodeStatus.Stopping: return diff --git a/beacon_chain/nimbus_binary_common.nim b/beacon_chain/nimbus_binary_common.nim index a5acbd1596..6eea868a1d 100644 --- a/beacon_chain/nimbus_binary_common.nim +++ b/beacon_chain/nimbus_binary_common.nim @@ -1,5 +1,5 @@ # beacon_chain -# Copyright (c) 2018-2024 Status Research & Development GmbH +# Copyright (c) 2018-2025 Status Research & Development GmbH # Licensed and distributed under either of # * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT). # * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0). @@ -16,7 +16,7 @@ import # Nimble packages chronos, confutils, presto, toml_serialization, metrics, chronicles, chronicles/helpers as chroniclesHelpers, chronicles/topics_registry, - stew/io2, + stew/io2, metrics, metrics/chronos_httpserver, # Local modules ./spec/[helpers, keystore], @@ -448,6 +448,40 @@ proc initKeymanagerServer*( KeymanagerInitResult(server: keymanagerServer, token: token) +proc initMetricsServer*( + config: AnyConf +): Future[Result[Opt[MetricsHttpServerRef], string]] {. 
+ async: (raises: [CancelledError]).} = + if config.metricsEnabled: + let + metricsAddress = config.metricsAddress + metricsPort = config.metricsPort + url = "http://" & $metricsAddress & ":" & $metricsPort & "/metrics" + + info "Starting metrics HTTP server", url = url + + let server = MetricsHttpServerRef.new($metricsAddress, metricsPort).valueOr: + fatal "Could not start metrics HTTP server", + url = url, reason = error + return err($error) + + try: + await server.start() + except MetricsError as exc: + fatal "Could not start metrics HTTP server", + url = url, reason = exc.msg + return err(exc.msg) + + ok(Opt.some(server)) + else: + ok(Opt.none(MetricsHttpServerRef)) + +proc stopMetricsServer*(v: Opt[MetricsHttpServerRef]) {. + async: (raises: []).} = + if v.isSome(): + info "Shutting down metrics HTTP server" + await v.get().close() + proc quitDoppelganger*() = # Avoid colliding with # https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Process%20Exit%20Codes diff --git a/beacon_chain/nimbus_signing_node.nim b/beacon_chain/nimbus_signing_node.nim index 0c0cfca2aa..3d357fa91c 100644 --- a/beacon_chain/nimbus_signing_node.nim +++ b/beacon_chain/nimbus_signing_node.nim @@ -1,5 +1,5 @@ # nimbus_signing_node -# Copyright (c) 2018-2024 Status Research & Development GmbH +# Copyright (c) 2018-2025 Status Research & Development GmbH # Licensed and distributed under either of # * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT). # * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0). 
@@ -414,7 +414,7 @@ proc asyncInit(sn: SigningNodeRef) {.async: (raises: [SigningNodeError]).} = raise newException(SigningNodeError, "") SigningNodeServer(kind: SigningNodeKind.NonSecure, nserver: res.get()) -proc asyncRun*(sn: SigningNodeRef) {.async: (raises: []).} = +proc asyncRun*(sn: SigningNodeRef) {.async: (raises: [SigningNodeError]).} = sn.runKeystoreCachePruningLoopFut = runKeystoreCachePruningLoop(sn.keystoreCache) sn.installApiHandlers() @@ -429,6 +429,11 @@ proc asyncRun*(sn: SigningNodeRef) {.async: (raises: []).} = warn "Main loop failed with unexpected error", err_name = $exc.name, reason = $exc.msg + # This is a trick to keep `asyncraises` from generating the warning: + # No exceptions possible with this operation, `error` always returns nil. + if false: + raise newException(SigningNodeError, "This error should never happen") + debug "Stopping main processing loop" var pending: seq[Future[void]] if not(sn.runKeystoreCachePruningLoopFut.finished()): diff --git a/beacon_chain/nimbus_validator_client.nim b/beacon_chain/nimbus_validator_client.nim index 73533ba130..843a2fef4a 100644 --- a/beacon_chain/nimbus_validator_client.nim +++ b/beacon_chain/nimbus_validator_client.nim @@ -1,5 +1,5 @@ # beacon_chain -# Copyright (c) 2018-2024 Status Research & Development GmbH +# Copyright (c) 2018-2025 Status Research & Development GmbH # Licensed and distributed under either of # * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT). # * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0). 
@@ -161,38 +161,6 @@ proc initClock( current_slot = currentSlot, current_epoch = currentEpoch res -proc initMetrics( - vc: ValidatorClientRef -): Future[bool] {.async: (raises: [CancelledError]).} = - if vc.config.metricsEnabled: - let - metricsAddress = vc.config.metricsAddress - metricsPort = vc.config.metricsPort - url = "http://" & $metricsAddress & ":" & $metricsPort & "/metrics" - info "Starting metrics HTTP server", url = url - let server = - block: - let res = MetricsHttpServerRef.new($metricsAddress, metricsPort) - if res.isErr(): - error "Could not start metrics HTTP server", url = url, - error_msg = res.error() - return false - res.get() - vc.metricsServer = Opt.some(server) - try: - await server.start() - except MetricsError as exc: - error "Could not start metrics HTTP server", url = url, - error_msg = exc.msg, error_name = exc.name - return false - true - -proc shutdownMetrics(vc: ValidatorClientRef) {.async: (raises: []).} = - if vc.config.metricsEnabled: - if vc.metricsServer.isSome(): - info "Shutting down metrics HTTP server" - await vc.metricsServer.get().close() - proc shutdownSlashingProtection(vc: ValidatorClientRef) = info "Closing slashing protection", path = vc.config.validatorsDir() vc.attachedValidators[].slashingProtection.close() @@ -351,7 +319,7 @@ proc asyncInit(vc: ValidatorClientRef): Future[ValidatorClientRef] {. vc.beaconClock = await vc.initClock() - if not(await initMetrics(vc)): + vc.metricsServer = (await vc.config.initMetricsServer()).valueOr: raise newException(ValidatorClientError, "Could not initialize metrics server") @@ -368,7 +336,7 @@ proc asyncInit(vc: ValidatorClientRef): Future[ValidatorClientRef] {. vc.attachedValidators = validatorPool if not(await initValidators(vc)): - await vc.shutdownMetrics() + await vc.metricsServer.stopMetricsServer() raise newException(ValidatorClientError, "Could not initialize local validators") @@ -432,7 +400,7 @@ proc asyncInit(vc: ValidatorClientRef): Future[ValidatorClientRef] {. 
) except CancelledError: debug "Initialization process interrupted" - await vc.shutdownMetrics() + await vc.metricsServer.stopMetricsServer() vc.shutdownSlashingProtection() return @@ -522,7 +490,7 @@ proc asyncRun*( except CancelledError: debug "Main loop interrupted" - await vc.shutdownMetrics() + await vc.metricsServer.stopMetricsServer() vc.shutdownSlashingProtection() if doppelEventFut.completed():