From b3e03f72d4df8c37ea1f0ced4dff6b9449b7e1ac Mon Sep 17 00:00:00 2001 From: jack Date: Wed, 10 Dec 2025 19:19:03 -1000 Subject: [PATCH 1/2] Fix threading issues in BLEService and NoiseSessionManager - Fix isAppActive race condition by dispatching notification handlers to bleQueue - Ensure BLE operations (stopScan, startScanning) execute on bleQueue - Add re-entrancy detection to NoiseSessionManager to prevent deadlocks - Add assertions for barrier operations to catch threading issues in debug builds --- bitchat/Noise/NoiseSessionManager.swift | 58 +++++++++++++++++-------- bitchat/Services/BLE/BLEService.swift | 30 ++++++++----- 2 files changed, 59 insertions(+), 29 deletions(-) diff --git a/bitchat/Noise/NoiseSessionManager.swift b/bitchat/Noise/NoiseSessionManager.swift index 92ccf9d6a..12b2e8e96 100644 --- a/bitchat/Noise/NoiseSessionManager.swift +++ b/bitchat/Noise/NoiseSessionManager.swift @@ -15,25 +15,37 @@ final class NoiseSessionManager { private let localStaticKey: Curve25519.KeyAgreement.PrivateKey private let keychain: KeychainManagerProtocol private let managerQueue = DispatchQueue(label: "chat.bitchat.noise.manager", attributes: .concurrent) - + private let managerQueueKey = DispatchSpecificKey() + // Callbacks var onSessionEstablished: ((PeerID, Curve25519.KeyAgreement.PublicKey) -> Void)? var onSessionFailed: ((PeerID, Error) -> Void)? - + init(localStaticKey: Curve25519.KeyAgreement.PrivateKey, keychain: KeychainManagerProtocol) { self.localStaticKey = localStaticKey self.keychain = keychain + managerQueue.setSpecific(key: managerQueueKey, value: ()) + } + + /// Check if currently executing on the manager queue (for debugging re-entrancy issues) + private var isOnManagerQueue: Bool { + DispatchQueue.getSpecific(key: managerQueueKey) != nil } // MARK: - Session Management - + func getSession(for peerID: PeerID) -> NoiseSession? { + // Re-entrancy check: if already on managerQueue, access directly to avoid deadlock + if isOnManagerQueue { + return sessions[peerID] + } return managerQueue.sync { return sessions[peerID] } } - + func removeSession(for peerID: PeerID) { + assert(!isOnManagerQueue, "removeSession called from managerQueue - potential deadlock") managerQueue.sync(flags: .barrier) { if let session = sessions.removeValue(forKey: peerID) { session.reset() // Clear sensitive data before removing @@ -42,6 +54,7 @@ final class NoiseSessionManager { } func removeAllSessions() { + assert(!isOnManagerQueue, "removeAllSessions called from managerQueue - potential deadlock") managerQueue.sync(flags: .barrier) { for (_, session) in sessions { session.reset() @@ -51,8 +64,9 @@ final class NoiseSessionManager { } // MARK: - Handshake Helpers - + func initiateHandshake(with peerID: PeerID) throws -> Data { + assert(!isOnManagerQueue, "initiateHandshake called from managerQueue - potential deadlock") return try managerQueue.sync(flags: .barrier) { // Check if we already have an established session if let existingSession = sessions[peerID], existingSession.isEstablished() { @@ -87,6 +101,7 @@ final class NoiseSessionManager { } func handleIncomingHandshake(from peerID: PeerID, message: Data) throws -> Data? { + assert(!isOnManagerQueue, "handleIncomingHandshake called from managerQueue - potential deadlock") // Process everything within the synchronized block to prevent race conditions return try managerQueue.sync(flags: .barrier) { var shouldCreateNew = false @@ -184,27 +199,34 @@ final class NoiseSessionManager { } // MARK: - Session Rekeying - + func getSessionsNeedingRekey() -> [(peerID: PeerID, needsRekey: Bool)] { + // Re-entrancy check: if already on managerQueue, access directly to avoid deadlock + if isOnManagerQueue { + return computeSessionsNeedingRekey() + } return managerQueue.sync { - var needingRekey: [(peerID: PeerID, needsRekey: Bool)] = [] - - for (peerID, session) in sessions { - if let secureSession = session as? SecureNoiseSession, - secureSession.isEstablished(), - secureSession.needsRenegotiation() { - needingRekey.append((peerID: peerID, needsRekey: true)) - } + return computeSessionsNeedingRekey() + } + } + + private func computeSessionsNeedingRekey() -> [(peerID: PeerID, needsRekey: Bool)] { + var needingRekey: [(peerID: PeerID, needsRekey: Bool)] = [] + for (peerID, session) in sessions { + if let secureSession = session as? SecureNoiseSession, + secureSession.isEstablished(), + secureSession.needsRenegotiation() { + needingRekey.append((peerID: peerID, needsRekey: true)) } - - return needingRekey } + return needingRekey } - + func initiateRekey(for peerID: PeerID) throws { + assert(!isOnManagerQueue, "initiateRekey called from managerQueue - potential deadlock") // Remove old session removeSession(for: peerID) - + // Initiate new handshake _ = try initiateHandshake(with: peerID) } diff --git a/bitchat/Services/BLE/BLEService.swift b/bitchat/Services/BLE/BLEService.swift index 50a170ffb..3197513e2 100644 --- a/bitchat/Services/BLE/BLEService.swift +++ b/bitchat/Services/BLE/BLEService.swift @@ -2713,23 +2713,31 @@ extension BLEService { #if os(iOS) @objc private func appDidBecomeActive() { - isAppActive = true - // Restart scanning with allow duplicates when app becomes active - if centralManager?.state == .poweredOn { - centralManager?.stopScan() - startScanning() + // Dispatch to bleQueue for thread-safe access to isAppActive and BLE operations + bleQueue.async { [weak self] in + guard let self = self else { return } + self.isAppActive = true + // Restart scanning with allow duplicates when app becomes active + if self.centralManager?.state == .poweredOn { + self.centralManager?.stopScan() + self.startScanning() + } } logBluetoothStatus("became-active") scheduleBluetoothStatusSample(after: 5.0, context: "active-5s") // No Local Name; nothing to refresh for advertising policy } - + @objc private func appDidEnterBackground() { - isAppActive = false - // Restart scanning without allow duplicates in background - if centralManager?.state == .poweredOn { - centralManager?.stopScan() - startScanning() + // Dispatch to bleQueue for thread-safe access to isAppActive and BLE operations + bleQueue.async { [weak self] in + guard let self = self else { return } + self.isAppActive = false + // Restart scanning without allow duplicates in background + if self.centralManager?.state == .poweredOn { + self.centralManager?.stopScan() + self.startScanning() + } } logBluetoothStatus("entered-background") scheduleBluetoothStatusSample(after: 15.0, context: "background-15s") From 8b9c056bdf4e078d2f6c5b61298f44dc41ee5230 Mon Sep 17 00:00:00 2001 From: jack Date: Wed, 10 Dec 2025 19:35:55 -1000 Subject: [PATCH 2/2] Fix flaky FragmentationTests by increasing timeouts for CI --- .../Fragmentation/FragmentationTests.swift | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/bitchatTests/Fragmentation/FragmentationTests.swift b/bitchatTests/Fragmentation/FragmentationTests.swift index 1632f9ad4..0588f26ba 100644 --- a/bitchatTests/Fragmentation/FragmentationTests.swift +++ b/bitchatTests/Fragmentation/FragmentationTests.swift @@ -45,16 +45,16 @@ struct FragmentationTests { // Inject fragments spaced out to avoid concurrent mutation inside BLEService for (i, fragment) in shuffled.enumerated() { - let delay = 5 * Double(i) * 0.001 + let delay = 10 * Double(i) * 0.001 Task { try await sleep(delay) ble._test_handlePacket(fragment, fromPeerID: remoteShortID) } } - - // Allow async processing - try await sleep(0.5) - + + // Allow async processing (longer timeout for CI environments) + try await sleep(1.5) + #expect(capture.publicMessages.count == 1) #expect(capture.publicMessages.first?.content.count == 3_000) } @@ -79,15 +79,15 @@ struct FragmentationTests { } for (i, fragment) in frags.enumerated() { - let delay = 5 * Double(i) * 0.001 + let delay = 10 * Double(i) * 0.001 Task { try await sleep(delay) ble._test_handlePacket(fragment, fromPeerID: remoteShortID) } } - - // Allow async processing - try await sleep(0.5) + + // Allow async processing (longer timeout for CI environments) + try await sleep(1.5) #expect(capture.publicMessages.count == 1) #expect(capture.publicMessages.first?.content.count == 2048) @@ -180,15 +180,15 @@ struct FragmentationTests { } for (i, fragment) in corrupted.enumerated() { - let delay = 5 * Double(i) * 0.001 + let delay = 10 * Double(i) * 0.001 Task { try await sleep(delay) ble._test_handlePacket(fragment, fromPeerID: remoteShortID) } } - - // Allow async processing - try await sleep(0.5) + + // Allow async processing (longer timeout for CI environments) + try await sleep(1.5) // Should not deliver since one fragment is invalid and reassembly can't complete #expect(capture.publicMessages.isEmpty)