diff --git a/Modules/Sources/NetworkingCore/Network/AlamofireNetworkErrorHandler.swift b/Modules/Sources/NetworkingCore/Network/AlamofireNetworkErrorHandler.swift index 3cde0cc24b4..5a86be6b522 100644 --- a/Modules/Sources/NetworkingCore/Network/AlamofireNetworkErrorHandler.swift +++ b/Modules/Sources/NetworkingCore/Network/AlamofireNetworkErrorHandler.swift @@ -4,6 +4,8 @@ import Alamofire /// Thread-safe handler for network error tracking and retry logic final class AlamofireNetworkErrorHandler { private let queue = DispatchQueue(label: "com.networkingcore.errorhandler", attributes: .concurrent) + /// Serial queue for UserDefaults operations to prevent race conditions while avoiding deadlocks + private let userDefaultsQueue = DispatchQueue(label: "com.networkingcore.errorhandler.userdefaults") private let userDefaults: UserDefaults private let credentials: Credentials? private let notificationCenter: NotificationCenter @@ -162,7 +164,11 @@ final class AlamofireNetworkErrorHandler { } func flagSiteAsUnsupported(for siteID: Int64, flow: RequestFlow, cause: AppPasswordFlagCause, error: Error) { - queue.sync(flags: .barrier) { + // Use dedicated serial queue for UserDefaults operations to: + // 1. Prevent race conditions where concurrent writes overwrite each other + // 2. Avoid deadlock by not using the main queue that KVO observers may need + userDefaultsQueue.sync { [weak self] in + guard let self else { return } var currentList = userDefaults.applicationPasswordUnsupportedList currentList[String(siteID)] = Date() userDefaults.applicationPasswordUnsupportedList = currentList @@ -233,11 +239,16 @@ private extension AlamofireNetworkErrorHandler { } func clearUnsupportedFlag(for siteID: Int64) { - queue.sync(flags: .barrier) { + // Use dedicated serial queue for UserDefaults operations to: + // 1. Prevent race conditions where concurrent writes overwrite each other + // 2. Avoid deadlock by not using the main queue that KVO observers may need + userDefaultsQueue.sync { [weak self] in + guard let self else { return } let currentList = userDefaults.applicationPasswordUnsupportedList - userDefaults.applicationPasswordUnsupportedList = currentList.filter { flag in + let filteredList = currentList.filter { flag in flag.key != String(siteID) } + userDefaults.applicationPasswordUnsupportedList = filteredList } } diff --git a/Modules/Tests/NetworkingTests/Network/AlamofireNetworkErrorHandlerTests.swift b/Modules/Tests/NetworkingTests/Network/AlamofireNetworkErrorHandlerTests.swift index 1b6b7278cf6..3fdc113337b 100644 --- a/Modules/Tests/NetworkingTests/Network/AlamofireNetworkErrorHandlerTests.swift +++ b/Modules/Tests/NetworkingTests/Network/AlamofireNetworkErrorHandlerTests.swift @@ -606,6 +606,109 @@ final class AlamofireNetworkErrorHandlerTests: XCTestCase { // Then - no crashes should occur (especially no EXC_BREAKPOINT from array index out of bounds) wait(for: [expectation], timeout: 5.0) } + + // MARK: - Deadlock Regression Test + + func test_no_deadlock_when_kvo_observer_triggers_during_flagSiteAsUnsupported() { + // This test reproduces the exact deadlock scenario from the production crash: + // 1. flagSiteAsUnsupported writes to UserDefaults + // 2. UserDefaults triggers KVO notification synchronously + // 3. KVO observer calls prepareAppPasswordSupport + // 4. prepareAppPasswordSupport accesses appPasswordFailures + // + // BEFORE FIX: This would deadlock because: + // - flagSiteAsUnsupported used queue.sync(flags: .barrier) around UserDefaults write + // - KVO fired synchronously during the barrier + // - prepareAppPasswordSupport tried queue.sync while barrier was still active + // + // AFTER FIX: No deadlock because: + // - flagSiteAsUnsupported uses userDefaultsQueue.async for UserDefaults write + // - KVO fires on userDefaultsQueue, not the main queue + // - prepareAppPasswordSupport can safely use queue.sync on the main queue + + // Given - Set up KVO observer to simulate the production scenario + let siteID: Int64 = 12345 + let kvoTriggered = XCTestExpectation(description: "KVO observer triggered") + let preparePasswordSupportCalled = XCTestExpectation(description: "prepareAppPasswordSupport called from KVO") + let operationCompleted = XCTestExpectation(description: "Operation completed without deadlock") + + var kvoObservation: NSKeyValueObservation? + kvoObservation = userDefaults.observe(\.applicationPasswordUnsupportedList, options: [.new]) { [weak self] _, _ in + kvoTriggered.fulfill() + + // Simulate what happens in production: + // AlamofireNetwork.observeSelectedSite gets triggered by KVO + // and calls prepareAppPasswordSupport + self?.errorHandler.prepareAppPasswordSupport(for: siteID) + preparePasswordSupportCalled.fulfill() + } + + // When - Trigger the scenario that caused the deadlock + DispatchQueue.global().async { + // This will write to UserDefaults, triggering KVO + self.errorHandler.flagSiteAsUnsupported( + for: siteID, + flow: .apiRequest, + cause: .majorError, + error: NetworkError.notFound(response: nil) + ) + operationCompleted.fulfill() + } + + // Then - All expectations should complete without timing out (no deadlock) + // The timeout of 2 seconds is generous - if there's a deadlock, this will timeout + let result = XCTWaiter.wait( + for: [kvoTriggered, preparePasswordSupportCalled, operationCompleted], + timeout: 2.0, + enforceOrder: false + ) + + XCTAssertEqual(result, .completed, "Test should complete without deadlock. If this times out, the deadlock bug has returned!") + + // Cleanup + kvoObservation?.invalidate() + } + + func test_no_deadlock_with_concurrent_kvo_observers_and_flag_operations() { + // This test creates even more stress by having multiple KVO observers + // and concurrent flag operations to ensure the fix is robust + + let completionExpectation = XCTestExpectation(description: "All operations complete") + completionExpectation.expectedFulfillmentCount = 10 // 10 flag operations + + var observations: [NSKeyValueObservation] = [] + + // Set up multiple KVO observers (simulating multiple parts of the app observing) + for i in 1...3 { + let observation = userDefaults.observe(\.applicationPasswordUnsupportedList, options: [.new]) { [weak self] _, _ in + let siteID = Int64(1000 + i) + // Each observer tries to access the error handler + self?.errorHandler.prepareAppPasswordSupport(for: siteID) + } + observations.append(observation) + } + + // When - Perform multiple concurrent operations that trigger KVO + for i in 0..<10 { + DispatchQueue.global().async { + let siteID = Int64(i) + self.errorHandler.flagSiteAsUnsupported( + for: siteID, + flow: .apiRequest, + cause: .majorError, + error: NetworkError.notFound(response: nil) + ) + completionExpectation.fulfill() + } + } + + // Then - Should complete without deadlock + let result = XCTWaiter.wait(for: [completionExpectation], timeout: 3.0) + XCTAssertEqual(result, .completed, "Concurrent operations with multiple KVO observers should not deadlock") + + // Cleanup + observations.forEach { $0.invalidate() } + } } // MARK: - Helper Methods