From 02787c2512618bdd47a197c089e01e7c582bf70d Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Wed, 3 Jun 2026 16:24:01 +0200 Subject: [PATCH 1/8] retry mechanism with retry breaking system --- lib/OnyxUtils.ts | 59 +++---- lib/storage/errors.ts | 76 +++++++++ .../IDBKeyValProvider/createStore.ts | 76 +++------ tests/unit/onyxUtilsTest.ts | 16 +- .../unit/storage/providers/createStoreTest.ts | 158 +++++++++++------- 5 files changed, 227 insertions(+), 158 deletions(-) create mode 100644 lib/storage/errors.ts diff --git a/lib/OnyxUtils.ts b/lib/OnyxUtils.ts index c5c277cd7..dbf166cb4 100644 --- a/lib/OnyxUtils.ts +++ b/lib/OnyxUtils.ts @@ -6,8 +6,8 @@ import * as Logger from './Logger'; import type Onyx from './Onyx'; import cache, {TASK} from './OnyxCache'; import OnyxKeys from './OnyxKeys'; -import * as Str from './Str'; import Storage from './storage'; +import {StorageErrorClass, classifyStorageError} from './storage/errors'; import type { CollectionKeyBase, ConnectOptions, @@ -49,26 +49,6 @@ const METHOD = { CLEAR: 'clear', } as const; -// IndexedDB errors that indicate storage capacity issues where eviction can help -const IDB_STORAGE_ERRORS = [ - 'quotaexceedederror', // Browser storage quota exceeded -] as const; - -// SQLite errors that indicate storage capacity issues where eviction can help -const SQLITE_STORAGE_ERRORS = [ - 'database or disk is full', // Device storage is full -] as const; - -const STORAGE_ERRORS = [...IDB_STORAGE_ERRORS, ...SQLITE_STORAGE_ERRORS]; - -// IndexedDB errors where retrying is futile because the underlying connection/store is broken. -// The healing path (separate from retryOperation) is responsible for recovery. -const IDB_NON_RETRIABLE_ERRORS = [ - 'internal error opening backing store', // LevelDB backing store is broken at the filesystem level -] as const; - -const NON_RETRIABLE_ERRORS = [...IDB_NON_RETRIABLE_ERRORS]; - // Max number of retries for failed storage operations const MAX_STORAGE_OPERATION_RETRY_ATTEMPTS = 5; @@ -791,8 +771,11 @@ function remove(key: TKey, isProcessingCollectionUpdate?: function reportStorageQuota(error?: Error): Promise { return Storage.getDatabaseSize() .then(({bytesUsed, bytesRemaining, usageDetails}) => { + // `bytesRemaining` comes from navigator.storage.estimate() and is an ORIGIN-WIDE estimate, + // not headroom for this database. The browser allocates IndexedDB storage dynamically, so a + // QuotaExceededError can legitimately occur even when this number still looks large. Logger.logInfo( - `Storage Quota Check -- bytesUsed: ${bytesUsed} bytesRemaining: ${bytesRemaining}${ + `Storage Quota Check -- bytesUsed: ${bytesUsed} originWideBytesRemaining (estimate, not per-DB headroom): ${bytesRemaining}${ usageDetails ? ` usageDetails: ${JSON.stringify(usageDetails)}` : '' }. Original error: ${error}`, ); @@ -803,39 +786,39 @@ function reportStorageQuota(error?: Error): Promise { } /** - * Handles storage operation failures based on the error type: - * - Storage capacity errors: evicts data and retries the operation - * - Invalid data errors: logs an alert and throws an error - * - Non-retriable errors: logs an alert and resolves without retrying - * - Other errors: retries the operation + * Handles storage operation failures based on the error class (see lib/storage/errors.ts). + * The connection layer (createStore) owns connection/transport recovery; this operation layer owns + * capacity recovery (eviction) so that a given failure is retried by exactly one layer: + * - INVALID_DATA: logs an alert and throws (the same data will always fail). + * - TRANSIENT / FATAL: the connection layer already retried (transient) or exhausted its heal budget + * and alerted (fatal). Retrying here would only re-amplify, so we skip the write quietly. + * - CAPACITY: evicts the least recently accessed evictable key and retries. + * - UNKNOWN: bounded retry. */ function retryOperation(error: Error, onyxMethod: TMethod, defaultParams: Parameters[0], retryAttempt: number | undefined): Promise { const currentRetryAttempt = retryAttempt ?? 0; const nextRetryAttempt = currentRetryAttempt + 1; + const errorClass = classifyStorageError(error); - Logger.logInfo(`Failed to save to storage. Error: ${error}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`); + Logger.logInfo(`Failed to save to storage. Error: ${error}. class: ${errorClass}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`); - if (error && Str.startsWith(error.message, "Failed to execute 'put' on 'IDBObjectStore'")) { + if (errorClass === StorageErrorClass.INVALID_DATA) { Logger.logAlert(`Attempted to set invalid data set in Onyx. Please ensure all data is serializable. Error: ${error}`); throw error; } - const errorMessage = error?.message?.toLowerCase?.(); - const errorName = error?.name?.toLowerCase?.(); - const isStorageCapacityError = STORAGE_ERRORS.some((storageError) => errorName?.includes(storageError) || errorMessage?.includes(storageError)); - const isNonRetriableError = NON_RETRIABLE_ERRORS.some((nonRetriableError) => errorName?.includes(nonRetriableError) || errorMessage?.includes(nonRetriableError)); - - if (isNonRetriableError) { - Logger.logAlert(`Storage operation skipped retry for non-retriable error. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); + if (errorClass === StorageErrorClass.TRANSIENT || errorClass === StorageErrorClass.FATAL) { + Logger.logInfo(`Storage operation skipped retry; ${errorClass} errors are handled by the connection layer. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); return Promise.resolve(); } if (nextRetryAttempt > MAX_STORAGE_OPERATION_RETRY_ATTEMPTS) { - Logger.logAlert(`Storage operation failed after 5 retries. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); + Logger.logAlert(`Storage operation failed after ${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS} retries. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); return Promise.resolve(); } - if (!isStorageCapacityError) { + if (errorClass !== StorageErrorClass.CAPACITY) { + // UNKNOWN error — bounded retry without eviction. // @ts-expect-error No overload matches this call. return onyxMethod(defaultParams, nextRetryAttempt); } diff --git a/lib/storage/errors.ts b/lib/storage/errors.ts new file mode 100644 index 000000000..7318b5a3e --- /dev/null +++ b/lib/storage/errors.ts @@ -0,0 +1,76 @@ +import type {ValueOf} from 'type-fest'; + +/** + * Single source of truth for classifying storage (IndexedDB / SQLite) write failures. + * + * Both layers that react to storage errors consult this: + * - the connection layer (`createStore`) recovers TRANSIENT and FATAL errors by reopening the DB, and + * - the operation layer (`OnyxUtils.retryOperation`) recovers CAPACITY by eviction and retries UNKNOWN. + * + * Keeping the matchers here (instead of duplicated string lists in each layer) guarantees the two + * layers agree on what an error *is*, even though they react to it differently. This module has no + * Onyx dependencies so it can live in the storage layer without creating an import cycle. + */ +const StorageErrorClass = { + /** Connection/transport failure (stale connection). Owner: connection layer — reopen + retry once. */ + TRANSIENT: 'transient', + /** Quota exceeded / disk full. Owner: operation layer — evict and retry. */ + CAPACITY: 'capacity', + /** Non-serializable payload. Never retriable — the same data will always fail. */ + INVALID_DATA: 'invalidData', + /** Backing-store corruption. Owner: connection layer — budgeted heal, then give up. */ + FATAL: 'fatal', + /** Unmatched. Owner: operation layer — bounded retry. */ + UNKNOWN: 'unknown', +} as const; + +type StorageErrorClassValue = ValueOf; + +function getErrorParts(error: unknown): {name: string; message: string} { + if (error instanceof Error || error instanceof DOMException) { + return {name: (error.name ?? '').toLowerCase(), message: (error.message ?? '').toLowerCase()}; + } + return {name: '', message: String(error ?? '').toLowerCase()}; +} + +/** + * Classifies a storage write error into one of the {@link StorageErrorClass} buckets. + * Matching is done on the lowercased error name and message. + */ +function classifyStorageError(error: unknown): StorageErrorClassValue { + const {name, message} = getErrorParts(error); + + // Non-serializable data passed to IDBObjectStore.put — retrying is futile. + if (message.includes("failed to execute 'put' on 'idbobjectstore'")) { + return StorageErrorClass.INVALID_DATA; + } + + // Storage capacity: browser quota exceeded (IDB) or device disk full (SQLite). + if (name.includes('quotaexceedederror') || message.includes('quotaexceedederror') || message.includes('database or disk is full')) { + return StorageErrorClass.CAPACITY; + } + + // Backing-store corruption (Chromium LevelDB). Recoverable only via a budgeted reopen. + if (message.includes('internal error opening backing store')) { + return StorageErrorClass.FATAL; + } + + // Transient connection/transport failures — the cached connection is stale and a reopen fixes it: + // - InvalidStateError: connection closed between getDB() resolving and db.transaction(). + // - AbortError: write transaction aborted (connection close / versionchange / sibling abort). + // - Safari/WebKit IDB server termination for backgrounded tabs. + if ( + name.includes('invalidstateerror') || + name.includes('aborterror') || + message.includes('connection to indexed database server lost') || + message.includes('connection is closing') || + message.includes('idb write transaction aborted without an error') + ) { + return StorageErrorClass.TRANSIENT; + } + + return StorageErrorClass.UNKNOWN; +} + +export {StorageErrorClass, classifyStorageError}; +export type {StorageErrorClassValue}; diff --git a/lib/storage/providers/IDBKeyValProvider/createStore.ts b/lib/storage/providers/IDBKeyValProvider/createStore.ts index 0d3f07913..cb5b36267 100644 --- a/lib/storage/providers/IDBKeyValProvider/createStore.ts +++ b/lib/storage/providers/IDBKeyValProvider/createStore.ts @@ -1,44 +1,10 @@ import * as IDB from 'idb-keyval'; import type {UseStore} from 'idb-keyval'; import * as Logger from '../../../Logger'; +import {StorageErrorClass, classifyStorageError} from '../../errors'; const HEAL_ATTEMPTS_MAX = 3; -/** - * Detects the Chromium-specific IDB backing store corruption error. - * Fires when LevelDB files backing IndexedDB are corrupted and Chrome's - * internal recovery (RepairDB -> delete -> recreate) also fails. - */ -function isBackingStoreError(error: unknown): boolean { - return (error instanceof Error || error instanceof DOMException) && (error as Error).message.includes('Internal error opening backing store'); -} - -/** - * Detects Safari/WebKit IDB connection termination errors. - * Fires when Safari kills the IDB server process for backgrounded tabs. - * WebKit bugs: https://bugs.webkit.org/show_bug.cgi?id=197050, https://bugs.webkit.org/show_bug.cgi?id=201483 - */ -function isConnectionLostError(error: unknown): boolean { - if (!(error instanceof Error || error instanceof DOMException)) return false; - const msg = (error as Error).message.toLowerCase(); - return msg.includes('connection to indexed database server lost') || msg.includes('connection is closing'); -} - -function isInvalidStateError(error: unknown): boolean { - return (error instanceof Error || error instanceof DOMException) && (error as Error).name === 'InvalidStateError'; -} - -/** Errors that trigger a budgeted heal-and-retry in store(). */ -function isBudgetedHealError(error: unknown): boolean { - return isBackingStoreError(error) || isConnectionLostError(error); -} - -function getBudgetedHealErrorLabel(error: unknown): string { - if (isBackingStoreError(error)) return 'backing store'; - if (isConnectionLostError(error)) return 'connection lost'; - return 'unknown'; -} - // This is a copy of the createStore function from idb-keyval, we need a custom implementation // because we need to create the database manually in order to ensure that the store exists before we use it. // If the store does not exist, idb-keyval will throw an error @@ -127,22 +93,27 @@ function createStore(dbName: string, storeName: string): UseStore { return result; } - // Handles three recoverable error classes: - // 1. InvalidStateError — connection closed between getDB() resolving and db.transaction(). - // Retry once with a fresh connection. No budget limit (transient, always worth one reopen). - // 2. Backing store corruption (Chromium UnknownError) — drop cached connection and reopen. - // 3. Connection lost (Safari UnknownError) — IDB server terminated for backgrounded tabs. - // Both 2 and 3 share a heal budget (3 attempts, reset on success). - // Mirrors Dexie's PR1398_maxLoop pattern: https://github.com/dexie/Dexie.js/blob/master/src/functions/temp-transaction.ts - // Note: concurrent store() calls share the budget. Under overlapping failures each caller + // The connection layer owns recovery for connection/transport failures. It reacts per the shared + // error taxonomy (see lib/storage/errors.ts): + // - TRANSIENT (InvalidStateError, AbortError, Safari connection lost) — the cached connection is + // stale. Drop it and retry once with a fresh one. Unbudgeted: a single reopen is always worth it + // and is bounded per operation. + // - FATAL (Chromium backing-store corruption) — reopening can recover transient corruption, but + // repeating forever is futile, so the heal is budgeted (3 attempts, reset on success). + // Mirrors Dexie's PR1398_maxLoop pattern: https://github.com/dexie/Dexie.js/blob/master/src/functions/temp-transaction.ts + // - CAPACITY / UNKNOWN are NOT the connection layer's responsibility — propagate to the operation + // layer (OnyxUtils.retryOperation) without retrying here, to avoid compounding retries. + // Note: concurrent store() calls share the heal budget. Under overlapping failures each caller // decrements independently, so the budget may drain faster than one-per-incident. This is // acceptable — same as Dexie's approach — and the budget resets on any success. return (txMode, callback) => executeTransaction(txMode, callback) .then(resetHealBudget) .catch((error) => { - if (isInvalidStateError(error)) { - Logger.logInfo('IDB InvalidStateError — dropping cached connection and retrying', { + const errorClass = classifyStorageError(error); + + if (errorClass === StorageErrorClass.TRANSIENT) { + Logger.logInfo('IDB transient error — dropping cached connection and retrying once', { dbName, storeName, txMode, @@ -152,29 +123,30 @@ function createStore(dbName: string, storeName: string): UseStore { return executeTransaction(txMode, callback).then(resetHealBudget); } - if (isBudgetedHealError(error) && healAttemptsRemaining > 0) { + if (errorClass === StorageErrorClass.FATAL && healAttemptsRemaining > 0) { healAttemptsRemaining--; - const label = getBudgetedHealErrorLabel(error); - Logger.logInfo(`IDB heal: ${label} error detected — dropping cached connection and reopening (${healAttemptsRemaining} attempts left)`, { + Logger.logInfo(`IDB heal: backing store error detected — dropping cached connection and reopening (${healAttemptsRemaining} attempts left)`, { dbName, storeName, }); dbp = undefined; return executeTransaction(txMode, callback).then((result) => { - Logger.logInfo(`IDB heal: successfully recovered after ${label} error`, {dbName, storeName}); + Logger.logInfo('IDB heal: successfully recovered after backing store error', {dbName, storeName}); return resetHealBudget(result); }); } - if (isBudgetedHealError(error)) { - Logger.logAlert(`IDB heal: ${getBudgetedHealErrorLabel(error)} error — heal budget exhausted, giving up`, { + if (errorClass === StorageErrorClass.FATAL) { + Logger.logAlert('IDB heal: backing store error — heal budget exhausted, giving up', { dbName, storeName, }); } else { - Logger.logAlert('IDB error is not recoverable, giving up', { + // CAPACITY / UNKNOWN — let the operation layer decide (evict or bounded retry). + Logger.logInfo('IDB error not recoverable at the connection layer, propagating', { dbName, storeName, + errorClass, errorMessage: error instanceof Error ? error.message : String(error), }); } diff --git a/tests/unit/onyxUtilsTest.ts b/tests/unit/onyxUtilsTest.ts index 5be3b93da..d3a9c7baf 100644 --- a/tests/unit/onyxUtilsTest.ts +++ b/tests/unit/onyxUtilsTest.ts @@ -764,15 +764,17 @@ describe('OnyxUtils', () => { expect(retryOperationSpy).toHaveBeenCalledTimes(1); }); - it('should log a single skip alert for non-retriable errors', async () => { + it('should skip retry quietly (info, not alert) for fatal connection-layer errors', async () => { const logAlertSpy = jest.spyOn(Logger, 'logAlert'); + const logInfoSpy = jest.spyOn(Logger, 'logInfo'); StorageMock.setItem = jest.fn().mockRejectedValue(nonRetriableIdbError); await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); - expect(logAlertSpy).toHaveBeenCalledWith(`Storage operation skipped retry for non-retriable error. Error: ${nonRetriableIdbError}. onyxMethod: setWithRetry.`); - // Not paired with the "5 retries exhausted" alert - expect(logAlertSpy).toHaveBeenCalledTimes(1); + // The connection layer (createStore) owns and alerts on fatal errors; the operation layer + // just skips the retry at info level. No alert here, and no "5 retries exhausted" alert. + expect(logInfoSpy).toHaveBeenCalledWith(`Storage operation skipped retry; fatal errors are handled by the connection layer. Error: ${nonRetriableIdbError}. onyxMethod: setWithRetry.`); + expect(logAlertSpy).not.toHaveBeenCalled(); }); it('should include the error in logAlert for IDBObjectStore invalid data errors', async () => { @@ -792,7 +794,7 @@ describe('OnyxUtils', () => { await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logAlertSpy).toHaveBeenCalledWith(`Out of storage. But found no acceptable keys to remove. Error: ${diskFullError}`); - expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 bytesRemaining: Infinity. Original error: ${diskFullError}`); + expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`); }); it('should include usageDetails in the storage quota log when available', async () => { @@ -804,7 +806,7 @@ describe('OnyxUtils', () => { await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logInfoSpy).toHaveBeenCalledWith( - `Storage Quota Check -- bytesUsed: 13289269 bytesRemaining: 5000000 usageDetails: ${JSON.stringify(usageDetails)}. Original error: ${diskFullError}`, + `Storage Quota Check -- bytesUsed: 13289269 originWideBytesRemaining (estimate, not per-DB headroom): 5000000 usageDetails: ${JSON.stringify(usageDetails)}. Original error: ${diskFullError}`, ); }); @@ -1347,7 +1349,7 @@ describe('OnyxUtils', () => { await LocalOnyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logInfoSpy).toHaveBeenCalledWith(`Out of storage. Evicting least recently accessed key (${key1}) and retrying. Error: ${diskFullError}`); - expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 bytesRemaining: Infinity. Original error: ${diskFullError}`); + expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`); }); }); diff --git a/tests/unit/storage/providers/createStoreTest.ts b/tests/unit/storage/providers/createStoreTest.ts index bb6530c3b..3ca68f587 100644 --- a/tests/unit/storage/providers/createStoreTest.ts +++ b/tests/unit/storage/providers/createStoreTest.ts @@ -77,7 +77,7 @@ describe('createStore', () => { }); await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow(DOMException); - expect(logInfoSpy).toHaveBeenCalledWith(expect.stringContaining('IDB InvalidStateError'), expect.anything()); + expect(logInfoSpy).toHaveBeenCalledWith(expect.stringContaining('IDB transient error'), expect.anything()); }); it('should not retry on non-InvalidStateError DOMException', async () => { @@ -96,8 +96,11 @@ describe('createStore', () => { await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow(DOMException); expect(callCount).toBe(1); - expect(logAlertSpy).toHaveBeenCalledWith('IDB error is not recoverable, giving up', expect.objectContaining({errorMessage: 'Not found'})); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection'), expect.anything()); + expect(logInfoSpy).toHaveBeenCalledWith( + 'IDB error not recoverable at the connection layer, propagating', + expect.objectContaining({errorMessage: 'Not found', errorClass: 'unknown'}), + ); + expect(logAlertSpy).not.toHaveBeenCalled(); }); it('should not retry on non-DOMException errors', async () => { @@ -116,8 +119,11 @@ describe('createStore', () => { await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow(TypeError); expect(callCount).toBe(1); - expect(logAlertSpy).toHaveBeenCalledWith('IDB error is not recoverable, giving up', expect.objectContaining({errorMessage: 'Something went wrong'})); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection'), expect.anything()); + expect(logInfoSpy).toHaveBeenCalledWith( + 'IDB error not recoverable at the connection layer, propagating', + expect.objectContaining({errorMessage: 'Something went wrong', errorClass: 'unknown'}), + ); + expect(logAlertSpy).not.toHaveBeenCalled(); }); it('should preserve data integrity after a successful retry', async () => { @@ -176,7 +182,7 @@ describe('createStore', () => { return IDB.promisifyRequest(s.transaction); }); - expect(logInfoSpy).toHaveBeenCalledWith('IDB InvalidStateError — dropping cached connection and retrying', { + expect(logInfoSpy).toHaveBeenCalledWith('IDB transient error — dropping cached connection and retrying once', { dbName, storeName: STORE_NAME, txMode: 'readwrite', @@ -412,24 +418,29 @@ describe('createStore', () => { jest.restoreAllMocks(); logAlertSpy = jest.spyOn(Logger, 'logAlert'); + logInfoSpy = jest.spyOn(Logger, 'logInfo'); - // QuotaExceededError + // QuotaExceededError — a CAPACITY error the connection layer does not own; it propagates + // to the operation layer (OnyxUtils.retryOperation) which handles eviction. jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(() => { throw new DOMException('Quota exceeded', 'QuotaExceededError'); }); await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow('Quota exceeded'); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection and reopening'), expect.anything()); - expect(logAlertSpy).toHaveBeenCalledWith('IDB error is not recoverable, giving up', expect.objectContaining({errorMessage: 'Quota exceeded'})); + expect(logAlertSpy).not.toHaveBeenCalled(); + expect(logInfoSpy).toHaveBeenCalledWith( + 'IDB error not recoverable at the connection layer, propagating', + expect.objectContaining({errorMessage: 'Quota exceeded', errorClass: 'capacity'}), + ); }); }); - describe('connection lost healing', () => { + describe('connection lost recovery (transient, unbudgeted)', () => { function connectionLostError() { return new DOMException('Connection to Indexed Database server lost. Refresh the page to try again', 'UnknownError'); } - it('should heal by dropping cached connection and reopening', async () => { + it('should recover by dropping cached connection and retrying once', async () => { const store = createStore(uniqueDBName(), STORE_NAME); await store('readwrite', (s) => { @@ -451,19 +462,17 @@ describe('createStore', () => { expect(result).toBe('value'); expect(callCount).toBe(2); expect(logInfoSpy).toHaveBeenCalledWith( - expect.stringContaining('connection lost error detected — dropping cached connection and reopening'), + 'IDB transient error — dropping cached connection and retrying once', expect.objectContaining({dbName: expect.any(String)}), ); - expect(logInfoSpy).toHaveBeenCalledWith('IDB heal: successfully recovered after connection lost error', expect.objectContaining({dbName: expect.any(String)})); }); - it('should stop healing after budget exhausts', async () => { + it('should not be budgeted — reopens on every call without ever exhausting a budget', async () => { const store = createStore(uniqueDBName(), STORE_NAME); // All transaction calls fail permanently with connection lost. - // The heal path clears dbp and calls indexedDB.open() again — mock that to - // also fail so fake-indexeddb doesn't deadlock waiting for the old connection - // to close (same pattern as the backing store budget exhaustion test). + // The transient path clears dbp and calls indexedDB.open() again — mock that to + // also fail so fake-indexeddb doesn't deadlock waiting for the old connection to close. jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(() => { throw connectionLostError(); }); @@ -476,19 +485,15 @@ describe('createStore', () => { return req; }); - // Each call drains 1 heal attempt (initial fails, heal retry also fails) - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - - // Budget exhausted — 4th call should NOT attempt healing, but should log budget exhausted - logAlertSpy.mockClear(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('heal budget exhausted'), expect.anything()); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection and reopening'), expect.anything()); + // Many calls — each attempts a single reopen and propagates. No budget, so it never logs + // "heal budget exhausted" no matter how many times it fails. + for (let i = 0; i < 5; i++) { + await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); + } + expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('heal budget exhausted'), expect.anything()); }); - it('should also heal "connection is closing" variant', async () => { + it('should also recover the "connection is closing" variant', async () => { const store = createStore(uniqueDBName(), STORE_NAME); await store('readwrite', (s) => { @@ -511,46 +516,77 @@ describe('createStore', () => { expect(callCount).toBe(2); }); - it('should share heal budget with backing store errors', async () => { + it('should not consume the backing-store heal budget', async () => { const store = createStore(uniqueDBName(), STORE_NAME); - // All transaction calls fail permanently, alternating error types. - // The heal path clears dbp and calls indexedDB.open() again — mock that to - // also fail so fake-indexeddb doesn't deadlock waiting for the old connection - // to close. - const txErrors = [ - new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'), - connectionLostError(), - new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'), - ]; - let txErrorIndex = 0; - jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(() => { - const err = txErrors[Math.min(txErrorIndex, txErrors.length - 1)]; - txErrorIndex++; - throw err; + await store('readwrite', (s) => { + s.put('value', 'key1'); + return IDB.promisifyRequest(s.transaction); }); - jest.spyOn(indexedDB, 'open').mockImplementation(() => { - const req = {} as IDBOpenDBRequest; - Promise.resolve().then(() => { - Object.defineProperty(req, 'error', { - value: new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'), - configurable: true, - }); - req.onerror?.(new Event('error') as Event & {target: IDBOpenDBRequest}); + + const original = IDBDatabase.prototype.transaction; + const backingStoreError = () => new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'); + + // Connection-lost errors are transient and must NOT decrement the backing-store budget. + // Fail with connection-lost on the first transaction of 4 separate operations (each recovers + // on its single reopen). If they wrongly shared the budget, the backing-store budget (3) + // would be drained; afterwards a backing-store error must still heal. + for (let i = 0; i < 4; i++) { + let callCount = 0; + const spy = jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(function (this: IDBDatabase, ...args) { + callCount++; + if (callCount === 1) { + throw connectionLostError(); + } + spy.mockRestore(); + return original.apply(this, args); }); - return req; + await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); + } + + // A backing-store error still heals — proving the budget was untouched by the 4 transient failures. + let backingCallCount = 0; + const backingSpy = jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(function (this: IDBDatabase, ...args) { + backingCallCount++; + if (backingCallCount === 1) { + throw backingStoreError(); + } + backingSpy.mockRestore(); + return original.apply(this, args); }); + const result = await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); + expect(result).toBe('value'); + expect(logInfoSpy).toHaveBeenCalledWith( + expect.stringContaining('backing store error detected — dropping cached connection and reopening (2 attempts left)'), + expect.objectContaining({dbName: expect.any(String)}), + ); + }); + }); - // 3 calls drain the budget (each call: fail + heal retry fail = 1 budget) - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); + describe('AbortError recovery (transient)', () => { + it('should treat a normalized write-abort AbortError as transient and retry once', async () => { + const store = createStore(uniqueDBName(), STORE_NAME); - // Budget exhausted — no more healing for either error type - logAlertSpy.mockClear(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); - expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('heal budget exhausted'), expect.anything()); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection and reopening'), expect.anything()); + await store('readwrite', (s) => { + s.put('value', 'key1'); + return IDB.promisifyRequest(s.transaction); + }); + + const original = IDBDatabase.prototype.transaction; + let callCount = 0; + jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(function (this: IDBDatabase, ...args) { + callCount++; + if (callCount === 1) { + throw new DOMException('IDB write transaction aborted without an error', 'AbortError'); + } + return original.apply(this, args); + }); + + const result = await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); + expect(result).toBe('value'); + expect(callCount).toBe(2); + expect(logInfoSpy).toHaveBeenCalledWith('IDB transient error — dropping cached connection and retrying once', expect.objectContaining({dbName: expect.any(String)})); + expect(logAlertSpy).not.toHaveBeenCalled(); }); }); }); From 624cd666f9d920209baa81ea4aff2830dc38b615 Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Mon, 8 Jun 2026 15:26:33 +0200 Subject: [PATCH 2/8] introduce storage circuit breaker --- lib/OnyxUtils.ts | 26 ++++- lib/StorageCircuitBreaker.ts | 106 +++++++++++++++++ lib/storage/errors.ts | 1 + .../IDBKeyValProvider/createStore.ts | 7 +- tests/unit/StorageCircuitBreakerTest.ts | 108 ++++++++++++++++++ tests/unit/onyxUtilsTest.ts | 41 +++++++ .../unit/storage/providers/createStoreTest.ts | 8 +- 7 files changed, 288 insertions(+), 9 deletions(-) create mode 100644 lib/StorageCircuitBreaker.ts create mode 100644 tests/unit/StorageCircuitBreakerTest.ts diff --git a/lib/OnyxUtils.ts b/lib/OnyxUtils.ts index dbf166cb4..96065e415 100644 --- a/lib/OnyxUtils.ts +++ b/lib/OnyxUtils.ts @@ -6,6 +6,7 @@ import * as Logger from './Logger'; import type Onyx from './Onyx'; import cache, {TASK} from './OnyxCache'; import OnyxKeys from './OnyxKeys'; +import StorageCircuitBreaker from './StorageCircuitBreaker'; import Storage from './storage'; import {StorageErrorClass, classifyStorageError} from './storage/errors'; import type { @@ -792,7 +793,9 @@ function reportStorageQuota(error?: Error): Promise { * - INVALID_DATA: logs an alert and throws (the same data will always fail). * - TRANSIENT / FATAL: the connection layer already retried (transient) or exhausted its heal budget * and alerted (fatal). Retrying here would only re-amplify, so we skip the write quietly. - * - CAPACITY: evicts the least recently accessed evictable key and retries. + * - CAPACITY: evicts the least recently accessed evictable key and retries, under a session-level + * circuit breaker (see lib/StorageCircuitBreaker.ts) that halts the loop once eviction stops making + * progress or failures storm — the per-operation budget alone cannot stop a session-wide storm. * - UNKNOWN: bounded retry. */ function retryOperation(error: Error, onyxMethod: TMethod, defaultParams: Parameters[0], retryAttempt: number | undefined): Promise { @@ -800,6 +803,13 @@ function retryOperation(error: Error, on const nextRetryAttempt = currentRetryAttempt + 1; const errorClass = classifyStorageError(error); + // Once the breaker is open, every capacity write is going to fail the same way. Drop it silently — + // the breaker already emitted its single alert, and logging per failed write is exactly the storm + // we are suppressing. (We return before the log line below on purpose.) + if (errorClass === StorageErrorClass.CAPACITY && StorageCircuitBreaker.isTripped()) { + return Promise.resolve(); + } + Logger.logInfo(`Failed to save to storage. Error: ${error}. class: ${errorClass}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`); if (errorClass === StorageErrorClass.INVALID_DATA) { @@ -823,6 +833,16 @@ function retryOperation(error: Error, on return onyxMethod(defaultParams, nextRetryAttempt); } + // CAPACITY: feed the session-level circuit breaker before evicting. The per-operation budget above + // cannot stop a session-wide storm — each evicted key triggers an OnyxDerived recompute that spawns + // a fresh write with its own budget — so the breaker is what actually halts the meltdown. (The + // already-open case returned silently at the top of this function.) + StorageCircuitBreaker.recordCapacityFailure(); + if (StorageCircuitBreaker.isTripped()) { + // This failure tripped the breaker; it already emitted its single alert. Stop here. + return Promise.resolve(); + } + // Find the least recently accessed evictable key that we can remove const keyForRemoval = cache.getKeyForEviction(); if (!keyForRemoval) { @@ -833,9 +853,11 @@ function retryOperation(error: Error, on return reportStorageQuota(error); } - // Remove the least recently accessed key and retry. + // Remove the least recently accessed key and retry. Tell the breaker we evicted so that, if the + // retry comes back as another capacity failure, it counts as a no-progress cycle. Logger.logInfo(`Out of storage. Evicting least recently accessed key (${keyForRemoval}) and retrying. Error: ${error}`); reportStorageQuota(error); + StorageCircuitBreaker.recordEviction(); // @ts-expect-error No overload matches this call. return remove(keyForRemoval).then(() => onyxMethod(defaultParams, nextRetryAttempt)); diff --git a/lib/StorageCircuitBreaker.ts b/lib/StorageCircuitBreaker.ts new file mode 100644 index 000000000..0e4ecfac0 --- /dev/null +++ b/lib/StorageCircuitBreaker.ts @@ -0,0 +1,106 @@ +import * as Logger from './Logger'; + +/** + * Process-scoped circuit breaker for storage CAPACITY failures. + * + * The per-operation retry budget in `OnyxUtils.retryOperation` cannot stop a session-level storm: + * each evict -> OnyxDerived recompute -> new write starts its own fresh budget, so a full disk or + * exhausted quota can drive tens of thousands of evict+retry cycles that never make progress and + * freeze the app. This breaker is the session-level brake — `retryOperation` consults it before + * every eviction. + * + * It trips when EITHER: + * - capacity failures within {@link ROLLING_WINDOW_MS} exceed {@link FAILURE_THRESHOLD}, or + * - {@link NO_PROGRESS_CAP} consecutive evictions are each immediately followed by another capacity + * failure (the eviction freed nothing the next write could use — a no-progress cycle). This is a + * cheap proxy for `getDatabaseSize()`, which is costly and only reports origin-wide usage. + * + * On trip it emits exactly ONE alert and self-resets once the rolling window clears, so a persistent + * condition produces at most one alert per window instead of one log line per failed write. + */ + +/** Rolling window over which capacity failures are counted, and how long a trip stays open. */ +const ROLLING_WINDOW_MS = 60 * 1000; + +/** Capacity failures within the window above which the breaker trips (storm backstop). */ +const FAILURE_THRESHOLD = 50; + +/** Consecutive no-progress evictions (evict -> still capacity failure) above which the breaker trips. */ +const NO_PROGRESS_CAP = 5; + +let failureTimestamps: number[] = []; +let consecutiveNoProgressEvictions = 0; +let evictionAwaitingResult = false; +let trippedUntil = 0; + +function reset(): void { + failureTimestamps = []; + consecutiveNoProgressEvictions = 0; + evictionAwaitingResult = false; + trippedUntil = 0; +} + +/** Whether the breaker is currently open. Self-resets once the window since the trip has cleared. */ +function isTripped(): boolean { + if (trippedUntil === 0) { + return false; + } + if (Date.now() >= trippedUntil) { + reset(); + return false; + } + return true; +} + +function trip(reason: string): void { + trippedUntil = Date.now() + ROLLING_WINDOW_MS; + Logger.logAlert(`Storage circuit breaker tripped: ${reason}. Halting eviction/retry for ${ROLLING_WINDOW_MS / 1000}s to stop a storage failure storm.`); +} + +/** + * Record a CAPACITY failure. Call once per capacity failure in `retryOperation`, BEFORE deciding + * whether to evict; then check {@link isTripped} to decide whether to proceed. + */ +function recordCapacityFailure(): void { + // While open, recording is a no-op: no extra timestamps, no second alert, and nothing to keep the + // window from clearing. `isTripped()` self-resets here once the window has elapsed. + if (isTripped()) { + return; + } + + const now = Date.now(); + failureTimestamps = failureTimestamps.filter((timestamp) => now - timestamp < ROLLING_WINDOW_MS); + + // A fresh storm (nothing left in the window) resets the no-progress tracking so a stale eviction + // from an earlier, unrelated incident can't be miscounted as no-progress for this one. + if (failureTimestamps.length === 0) { + consecutiveNoProgressEvictions = 0; + evictionAwaitingResult = false; + } + + // We evicted on the previous cycle and we're back here with another capacity failure, so that + // eviction freed no usable space. + if (evictionAwaitingResult) { + consecutiveNoProgressEvictions += 1; + evictionAwaitingResult = false; + } + + failureTimestamps.push(now); + + if (failureTimestamps.length > FAILURE_THRESHOLD) { + trip(`${failureTimestamps.length} capacity failures within ${ROLLING_WINDOW_MS / 1000}s`); + return; + } + if (consecutiveNoProgressEvictions >= NO_PROGRESS_CAP) { + trip(`${consecutiveNoProgressEvictions} consecutive evictions freed no usable space`); + } +} + +/** Record that `retryOperation` just evicted a key, so the next capacity failure counts as no-progress. */ +function recordEviction(): void { + evictionAwaitingResult = true; +} + +const StorageCircuitBreaker = {recordCapacityFailure, recordEviction, isTripped, reset, ROLLING_WINDOW_MS, FAILURE_THRESHOLD, NO_PROGRESS_CAP}; + +export default StorageCircuitBreaker; diff --git a/lib/storage/errors.ts b/lib/storage/errors.ts index 7318b5a3e..dffe9f55a 100644 --- a/lib/storage/errors.ts +++ b/lib/storage/errors.ts @@ -64,6 +64,7 @@ function classifyStorageError(error: unknown): StorageErrorClassValue { name.includes('aborterror') || message.includes('connection to indexed database server lost') || message.includes('connection is closing') || + // This is related to https://github.com/Expensify/react-native-onyx/pull/796 — remove this comment when #796 is merged. message.includes('idb write transaction aborted without an error') ) { return StorageErrorClass.TRANSIENT; diff --git a/lib/storage/providers/IDBKeyValProvider/createStore.ts b/lib/storage/providers/IDBKeyValProvider/createStore.ts index cb5b36267..e238d07c9 100644 --- a/lib/storage/providers/IDBKeyValProvider/createStore.ts +++ b/lib/storage/providers/IDBKeyValProvider/createStore.ts @@ -141,8 +141,11 @@ function createStore(dbName: string, storeName: string): UseStore { dbName, storeName, }); - } else { - // CAPACITY / UNKNOWN — let the operation layer decide (evict or bounded retry). + } else if (errorClass === StorageErrorClass.UNKNOWN) { + // UNKNOWN — unexpected at this layer; record it so it's visible. CAPACITY is the + // expected propagation path (the operation layer owns its logging, and suppresses it + // entirely once the circuit breaker is open), so we do NOT log it here — doing so was a + // per-failed-write line that dominated the storm. Logger.logInfo('IDB error not recoverable at the connection layer, propagating', { dbName, storeName, diff --git a/tests/unit/StorageCircuitBreakerTest.ts b/tests/unit/StorageCircuitBreakerTest.ts new file mode 100644 index 000000000..8322c0337 --- /dev/null +++ b/tests/unit/StorageCircuitBreakerTest.ts @@ -0,0 +1,108 @@ +import * as Logger from '../../lib/Logger'; +import StorageCircuitBreaker from '../../lib/StorageCircuitBreaker'; + +describe('StorageCircuitBreaker', () => { + let currentTime = 1_000_000; + let nowSpy: jest.SpyInstance; + + const advance = (ms: number) => { + currentTime += ms; + }; + + beforeEach(() => { + currentTime = 1_000_000; + nowSpy = jest.spyOn(Date, 'now').mockImplementation(() => currentTime); + StorageCircuitBreaker.reset(); + }); + + afterEach(() => { + nowSpy.mockRestore(); + jest.restoreAllMocks(); + }); + + it('should not trip below the failure threshold', () => { + for (let i = 0; i < StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should trip once capacity failures exceed the threshold within the window', () => { + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(true); + }); + + it('should not trip when failures are spread across multiple windows', () => { + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + // Space each failure out so older ones fall out of the rolling window before the count builds up. + advance(2_000); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should trip after consecutive no-progress evictions', () => { + // Each cycle is a capacity failure followed by an eviction that frees no usable space. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + } + // The next capacity failure observes that the last eviction made no progress, tipping it over. + StorageCircuitBreaker.recordCapacityFailure(); + + expect(StorageCircuitBreaker.isTripped()).toBe(true); + }); + + it('should not count a failure as no-progress when no eviction preceded it', () => { + // Capacity failures with no interleaved evictions must not accumulate no-progress cycles. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP + 2; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should emit exactly one alert when it trips, even as failures continue', () => { + const logAlertSpy = jest.spyOn(Logger, 'logAlert'); + + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + // Further failures while open must not produce more alerts. + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordCapacityFailure(); + + expect(logAlertSpy).toHaveBeenCalledTimes(1); + expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('Storage circuit breaker tripped')); + }); + + it('should self-reset once the rolling window clears', () => { + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + expect(StorageCircuitBreaker.isTripped()).toBe(true); + + advance(StorageCircuitBreaker.ROLLING_WINDOW_MS); + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should reset no-progress tracking after the window clears between storms', () => { + // First storm: some no-progress evictions, but not enough to trip. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP - 1; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + } + + // Let the window fully clear so the next failure starts a fresh storm. + advance(StorageCircuitBreaker.ROLLING_WINDOW_MS + 1); + StorageCircuitBreaker.recordCapacityFailure(); + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); +}); diff --git a/tests/unit/onyxUtilsTest.ts b/tests/unit/onyxUtilsTest.ts index d3a9c7baf..240c2dc12 100644 --- a/tests/unit/onyxUtilsTest.ts +++ b/tests/unit/onyxUtilsTest.ts @@ -8,6 +8,7 @@ import type GenericCollection from '../utils/GenericCollection'; import OnyxCache from '../../lib/OnyxCache'; import * as Logger from '../../lib/Logger'; import StorageMock from '../../lib/storage'; +import StorageCircuitBreaker from '../../lib/StorageCircuitBreaker'; import createDeferredTask from '../../lib/createDeferredTask'; import waitForPromisesToResolve from '../utils/waitForPromisesToResolve'; @@ -722,6 +723,9 @@ describe('OnyxUtils', () => { const diskFullError = new Error('database or disk is full'); const nonRetriableIdbError = Object.assign(new Error('Internal error opening backing store for indexedDB.open.'), {name: 'UnknownError'}); + // The circuit breaker is process-scoped, so reset it between tests to avoid state leaking. + beforeEach(() => StorageCircuitBreaker.reset()); + it('should retry only one time if the operation is firstly failed and then passed', async () => { StorageMock.setItem = jest.fn(StorageMock.setItem).mockRejectedValueOnce(genericError).mockImplementation(StorageMock.setItem); @@ -821,6 +825,43 @@ describe('OnyxUtils', () => { expect(logAlertSpy).toHaveBeenCalledWith(`Unable to get database size. getDatabaseSize error: ${dbSizeError}. Original error: ${diskFullError}`); }); + it('should trip the circuit breaker and alert once after sustained capacity failures', async () => { + const logAlertSpy = jest.spyOn(Logger, 'logAlert'); + StorageMock.setItem = jest.fn().mockRejectedValue(diskFullError); + + // No evictable keys are configured, so each failing write records exactly one capacity + // failure with the breaker (it cannot evict). Enough of them within one window trips it. + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + await Onyx.set(ONYXKEYS.TEST_KEY, {test: i}); + } + await waitForPromisesToResolve(); + + expect(StorageCircuitBreaker.isTripped()).toBe(true); + expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('Storage circuit breaker tripped')); + }); + + it('should drop capacity writes silently while the circuit breaker is open', async () => { + // Trip the breaker deterministically so every capacity failure below is observed while open. + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + expect(StorageCircuitBreaker.isTripped()).toBe(true); + + // Clear so we only observe logging caused by the write below, not the trip alert above. + const logInfoSpy = jest.spyOn(Logger, 'logInfo').mockClear(); + const logAlertSpy = jest.spyOn(Logger, 'logAlert').mockClear(); + StorageMock.setItem = jest.fn().mockRejectedValue(diskFullError); + + await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); + await waitForPromisesToResolve(); + + // The write and any cascading derived writes are dropped without per-write log spam, and + // without re-alerting — the single trip alert is the only signal while open. + expect(StorageCircuitBreaker.isTripped()).toBe(true); + expect(logInfoSpy).not.toHaveBeenCalledWith(expect.stringContaining('Failed to save to storage')); + expect(logAlertSpy).not.toHaveBeenCalled(); + }); + it('should not re-add an evicted key to recentlyAccessedKeys after removal', async () => { // Re-init with evictable keys so getKeyForEviction() has something to return Object.assign(OnyxUtils.getDeferredInitTask(), createDeferredTask()); diff --git a/tests/unit/storage/providers/createStoreTest.ts b/tests/unit/storage/providers/createStoreTest.ts index 3ca68f587..acb6787c1 100644 --- a/tests/unit/storage/providers/createStoreTest.ts +++ b/tests/unit/storage/providers/createStoreTest.ts @@ -421,17 +421,15 @@ describe('createStore', () => { logInfoSpy = jest.spyOn(Logger, 'logInfo'); // QuotaExceededError — a CAPACITY error the connection layer does not own; it propagates - // to the operation layer (OnyxUtils.retryOperation) which handles eviction. + // to the operation layer (OnyxUtils.retryOperation) which handles eviction. The connection + // layer stays quiet for capacity (it's the expected path) so it can't spam the storm log. jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(() => { throw new DOMException('Quota exceeded', 'QuotaExceededError'); }); await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow('Quota exceeded'); expect(logAlertSpy).not.toHaveBeenCalled(); - expect(logInfoSpy).toHaveBeenCalledWith( - 'IDB error not recoverable at the connection layer, propagating', - expect.objectContaining({errorMessage: 'Quota exceeded', errorClass: 'capacity'}), - ); + expect(logInfoSpy).not.toHaveBeenCalledWith('IDB error not recoverable at the connection layer, propagating', expect.objectContaining({errorClass: 'capacity'})); }); }); From 1ffe796da6393fb674075810c51f85090ddb4f12 Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Mon, 8 Jun 2026 16:04:22 +0200 Subject: [PATCH 3/8] updated api doc --- API-INTERNAL.md | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/API-INTERNAL.md b/API-INTERNAL.md index b3cfeb90a..3a5cc6460 100644 --- a/API-INTERNAL.md +++ b/API-INTERNAL.md @@ -79,12 +79,17 @@ If the requested key is a collection, it will return an object with all the coll

Remove a key from Onyx and update the subscribers

retryOperation()
-

Handles storage operation failures based on the error type:

+

Handles storage operation failures based on the error class (see lib/storage/errors.ts). +The connection layer (createStore) owns connection/transport recovery; this operation layer owns +capacity recovery (eviction) so that a given failure is retried by exactly one layer:

    -
  • Storage capacity errors: evicts data and retries the operation
  • -
  • Invalid data errors: logs an alert and throws an error
  • -
  • Non-retriable errors: logs an alert and resolves without retrying
  • -
  • Other errors: retries the operation
  • +
  • INVALID_DATA: logs an alert and throws (the same data will always fail).
  • +
  • TRANSIENT / FATAL: the connection layer already retried (transient) or exhausted its heal budget +and alerted (fatal). Retrying here would only re-amplify, so we skip the write quietly.
  • +
  • CAPACITY: evicts the least recently accessed evictable key and retries, under a session-level +circuit breaker (see lib/StorageCircuitBreaker.ts) that halts the loop once eviction stops making +progress or failures storm — the per-operation budget alone cannot stop a session-wide storm.
  • +
  • UNKNOWN: bounded retry.
broadcastUpdate()
@@ -318,11 +323,16 @@ Remove a key from Onyx and update the subscribers ## retryOperation() -Handles storage operation failures based on the error type: -- Storage capacity errors: evicts data and retries the operation -- Invalid data errors: logs an alert and throws an error -- Non-retriable errors: logs an alert and resolves without retrying -- Other errors: retries the operation +Handles storage operation failures based on the error class (see lib/storage/errors.ts). +The connection layer (createStore) owns connection/transport recovery; this operation layer owns +capacity recovery (eviction) so that a given failure is retried by exactly one layer: +- INVALID_DATA: logs an alert and throws (the same data will always fail). +- TRANSIENT / FATAL: the connection layer already retried (transient) or exhausted its heal budget + and alerted (fatal). Retrying here would only re-amplify, so we skip the write quietly. +- CAPACITY: evicts the least recently accessed evictable key and retries, under a session-level + circuit breaker (see lib/StorageCircuitBreaker.ts) that halts the loop once eviction stops making + progress or failures storm — the per-operation budget alone cannot stop a session-wide storm. +- UNKNOWN: bounded retry. **Kind**: global function From 8bc993cafd23c8980d9274333a38bb335f28c244 Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Mon, 8 Jun 2026 16:06:12 +0200 Subject: [PATCH 4/8] prettier fix --- lib/OnyxUtils.ts | 4 +++- tests/unit/onyxUtilsTest.ts | 16 ++++++++++++---- tests/unit/storage/providers/createStoreTest.ts | 5 +---- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/OnyxUtils.ts b/lib/OnyxUtils.ts index 96065e415..d80b300b4 100644 --- a/lib/OnyxUtils.ts +++ b/lib/OnyxUtils.ts @@ -810,7 +810,9 @@ function retryOperation(error: Error, on return Promise.resolve(); } - Logger.logInfo(`Failed to save to storage. Error: ${error}. class: ${errorClass}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`); + Logger.logInfo( + `Failed to save to storage. Error: ${error}. class: ${errorClass}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`, + ); if (errorClass === StorageErrorClass.INVALID_DATA) { Logger.logAlert(`Attempted to set invalid data set in Onyx. Please ensure all data is serializable. Error: ${error}`); diff --git a/tests/unit/onyxUtilsTest.ts b/tests/unit/onyxUtilsTest.ts index 240c2dc12..7321e715e 100644 --- a/tests/unit/onyxUtilsTest.ts +++ b/tests/unit/onyxUtilsTest.ts @@ -777,7 +777,9 @@ describe('OnyxUtils', () => { // The connection layer (createStore) owns and alerts on fatal errors; the operation layer // just skips the retry at info level. No alert here, and no "5 retries exhausted" alert. - expect(logInfoSpy).toHaveBeenCalledWith(`Storage operation skipped retry; fatal errors are handled by the connection layer. Error: ${nonRetriableIdbError}. onyxMethod: setWithRetry.`); + expect(logInfoSpy).toHaveBeenCalledWith( + `Storage operation skipped retry; fatal errors are handled by the connection layer. Error: ${nonRetriableIdbError}. onyxMethod: setWithRetry.`, + ); expect(logAlertSpy).not.toHaveBeenCalled(); }); @@ -798,7 +800,9 @@ describe('OnyxUtils', () => { await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logAlertSpy).toHaveBeenCalledWith(`Out of storage. But found no acceptable keys to remove. Error: ${diskFullError}`); - expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`); + expect(logInfoSpy).toHaveBeenCalledWith( + `Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`, + ); }); it('should include usageDetails in the storage quota log when available', async () => { @@ -810,7 +814,9 @@ describe('OnyxUtils', () => { await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logInfoSpy).toHaveBeenCalledWith( - `Storage Quota Check -- bytesUsed: 13289269 originWideBytesRemaining (estimate, not per-DB headroom): 5000000 usageDetails: ${JSON.stringify(usageDetails)}. Original error: ${diskFullError}`, + `Storage Quota Check -- bytesUsed: 13289269 originWideBytesRemaining (estimate, not per-DB headroom): 5000000 usageDetails: ${JSON.stringify( + usageDetails, + )}. Original error: ${diskFullError}`, ); }); @@ -1390,7 +1396,9 @@ describe('OnyxUtils', () => { await LocalOnyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logInfoSpy).toHaveBeenCalledWith(`Out of storage. Evicting least recently accessed key (${key1}) and retrying. Error: ${diskFullError}`); - expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`); + expect(logInfoSpy).toHaveBeenCalledWith( + `Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`, + ); }); }); diff --git a/tests/unit/storage/providers/createStoreTest.ts b/tests/unit/storage/providers/createStoreTest.ts index acb6787c1..ec58361fa 100644 --- a/tests/unit/storage/providers/createStoreTest.ts +++ b/tests/unit/storage/providers/createStoreTest.ts @@ -459,10 +459,7 @@ describe('createStore', () => { const result = await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); expect(result).toBe('value'); expect(callCount).toBe(2); - expect(logInfoSpy).toHaveBeenCalledWith( - 'IDB transient error — dropping cached connection and retrying once', - expect.objectContaining({dbName: expect.any(String)}), - ); + expect(logInfoSpy).toHaveBeenCalledWith('IDB transient error — dropping cached connection and retrying once', expect.objectContaining({dbName: expect.any(String)})); }); it('should not be budgeted — reopens on every call without ever exhausting a budget', async () => { From 23bc519d72335bcd25c6c504eb9e8049e472ac88 Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Mon, 22 Jun 2026 11:51:47 +0200 Subject: [PATCH 5/8] move error classification to providers --- lib/OnyxUtils.ts | 20 +++++-- lib/storage/__mocks__/index.ts | 13 ++++ lib/storage/errors.ts | 60 ++++--------------- lib/storage/index.ts | 6 ++ .../IDBKeyValProvider/classifyError.ts | 45 ++++++++++++++ .../IDBKeyValProvider/createStore.ts | 7 ++- .../providers/IDBKeyValProvider/index.ts | 5 ++ lib/storage/providers/MemoryOnlyProvider.ts | 6 ++ lib/storage/providers/NoopProvider.ts | 6 ++ lib/storage/providers/SQLiteProvider.ts | 5 ++ lib/storage/providers/classifySQLiteError.ts | 24 ++++++++ lib/storage/providers/types.ts | 8 +++ tests/unit/onyxUtilsTest.ts | 15 +++++ 13 files changed, 165 insertions(+), 55 deletions(-) create mode 100644 lib/storage/providers/IDBKeyValProvider/classifyError.ts create mode 100644 lib/storage/providers/classifySQLiteError.ts diff --git a/lib/OnyxUtils.ts b/lib/OnyxUtils.ts index ce552d653..f10286f87 100644 --- a/lib/OnyxUtils.ts +++ b/lib/OnyxUtils.ts @@ -8,7 +8,7 @@ import cache, {TASK} from './OnyxCache'; import OnyxKeys from './OnyxKeys'; import StorageCircuitBreaker from './StorageCircuitBreaker'; import Storage from './storage'; -import {StorageErrorClass, classifyStorageError} from './storage/errors'; +import {StorageErrorClass} from './storage/errors'; import type { CollectionKeyBase, ConnectOptions, @@ -812,7 +812,8 @@ function reportStorageQuota(error?: Error): Promise { * - CAPACITY: evicts the least recently accessed evictable key and retries, under a session-level * circuit breaker (see lib/StorageCircuitBreaker.ts) that halts the loop once eviction stops making * progress or failures storm — the per-operation budget alone cannot stop a session-wide storm. - * - UNKNOWN: bounded retry. + * - UNKNOWN: the provider couldn't classify it — log the full error shape (name + message + + * provider) once so it's visible, then bounded retry without eviction. */ function retryOperation( error: Error, @@ -823,7 +824,7 @@ function retryOperation( ): Promise { const currentRetryAttempt = retryAttempt ?? 0; const nextRetryAttempt = currentRetryAttempt + 1; - const errorClass = classifyStorageError(error); + const errorClass = Storage.classifyError(error); // Once the breaker is open, every capacity write is going to fail the same way. Drop it silently — // the breaker already emitted its single alert, and logging per failed write is exactly the storm @@ -851,8 +852,17 @@ function retryOperation( return Promise.resolve(); } - if (errorClass !== StorageErrorClass.CAPACITY) { - // UNKNOWN error — bounded retry without eviction. + if (errorClass === StorageErrorClass.UNKNOWN) { + // UNKNOWN is the blind spot: the active provider's classifier did not recognize this error, so + // we cannot route it to a real recovery strategy. Log the full error shape (name + message + + // provider) once per operation so telemetry can reveal what lives in UNKNOWN, letting us promote + // recurring cases into TRANSIENT/CAPACITY/FATAL. Logged on the first attempt only to avoid the + // per-retry amplification this mechanism is trying to kill. Then bounded retry without eviction. + if (currentRetryAttempt === 0) { + Logger.logAlert( + `Unclassified storage error. provider: ${Storage.getStorageProvider().name}. name: ${error?.name}. message: ${error?.message}. onyxMethod: ${onyxMethod.name}.`, + ); + } // @ts-expect-error No overload matches this call. return onyxMethod(defaultParams, nextRetryAttempt); } diff --git a/lib/storage/__mocks__/index.ts b/lib/storage/__mocks__/index.ts index a4456a7e7..90c75fd62 100644 --- a/lib/storage/__mocks__/index.ts +++ b/lib/storage/__mocks__/index.ts @@ -1,11 +1,24 @@ import MemoryOnlyProvider, {mockStore, setMockStore} from '../providers/MemoryOnlyProvider'; +import classifyIDBError from '../providers/IDBKeyValProvider/classifyError'; +import classifySQLiteError from '../providers/classifySQLiteError'; +import {StorageErrorClass} from '../errors'; const init = jest.fn(MemoryOnlyProvider.init); init(); +// Tests exercise retryOperation against both IndexedDB- and SQLite-shaped errors, so the mock facade +// classifies with each engine's real (native-dep-free) classifier in turn. Mirrors how the real facade +// delegates to the active provider; here we cover both engines since one mock stands in for all. +const classifyError = (error: unknown) => { + const idbClass = classifyIDBError(error); + return idbClass === StorageErrorClass.UNKNOWN ? classifySQLiteError(error) : idbClass; +}; + const StorageMock = { init, + classifyError: jest.fn(classifyError), + getStorageProvider: jest.fn(() => MemoryOnlyProvider), getItem: jest.fn(MemoryOnlyProvider.getItem), multiGet: jest.fn(MemoryOnlyProvider.multiGet), setItem: jest.fn(MemoryOnlyProvider.setItem), diff --git a/lib/storage/errors.ts b/lib/storage/errors.ts index dffe9f55a..607e19756 100644 --- a/lib/storage/errors.ts +++ b/lib/storage/errors.ts @@ -1,15 +1,16 @@ import type {ValueOf} from 'type-fest'; /** - * Single source of truth for classifying storage (IndexedDB / SQLite) write failures. + * Shared vocabulary for storage write failures. The *classes* are engine-agnostic; the *matching* + * is not — each storage provider knows its own error dialect and owns its classifier (see each + * provider's `classifyError`). This module deliberately holds NO string matchers: it is the common + * taxonomy the two reacting layers agree on, while the per-engine knowledge lives with the engine. * - * Both layers that react to storage errors consult this: * - the connection layer (`createStore`) recovers TRANSIENT and FATAL errors by reopening the DB, and * - the operation layer (`OnyxUtils.retryOperation`) recovers CAPACITY by eviction and retries UNKNOWN. * - * Keeping the matchers here (instead of duplicated string lists in each layer) guarantees the two - * layers agree on what an error *is*, even though they react to it differently. This module has no - * Onyx dependencies so it can live in the storage layer without creating an import cycle. + * This module has no Onyx dependencies (and no engine dependencies) so it can live in the storage + * layer, and be imported by every provider, without creating an import cycle. */ const StorageErrorClass = { /** Connection/transport failure (stale connection). Owner: connection layer — reopen + retry once. */ @@ -20,12 +21,17 @@ const StorageErrorClass = { INVALID_DATA: 'invalidData', /** Backing-store corruption. Owner: connection layer — budgeted heal, then give up. */ FATAL: 'fatal', - /** Unmatched. Owner: operation layer — bounded retry. */ + /** Unmatched by the active provider. Owner: operation layer — bounded retry, and log the shape so + * recurring cases can be promoted into one of the classes above. */ UNKNOWN: 'unknown', } as const; type StorageErrorClassValue = ValueOf; +/** + * Normalizes any thrown value into a lowercased `{name, message}` pair for matching. Shared by every + * provider's classifier so they all extract the error the same way. + */ function getErrorParts(error: unknown): {name: string; message: string} { if (error instanceof Error || error instanceof DOMException) { return {name: (error.name ?? '').toLowerCase(), message: (error.message ?? '').toLowerCase()}; @@ -33,45 +39,5 @@ function getErrorParts(error: unknown): {name: string; message: string} { return {name: '', message: String(error ?? '').toLowerCase()}; } -/** - * Classifies a storage write error into one of the {@link StorageErrorClass} buckets. - * Matching is done on the lowercased error name and message. - */ -function classifyStorageError(error: unknown): StorageErrorClassValue { - const {name, message} = getErrorParts(error); - - // Non-serializable data passed to IDBObjectStore.put — retrying is futile. - if (message.includes("failed to execute 'put' on 'idbobjectstore'")) { - return StorageErrorClass.INVALID_DATA; - } - - // Storage capacity: browser quota exceeded (IDB) or device disk full (SQLite). - if (name.includes('quotaexceedederror') || message.includes('quotaexceedederror') || message.includes('database or disk is full')) { - return StorageErrorClass.CAPACITY; - } - - // Backing-store corruption (Chromium LevelDB). Recoverable only via a budgeted reopen. - if (message.includes('internal error opening backing store')) { - return StorageErrorClass.FATAL; - } - - // Transient connection/transport failures — the cached connection is stale and a reopen fixes it: - // - InvalidStateError: connection closed between getDB() resolving and db.transaction(). - // - AbortError: write transaction aborted (connection close / versionchange / sibling abort). - // - Safari/WebKit IDB server termination for backgrounded tabs. - if ( - name.includes('invalidstateerror') || - name.includes('aborterror') || - message.includes('connection to indexed database server lost') || - message.includes('connection is closing') || - // This is related to https://github.com/Expensify/react-native-onyx/pull/796 — remove this comment when #796 is merged. - message.includes('idb write transaction aborted without an error') - ) { - return StorageErrorClass.TRANSIENT; - } - - return StorageErrorClass.UNKNOWN; -} - -export {StorageErrorClass, classifyStorageError}; +export {StorageErrorClass, getErrorParts}; export type {StorageErrorClassValue}; diff --git a/lib/storage/index.ts b/lib/storage/index.ts index 6dcd8e0bd..414a0a0ca 100644 --- a/lib/storage/index.ts +++ b/lib/storage/index.ts @@ -49,6 +49,12 @@ const storage: Storage = { return provider; }, + /** + * Classifies a write error using the active provider's own classifier. Synchronous and pure — + * never wrapped in tryOrDegradePerformance. + */ + classifyError: (error) => provider.classifyError(error), + /** * Initializes all providers in the list of storage providers * and enables fallback providers if necessary diff --git a/lib/storage/providers/IDBKeyValProvider/classifyError.ts b/lib/storage/providers/IDBKeyValProvider/classifyError.ts new file mode 100644 index 000000000..9e4834a00 --- /dev/null +++ b/lib/storage/providers/IDBKeyValProvider/classifyError.ts @@ -0,0 +1,45 @@ +import {StorageErrorClass, getErrorParts} from '../../errors'; +import type {StorageErrorClassValue} from '../../errors'; + +/** + * Classifies an IndexedDB write failure into the shared storage taxonomy (lib/storage/errors.ts). + * Matching is done on the lowercased error name and message. This is the IndexedDB engine's own + * dialect — it is NOT shared with other engines. + */ +function classifyIDBError(error: unknown): StorageErrorClassValue { + const {name, message} = getErrorParts(error); + + // Non-serializable data passed to IDBObjectStore.put — retrying is futile. + if (message.includes("failed to execute 'put' on 'idbobjectstore'")) { + return StorageErrorClass.INVALID_DATA; + } + + // Browser quota exceeded. + if (name.includes('quotaexceedederror') || message.includes('quotaexceedederror')) { + return StorageErrorClass.CAPACITY; + } + + // Backing-store corruption (Chromium LevelDB). Recoverable only via a budgeted reopen. + if (message.includes('internal error opening backing store')) { + return StorageErrorClass.FATAL; + } + + // Transient connection/transport failures — the cached connection is stale and a reopen fixes it: + // - InvalidStateError: connection closed between getDB() resolving and db.transaction(). + // - AbortError: write transaction aborted (connection close / versionchange / sibling abort). + // - Safari/WebKit IDB server termination for backgrounded tabs. + if ( + name.includes('invalidstateerror') || + name.includes('aborterror') || + message.includes('connection to indexed database server lost') || + message.includes('connection is closing') || + // This is related to https://github.com/Expensify/react-native-onyx/pull/796 — remove this comment when #796 is merged. + message.includes('idb write transaction aborted without an error') + ) { + return StorageErrorClass.TRANSIENT; + } + + return StorageErrorClass.UNKNOWN; +} + +export default classifyIDBError; diff --git a/lib/storage/providers/IDBKeyValProvider/createStore.ts b/lib/storage/providers/IDBKeyValProvider/createStore.ts index 995f6df8d..dc7d0b270 100644 --- a/lib/storage/providers/IDBKeyValProvider/createStore.ts +++ b/lib/storage/providers/IDBKeyValProvider/createStore.ts @@ -1,7 +1,8 @@ import * as IDB from 'idb-keyval'; import type {UseStore} from 'idb-keyval'; import * as Logger from '../../../Logger'; -import {StorageErrorClass, classifyStorageError} from '../../errors'; +import {StorageErrorClass} from '../../errors'; +import classifyIDBError from './classifyError'; const HEAL_ATTEMPTS_MAX = 3; @@ -112,7 +113,7 @@ function createStore(dbName: string, storeName: string): UseStore { const dropCacheIfStale = (error: unknown) => { // A stale/dead connection surfaces as TRANSIENT (InvalidStateError, Safari connection lost) // or FATAL (Chromium backing-store corruption) per the shared taxonomy (lib/storage/errors.ts). - const errorClass = classifyStorageError(error); + const errorClass = classifyIDBError(error); const isStaleConnection = errorClass === StorageErrorClass.TRANSIENT || errorClass === StorageErrorClass.FATAL; if (dbp !== probePromise || !isStaleConnection) { return; @@ -167,7 +168,7 @@ function createStore(dbName: string, storeName: string): UseStore { executeTransaction(txMode, callback) .then(resetHealBudget) .catch((error) => { - const errorClass = classifyStorageError(error); + const errorClass = classifyIDBError(error); if (errorClass === StorageErrorClass.TRANSIENT) { Logger.logInfo('IDB transient error — dropping cached connection and retrying once', { diff --git a/lib/storage/providers/IDBKeyValProvider/index.ts b/lib/storage/providers/IDBKeyValProvider/index.ts index 68768d1b3..09fcbb1f2 100644 --- a/lib/storage/providers/IDBKeyValProvider/index.ts +++ b/lib/storage/providers/IDBKeyValProvider/index.ts @@ -4,6 +4,7 @@ import utils from '../../../utils'; import type StorageProvider from '../types'; import type {OnyxKey, OnyxValue} from '../../../types'; import createStore from './createStore'; +import classifyIDBError from './classifyError'; import type {StorageKeyValuePair} from '../types'; const DB_NAME = 'OnyxDB'; @@ -29,6 +30,10 @@ const provider: StorageProvider = { * The name of the provider that can be printed to the logs */ name: 'IDBKeyValProvider', + /** + * Classifies an IndexedDB write failure into the shared storage taxonomy. + */ + classifyError: classifyIDBError, /** * Initializes the storage provider */ diff --git a/lib/storage/providers/MemoryOnlyProvider.ts b/lib/storage/providers/MemoryOnlyProvider.ts index 1f92425b6..b5f859c8f 100644 --- a/lib/storage/providers/MemoryOnlyProvider.ts +++ b/lib/storage/providers/MemoryOnlyProvider.ts @@ -1,6 +1,7 @@ import _ from 'underscore'; import type {OnyxKey, OnyxValue} from '../../types'; import utils from '../../utils'; +import {StorageErrorClass} from '../errors'; import type StorageProvider from './types'; import type {StorageKeyValuePair} from './types'; @@ -24,6 +25,11 @@ const provider: StorageProvider = { */ name: 'MemoryOnlyProvider', + /** + * In-memory storage has no quota/connection failure modes, so nothing is classifiable. + */ + classifyError: () => StorageErrorClass.UNKNOWN, + /** * Initializes the storage provider */ diff --git a/lib/storage/providers/NoopProvider.ts b/lib/storage/providers/NoopProvider.ts index 677826cfb..4f3aaea11 100644 --- a/lib/storage/providers/NoopProvider.ts +++ b/lib/storage/providers/NoopProvider.ts @@ -1,4 +1,5 @@ import type {OnyxValue} from '../../types'; +import {StorageErrorClass} from '../errors'; import type StorageProvider from './types'; const provider: StorageProvider = { @@ -9,6 +10,11 @@ const provider: StorageProvider = { */ name: 'NoopProvider', + /** + * The noop provider never throws, so nothing is classifiable. + */ + classifyError: () => StorageErrorClass.UNKNOWN, + /** * Initializes the storage provider */ diff --git a/lib/storage/providers/SQLiteProvider.ts b/lib/storage/providers/SQLiteProvider.ts index d3b6fee67..56fadc588 100644 --- a/lib/storage/providers/SQLiteProvider.ts +++ b/lib/storage/providers/SQLiteProvider.ts @@ -9,6 +9,7 @@ import type {FastMergeReplaceNullPatch} from '../../utils'; import utils from '../../utils'; import type StorageProvider from './types'; import type {StorageKeyList, StorageKeyValuePair} from './types'; +import classifySQLiteError from './classifySQLiteError'; /** * The type of the key-value pair stored in the SQLite database @@ -63,6 +64,10 @@ const provider: StorageProvider = { * The name of the provider that can be printed to the logs */ name: 'SQLiteProvider', + /** + * Classifies a SQLite write failure into the shared storage taxonomy. + */ + classifyError: classifySQLiteError, /** * Initializes the storage provider */ diff --git a/lib/storage/providers/classifySQLiteError.ts b/lib/storage/providers/classifySQLiteError.ts new file mode 100644 index 000000000..3bf2e0250 --- /dev/null +++ b/lib/storage/providers/classifySQLiteError.ts @@ -0,0 +1,24 @@ +import {StorageErrorClass, getErrorParts} from '../errors'; +import type {StorageErrorClassValue} from '../errors'; + +/** + * Classifies a SQLite write failure into the shared storage taxonomy (lib/storage/errors.ts). + * This is the SQLite engine's own dialect — it is NOT shared with other engines. It lives in a + * standalone module (no `react-native-nitro-sqlite` import) so it can be reused without pulling in + * native dependencies. + * + * SQLite surfaces fewer distinct write-failure shapes than IndexedDB. As telemetry from the UNKNOWN + * bucket (see OnyxUtils.retryOperation) reveals recurring native errors, add matchers here. + */ +function classifySQLiteError(error: unknown): StorageErrorClassValue { + const {message} = getErrorParts(error); + + // Device disk full. + if (message.includes('database or disk is full')) { + return StorageErrorClass.CAPACITY; + } + + return StorageErrorClass.UNKNOWN; +} + +export default classifySQLiteError; diff --git a/lib/storage/providers/types.ts b/lib/storage/providers/types.ts index 046b531b0..b8e52b440 100644 --- a/lib/storage/providers/types.ts +++ b/lib/storage/providers/types.ts @@ -1,5 +1,6 @@ import type {OnyxKey, OnyxValue} from '../../types'; import type {FastMergeReplaceNullPatch} from '../../utils'; +import type {StorageErrorClassValue} from '../errors'; type StorageKeyValuePair = [key: OnyxKey, value: OnyxValue, replaceNullPatches?: FastMergeReplaceNullPatch[]]; type StorageKeyList = OnyxKey[]; @@ -87,6 +88,13 @@ type StorageProvider = { */ getDatabaseSize: () => Promise; + /** + * Classifies a write error from THIS engine into the shared {@link StorageErrorClassValue} taxonomy. + * Each provider owns its own matchers (IndexedDB DOMExceptions, SQLite messages, …) so the central + * taxonomy stays engine-agnostic. Anything the provider doesn't recognize must return UNKNOWN. + */ + classifyError: (error: unknown) => StorageErrorClassValue; + /** * @param onStorageKeyChanged Storage synchronization mechanism keeping all opened tabs in sync */ diff --git a/tests/unit/onyxUtilsTest.ts b/tests/unit/onyxUtilsTest.ts index 32c83eae9..99ae441d8 100644 --- a/tests/unit/onyxUtilsTest.ts +++ b/tests/unit/onyxUtilsTest.ts @@ -773,6 +773,21 @@ describe('OnyxUtils', () => { expect(retryOperationSpy).toHaveBeenCalledTimes(6); }); + it('should log the full shape of an unclassified (UNKNOWN) error once per operation', async () => { + const logAlertSpy = jest.spyOn(Logger, 'logAlert'); + StorageMock.setItem = jest.fn().mockRejectedValue(genericError); + + await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); + + // UNKNOWN is instrumented so we can see what to promote into a real class. The shape (provider + // + name + message) is logged exactly once even though the operation retries 6 times. + const unclassifiedCalls = logAlertSpy.mock.calls.filter((call) => typeof call[0] === 'string' && call[0].startsWith('Unclassified storage error.')); + expect(unclassifiedCalls).toHaveLength(1); + expect(unclassifiedCalls[0][0]).toBe( + `Unclassified storage error. provider: MemoryOnlyProvider. name: ${genericError.name}. message: ${genericError.message}. onyxMethod: setWithRetry.`, + ); + }); + it("should throw error for if operation failed with \"Failed to execute 'put' on 'IDBObjectStore': invalid data\" error", async () => { StorageMock.setItem = jest.fn().mockRejectedValueOnce(invalidDataError); From cd11e1cfd46efdb181d809311b470a9825bd9f30 Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Mon, 22 Jun 2026 12:36:29 +0200 Subject: [PATCH 6/8] record write success --- lib/OnyxUtils.ts | 5 ++++ lib/StorageCircuitBreaker.ts | 16 +++++++++++- tests/unit/StorageCircuitBreakerTest.ts | 34 +++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/lib/OnyxUtils.ts b/lib/OnyxUtils.ts index f10286f87..b78615a73 100644 --- a/lib/OnyxUtils.ts +++ b/lib/OnyxUtils.ts @@ -1383,6 +1383,7 @@ function setWithRetry({key, value, options}: SetParams StorageCircuitBreaker.recordWriteSuccess()) .catch((error) => OnyxUtils.retryOperation(error, setWithRetry, {key, value: valueWithoutNestedNullValues, options}, retryAttempt)) .then(() => { OnyxUtils.sendActionToDevTools(OnyxUtils.METHOD.SET, key, valueWithoutNestedNullValues); @@ -1483,6 +1484,7 @@ function multiSetWithRetry(data: OnyxMultiSetInput, retryAttempt?: number): Prom const inFlightKeys = new Set(keyValuePairsToSet.map(([key]) => key)); return Storage.multiSet(keyValuePairsToStore) + .then(() => StorageCircuitBreaker.recordWriteSuccess()) .catch((error) => OnyxUtils.retryOperation(error, multiSetWithRetry, newData, retryAttempt, inFlightKeys)) .then(() => { OnyxUtils.sendActionToDevTools(OnyxUtils.METHOD.MULTI_SET, undefined, newData); @@ -1562,6 +1564,7 @@ function setCollectionWithRetry({collectionKey, const inFlightKeys = new Set(keyValuePairs.map(([key]) => key)); return Storage.multiSet(keyValuePairs) + .then(() => StorageCircuitBreaker.recordWriteSuccess()) .catch((error) => OnyxUtils.retryOperation(error, setCollectionWithRetry, {collectionKey, collection}, retryAttempt, inFlightKeys)) .then(() => { OnyxUtils.sendActionToDevTools(OnyxUtils.METHOD.SET_COLLECTION, undefined, mutableCollection); @@ -1720,6 +1723,7 @@ function mergeCollectionWithPatches( const inFlightKeys = new Set(Object.keys(finalMergedCollection)); return Promise.all(promises) + .then(() => StorageCircuitBreaker.recordWriteSuccess()) .catch((error) => retryOperation( error, @@ -1802,6 +1806,7 @@ function partialSetCollection({collectionKey, co const inFlightKeys = new Set(keyValuePairs.map(([key]) => key)); return Storage.multiSet(keyValuePairs) + .then(() => StorageCircuitBreaker.recordWriteSuccess()) .catch((error) => retryOperation(error, partialSetCollection, {collectionKey, collection}, retryAttempt, inFlightKeys)) .then(() => { sendActionToDevTools(METHOD.SET_COLLECTION, undefined, mutableCollection); diff --git a/lib/StorageCircuitBreaker.ts b/lib/StorageCircuitBreaker.ts index 0e4ecfac0..d7c55677f 100644 --- a/lib/StorageCircuitBreaker.ts +++ b/lib/StorageCircuitBreaker.ts @@ -101,6 +101,20 @@ function recordEviction(): void { evictionAwaitingResult = true; } -const StorageCircuitBreaker = {recordCapacityFailure, recordEviction, isTripped, reset, ROLLING_WINDOW_MS, FAILURE_THRESHOLD, NO_PROGRESS_CAP}; +/** + * Record that a storage write SUCCEEDED. If an eviction was awaiting its verdict, the eviction freed + * usable space — so it must NOT later be miscounted as a no-progress cycle by the next capacity + * failure. Clear the pending flag and reset the consecutive no-progress streak (a success breaks the + * streak). No-op when no eviction is pending (the common case), so it's cheap to call on every write. + */ +function recordWriteSuccess(): void { + if (!evictionAwaitingResult) { + return; + } + evictionAwaitingResult = false; + consecutiveNoProgressEvictions = 0; +} + +const StorageCircuitBreaker = {recordCapacityFailure, recordEviction, recordWriteSuccess, isTripped, reset, ROLLING_WINDOW_MS, FAILURE_THRESHOLD, NO_PROGRESS_CAP}; export default StorageCircuitBreaker; diff --git a/tests/unit/StorageCircuitBreakerTest.ts b/tests/unit/StorageCircuitBreakerTest.ts index 8322c0337..94069e06e 100644 --- a/tests/unit/StorageCircuitBreakerTest.ts +++ b/tests/unit/StorageCircuitBreakerTest.ts @@ -58,6 +58,40 @@ describe('StorageCircuitBreaker', () => { expect(StorageCircuitBreaker.isTripped()).toBe(true); }); + it('should not trip when each eviction makes progress (retry succeeds)', () => { + // Intermittent quota pressure: every cycle is a capacity failure → eviction → SUCCESSFUL retry. + // A successful retry means the eviction freed usable space, so it must never be counted as a + // no-progress cycle by the next failure. Without recordWriteSuccess the stale pending flag made + // each subsequent failure look like no-progress and tripped the breaker after NO_PROGRESS_CAP cycles. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP + 3; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + StorageCircuitBreaker.recordWriteSuccess(); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should reset the no-progress streak when an eviction finally makes progress', () => { + // A few no-progress evictions build the streak up, but short of the cap. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP - 1; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + } + + // This eviction succeeds, breaking the consecutive streak. + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + StorageCircuitBreaker.recordWriteSuccess(); + + // Two more no-progress cycles must not trip, because the streak was reset by the success above. + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + StorageCircuitBreaker.recordCapacityFailure(); + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + it('should not count a failure as no-progress when no eviction preceded it', () => { // Capacity failures with no interleaved evictions must not accumulate no-progress cycles. for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP + 2; i++) { From 50a3ec6f7d9d740fbfcb094c6d6e681b87c5486a Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Mon, 22 Jun 2026 14:49:04 +0200 Subject: [PATCH 7/8] prettier fixes --- lib/OnyxUtils.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/OnyxUtils.ts b/lib/OnyxUtils.ts index b78615a73..4979b3c59 100644 --- a/lib/OnyxUtils.ts +++ b/lib/OnyxUtils.ts @@ -859,9 +859,7 @@ function retryOperation( // recurring cases into TRANSIENT/CAPACITY/FATAL. Logged on the first attempt only to avoid the // per-retry amplification this mechanism is trying to kill. Then bounded retry without eviction. if (currentRetryAttempt === 0) { - Logger.logAlert( - `Unclassified storage error. provider: ${Storage.getStorageProvider().name}. name: ${error?.name}. message: ${error?.message}. onyxMethod: ${onyxMethod.name}.`, - ); + Logger.logAlert(`Unclassified storage error. provider: ${Storage.getStorageProvider().name}. name: ${error?.name}. message: ${error?.message}. onyxMethod: ${onyxMethod.name}.`); } // @ts-expect-error No overload matches this call. return onyxMethod(defaultParams, nextRetryAttempt); From edc6ec8dbbe23a44d5f941b4ad2ed3af3366a9ca Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Mon, 22 Jun 2026 14:50:26 +0200 Subject: [PATCH 8/8] API docs --- API-INTERNAL.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/API-INTERNAL.md b/API-INTERNAL.md index 3a5cc6460..54dc85d21 100644 --- a/API-INTERNAL.md +++ b/API-INTERNAL.md @@ -89,7 +89,8 @@ and alerted (fatal). Retrying here would only re-amplify, so we skip the write q
  • CAPACITY: evicts the least recently accessed evictable key and retries, under a session-level circuit breaker (see lib/StorageCircuitBreaker.ts) that halts the loop once eviction stops making progress or failures storm — the per-operation budget alone cannot stop a session-wide storm.
  • -
  • UNKNOWN: bounded retry.
  • +
  • UNKNOWN: the provider couldn't classify it — log the full error shape (name + message + +provider) once so it's visible, then bounded retry without eviction.
  • broadcastUpdate()
    @@ -332,7 +333,8 @@ capacity recovery (eviction) so that a given failure is retried by exactly one l - CAPACITY: evicts the least recently accessed evictable key and retries, under a session-level circuit breaker (see lib/StorageCircuitBreaker.ts) that halts the loop once eviction stops making progress or failures storm — the per-operation budget alone cannot stop a session-wide storm. -- UNKNOWN: bounded retry. +- UNKNOWN: the provider couldn't classify it — log the full error shape (name + message + + provider) once so it's visible, then bounded retry without eviction. **Kind**: global function