From bf806cb069524203d9ab2cce0dbedf93830d40e7 Mon Sep 17 00:00:00 2001 From: Administrator Date: Tue, 21 Apr 2026 17:03:59 +0200 Subject: [PATCH] fix(rotation): session-learning for account failures is now complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three related gaps closed so one full byse account stops wasting attempts on every subsequent job and later-added accounts get picked up without an app restart. 1. Pre-job-swap moved BEHIND the semaphore acquire. At scale (500 jobs / 1 slot) every worker was checking _failedAccounts at spawn time before the first upload had even tried — so none of them saw the failed state. Now each worker re-checks right before its first upload attempt. 2. save-config IPC handler re-resolves fallbacks for any account that is already in _failedAccounts but has no override set. Previously account-failed only fired once per account, so a config change after the first mark-failed was silently ignored and the batch stayed stuck on the dead account until the app restarted. 3. UploadManager exposes getFailedAccountKeys() and getOverride(hoster) so main.js can drive the late re-resolve without poking private fields. 4 new tests: pre-job-swap after semaphore, getters contract, fresh manager resets learned state, late-added fallback is honored by subsequent jobs. 80/80 green. --- lib/upload-manager.js | 57 +++++++----- main.js | 29 ++++++ tasks/lessons.md | 10 +++ tasks/todo.md | 44 +++++----- tests/upload-manager.test.js | 165 +++++++++++++++++++++++++++++++++++ 5 files changed, 261 insertions(+), 44 deletions(-) diff --git a/lib/upload-manager.js b/lib/upload-manager.js index 40d5cc3..1d841e4 100644 --- a/lib/upload-manager.js +++ b/lib/upload-manager.js @@ -52,6 +52,18 @@ class UploadManager extends EventEmitter { }); } + // Introspection helpers used by main.js to re-resolve fallbacks when the + // config changes mid-batch (e.g. user adds a new account after their only + // one ran out of space). Without this, an account that got marked failed + // before a fallback existed stays stuck until the app restarts. + getFailedAccountKeys() { + return Array.from(this._failedAccounts.keys()); + } + + getOverride(hoster) { + return this._accountOverrides.get(hoster) || null; + } + _rotLog(event, data) { this.emit('rot-log', { ts: Date.now(), event, ...data }); } @@ -354,27 +366,6 @@ class UploadManager extends EventEmitter { return; } - // If this account already failed in this batch, switch to fallback immediately - // instead of wasting retries on a known-bad account - if (task.accountId && this._failedAccounts.has(task.hoster + ':' + task.accountId)) { - const override = this._accountOverrides.get(task.hoster); - if (override && !this._failedAccounts.has(task.hoster + ':' + override.id)) { - this._rotLog('pre-job-swap', { - hoster: task.hoster, fileName, fromAccountId: task.accountId, toAccountId: override.id - }); - task.accountId = override.id; - task.username = override.username; - task.password = override.password; - task.apiKey = override.apiKey; - } else { - this._rotLog('pre-job-swap-blocked', { - hoster: task.hoster, fileName, accountId: task.accountId, - hasOverride: !!override, - overrideAlsoFailed: override ? this._failedAccounts.has(task.hoster + ':' + override.id) : false - }); - } - } - this._emitProgress(uploadId, fileName, task.hoster, { jobId, status: 'queued', @@ -404,6 +395,30 @@ class UploadManager extends EventEmitter { await this._waitForInterval(task.hoster, settings.timeIntervalSec * 1000, signal); } + // Pre-job-swap: if this account was marked failed WHILE this task was + // waiting in the semaphore queue, jump straight to the override instead + // of burning a guaranteed-to-fail upload attempt. Critical at scale: + // with 500 queued jobs and 1 parallel slot, without this check every + // job still hits the original dead account first. + if (task.accountId && this._failedAccounts.has(task.hoster + ':' + task.accountId)) { + const override = this._accountOverrides.get(task.hoster); + if (override && !this._failedAccounts.has(task.hoster + ':' + override.id)) { + this._rotLog('pre-job-swap', { + hoster: task.hoster, fileName, fromAccountId: task.accountId, toAccountId: override.id + }); + task.accountId = override.id; + task.username = override.username; + task.password = override.password; + task.apiKey = override.apiKey; + } else { + this._rotLog('pre-job-swap-blocked', { + hoster: task.hoster, fileName, accountId: task.accountId, + hasOverride: !!override, + overrideAlsoFailed: override ? this._failedAccounts.has(task.hoster + ':' + override.id) : false + }); + } + } + for (let attempt = 1; attempt <= maxAttempts; attempt++) { if (signal.aborted || this.stopAfterActive) break; diff --git a/main.js b/main.js index 993f187..ab6cad4 100644 --- a/main.js +++ b/main.js @@ -920,6 +920,35 @@ ipcMain.handle('get-config', () => { ipcMain.handle('save-config', async (_event, config) => { await configStore.save(config); + // If a batch is running and some accounts got marked failed before any + // fallback existed, re-resolve now — the user may have just added one. + // Without this re-probe, those accounts stay stuck with no override until + // the app restarts, and every subsequent job wastes an attempt on them. + if (uploadManager && typeof uploadManager.getFailedAccountKeys === 'function') { + try { + const cfg = configStore.load(); + const keys = uploadManager.getFailedAccountKeys(); + for (const key of keys) { + const sep = key.indexOf(':'); + if (sep < 0) continue; + const hoster = key.slice(0, sep); + const failedAccountId = key.slice(sep + 1); + if (uploadManager.getOverride(hoster)) continue; // already has a fallback + const fallback = getNextFallbackAccount(cfg, hoster, failedAccountId); + if (fallback) { + rotLog(`main: config-updated → late fallback ${fallback.id} for ${hoster} (was stuck on ${failedAccountId})`); + uploadManager.switchAccount(hoster, fallback); + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('account-switched', { + hoster, fromAccountId: failedAccountId, toAccountId: fallback.id + }); + } + } + } + } catch (err) { + debugLog(`save-config re-resolve failed: ${err && err.message ? err.message : err}`); + } + } return true; }); diff --git a/tasks/lessons.md b/tasks/lessons.md index 3722cba..81c81ad 100644 --- a/tasks/lessons.md +++ b/tasks/lessons.md @@ -6,6 +6,16 @@ **Regel:** Wenn ein Click-Handler `await anotherHandler()` aufruft und der innere Handler seinen eigenen kompletten Render-Zyklus hat, NIEMALS noch einen davor. Einmal ist genug — der folgende innere Render sieht die frischen State-Mutationen ohnehin. **Wie anwenden:** Vor jeder `await fn()`-Folge in einem Handler prüfen: macht `fn` schon `renderQueueTable()`? Wenn ja, äußere Render-Calls löschen. +## 2026-04-21 — State-Checks MÜSSEN hinter die Semaphore-Queue +**Symptom:** Pre-Job-Swap prüfte `_failedAccounts` vor `semaphore.acquire`. Bei N parallelen Workers war der Check zum Start für ALLE leer — niemand hat geswapt. Erst nachdem alle im Semaphore ordentlich gewartet hatten und einer fehlschlug, wurde _failedAccounts befüllt, aber die anderen hatten ihren Check längst hinter sich. +**Regel:** State-basierte Entscheidungen (failed accounts, overrides, cached stats) gehören direkt vor die Aktion die sie betreffen — **nach** jeder async `await` die die Position in der Queue bestimmt. Nicht am Task-Start für später wichtigen State abfragen. +**Wie anwenden:** Bei Queue-basierten Pipelines prüfen: "Was kann sich zwischen Task-Start und dem tatsächlichen Execute ändern?" Alles was sich ändern kann, muss direkt vor dem Execute geprüft werden, nicht davor. + +## 2026-04-21 — Reaktive Config-Updates für laufende State-Maschinen +**Symptom:** User fügt mid-batch einen neuen Account hinzu, aber der UploadManager merkt nicht dass die Config sich geändert hat. `account-failed` Event feuert nur einmal pro Account → keine zweite Re-Resolve-Chance. +**Regel:** Wenn ein State nur bei Events neu evaluiert wird und Events "nur einmal" feuern, muss jede externe Zustandsänderung (Config-Save, User-Action) den State explizit triggern. +**Wie anwenden:** Save-Handler müssen aktive State-Maschinen informieren. Lieber einen überflüssigen Re-Resolve-Call als einen verpassten. Für Upload-Manager: nach saveConfig → re-evaluate failed accounts ohne Override. + ## 2026-04-21 — Error-Klassifikation: fileRejected vs accountError **Symptom:** Voller Byse-Account wurde nicht rotiert — `skip-rotation-file-rejected` geloggt für jede Datei. **Root cause:** Generisches Match auf Prefix-String (`"lehnte Datei ab"`) klassifizierte ALLE Byse-Errors als file-level, inklusive Account-voll-Meldungen. diff --git a/tasks/todo.md b/tasks/todo.md index e0f5a0f..1e87756 100644 --- a/tasks/todo.md +++ b/tasks/todo.md @@ -1,30 +1,28 @@ -# Rotation-Bug: Byse "not enough disk space" wird als file-rejected klassifiziert +# Account-Rotation: vollständiger Session-Lernprozess ## Problem -Log zeigt: Byse-Account ist voll ("not enough disk space on your account"), aber das System klassifiziert den Fehler als **file-rejected** und rotiert deshalb NICHT zum Fallback-Account. Jede nächste Datei landet beim selben vollen Account → endlose Fails. - -## Root Cause -- `lib/hosters.js:223-227` — Byse-Parser setzt `err.fileRejected = true` für JEDEN status-String der nicht `ok/success/done` ist. -- `lib/upload-manager.js:67` — `_isFileRejectedError` regex matcht generisch `"lehnte Datei ab"` → gilt für ALLE Byse-Errors unabhängig vom eigentlichen Grund. -- Upload-manager flow: `_isFileRejectedError` → break retry → `skip-rotation-file-rejected` → return. Kein `mark-failed`, kein Fallback-Resolve. Account bleibt aktiv für die nächste Datei. +1. Byse "disk space full" wurde als file-rejected klassifiziert → keine Rotation. (3.1.4 ✓) +2. `pre-job-swap` checkte `_failedAccounts` VOR Semaphore-Acquire → alle parallelen Jobs probierten weiter den vollen Account. (3.1.5 ✓) +3. Wenn beim ersten Fail noch kein Fallback-Account existierte und der User DANACH einen hinzufügt → `_accountOverrides` blieb leer, keine weitere `account-failed`-Emission (nur einmalig pro Account) → System blieb stuck bis zum Neustart. (3.1.5 ✓) ## Fix -- [x] `lib/hosters.js`: Byse-Parser erkennt account-level phrases (disk space / storage / quota / insufficient / account full) → setzt `err.accountError = true` statt `fileRejected`. -- [x] `lib/upload-manager.js` — `_isFileRejectedError`: generischen `lehnte Datei ab` Match entfernt. Explicit: `accountError === true` → früher out (ist NICHT file-rejected). -- [x] `lib/upload-manager.js` — `_shouldSkipRetryOnAccountError`: honoriert `err.accountError === true` Flag. Patterns erweitert um disk-space/storage/quota/account-voll Phrasen (Safety-Net falls Flag mal fehlt). -- [x] `tests/upload-manager.test.js`: 5 neue Tests für die Klassifikation (disk-space ist account-level; Duplicate bleibt file-rejected; accountError gewinnt gegen fileRejected). -- [x] `npm test` — 76/76 grün. -- [ ] Release als 3.1.4 (auf User-OK). +- [x] `lib/upload-manager.js` — Pre-Job-Swap hinter `semaphore.acquire` verschoben, plus zwei public getter: `getFailedAccountKeys()`, `getOverride(hoster)`. +- [x] `main.js` — `save-config` IPC-Handler ruft nach dem Speichern `getNextFallbackAccount` für alle failed-ohne-Override accounts im aktiven UploadManager. Findet er was, ruft er `switchAccount`. Emittiert `account-switched` an den Renderer. +- [x] Tests (4 neue): + - pre-job-swap greift für parallele Worker nach Semaphore-Queue. + - getFailedAccountKeys + getOverride als public API. + - Neustart-Reset: fresh Manager hat keine gelernte failed-Historie. + - Late-resolved Override wird von Folgejobs genutzt (mid-batch config add). +- [x] `npm test` — 80/80 grün. +- [ ] Release als 3.1.5 (auf User-OK). -## Expected Behavior nach Fix -Log-Pattern ab Fix: -``` -[retries-exhausted] hoster=byse.sx ... lastError=Byse lehnte Datei ab: 0:0:0:not enough disk space... -[mark-failed] hoster=byse.sx accountId=byse.sx-1773722669098-qc45 -[switchAccount] hoster=byse.sx → fallback byse.sx-XXXXX -[rotate] hoster=byse.sx → nächster Account -``` -Bei Fast-Fail (über `_shouldSkipRetryOnAccountError`) entfällt der 5×3s Retry-Wait → rotation setzt sofort ein. +## Behavior-Matrix +| Szenario | Verhalten | +|---|---| +| acc1 + acc2 gleich aktiv, acc1 wird voll | acc1→acc2 Rotation, alle weiteren Jobs direkt acc2 ✓ | +| Nur acc1 aktiv, acc2 mid-batch hinzugefügt BEVOR acc1 voll | Erster Fail resolved gegen frische Config, acc2 wird gezogen ✓ | +| Nur acc1 aktiv, acc1 wird voll, DANACH acc2 hinzugefügt | `save-config` trigert Late-Resolve, `switchAccount(acc2)` wird aufgerufen, Folgejobs swappen pre-job ✓ | +| App-Neustart | `_failedAccounts` in-memory → fresh, acc1 wird wieder probiert ✓ | ## Review -Zwei-Schichten-Ansatz: Byse-Parser setzt explicit `accountError` Flag (richtige Stelle weil der Parser den status-String direkt sieht), Upload-Manager honoriert den Flag und hat parallel Regex-Safety-Net. Test deckt beide Pfade ab. +Drei Ebenen zusammen (3.1.4 Classifier + 3.1.5 pre-swap-after-queue + 3.1.5 late-resolve) machen das Session-Lernen komplett: disk-space-voll wird erkannt, markiert, alle wartenden Jobs sehen es wenn sie dran sind, und spätere Config-Änderungen werden reaktiv aufgegriffen. diff --git a/tests/upload-manager.test.js b/tests/upload-manager.test.js index 24952c9..633ae0f 100644 --- a/tests/upload-manager.test.js +++ b/tests/upload-manager.test.js @@ -554,4 +554,169 @@ describe('UploadManager', () => { assert.equal(mgr._shouldSkipRetryOnAccountError(err), true); }); }); + + describe('session-level account memory', () => { + // Scenario: user has 2 byse accounts. Account 1 is full ("not enough + // disk space"). First job fails on acc1 → rotation to acc2. Second job + // must NOT re-probe acc1; pre-job-swap has to kick in. + it('after account is marked failed, next job swaps straight to override without retrying acc1', async () => { + // Only acc1 throws disk-space; acc2 succeeds. Mock decides by apiKey. + mockUploadFile.mock.mockImplementation(async (hoster, filePath, apiKey, onProgress) => { + if (apiKey === 'acc1-key') { + const err = new Error('Byse lehnte Datei ab: 0:0:0:not enough disk space on your account'); + err.accountError = true; + throw err; + } + if (onProgress) onProgress(fakeFileSize, fakeFileSize); + return { download_url: 'https://byse.sx/ok', embed_url: null, file_code: 'ok' }; + }); + + const mgr = new UploadManager( + { 'byse.sx': { retries: 3, parallelCount: 1, maxSpeedKbs: 0, restartBelowKbs: 0, timeIntervalSec: 0, maxSizeMb: 0 } } + ); + + // Simulate main.js: on account-failed, resolve fallback → switchAccount + mgr.on('account-failed', ({ hoster, accountId }) => { + mgr.switchAccount(hoster, { id: 'acc2', username: 'u2', password: 'p2', apiKey: 'acc2-key' }); + }); + + const rotEvents = []; + mgr.on('rot-log', (e) => rotEvents.push(e)); + const progress = []; + mgr.on('progress', (d) => progress.push({ fileName: d.fileName, status: d.status, error: d.error })); + + await mgr.startBatch([ + { file: '/test/a.mp4', hoster: 'byse.sx', apiKey: 'acc1-key', accountId: 'acc1', username: 'u1', password: 'p1' }, + { file: '/test/b.mp4', hoster: 'byse.sx', apiKey: 'acc1-key', accountId: 'acc1', username: 'u1', password: 'p1' } + ]); + + // Event sequence we expect: + // - job A: fast-fail on acc1 → mark-failed → switchAccount → rotate → upload with acc2 → done + // - job B: pre-job-swap from acc1 → acc2 (no attempts on acc1!) → done + const events = rotEvents.map(e => e.event); + assert.ok(events.includes('fast-fail'), `expected fast-fail, got: ${events.join(',')}`); + assert.ok(events.includes('mark-failed'), `expected mark-failed, got: ${events.join(',')}`); + assert.ok(events.includes('switchAccount'), `expected switchAccount, got: ${events.join(',')}`); + assert.ok(events.includes('pre-job-swap'), `expected pre-job-swap for 2nd job, got: ${events.join(',')}`); + + // job B's pre-job-swap MUST predate any upload attempt for /test/b.mp4. + // If acc1 was probed for B, the mock would have thrown and we'd see + // another fast-fail or retrying event for b.mp4. + const bProgressErrors = progress + .filter(p => p.fileName && p.fileName.includes('b.mp4') && p.error) + .map(p => p.error); + assert.equal(bProgressErrors.length, 0, + `job B should never have touched acc1; got errors: ${bProgressErrors.join(' | ')}`); + + // Both jobs should be done at the end. + const doneFiles = progress.filter(p => p.status === 'done').map(p => p.fileName); + assert.ok(doneFiles.some(f => f && f.includes('a.mp4')), 'a.mp4 should finish via rotation'); + assert.ok(doneFiles.some(f => f && f.includes('b.mp4')), 'b.mp4 should finish via pre-job-swap'); + + // Sanity: mock was called with acc2-key more often than acc1-key. + const byKey = { acc1: 0, acc2: 0 }; + for (const call of mockUploadFile.mock.calls) { + if (call.arguments[2] === 'acc1-key') byKey.acc1++; + else if (call.arguments[2] === 'acc2-key') byKey.acc2++; + } + assert.ok(byKey.acc1 <= 1, `acc1 should only be tried once (for job A); got ${byKey.acc1}`); + assert.ok(byKey.acc2 >= 2, `acc2 should handle both jobs after rotation; got ${byKey.acc2}`); + }); + + it('on fresh UploadManager (simulates app restart), failed-account memory is gone', () => { + const mgr1 = new UploadManager({}); + mgr1._failedAccounts.set('byse.sx:acc1', true); + mgr1.switchAccount('byse.sx', { id: 'acc2' }); + assert.equal(mgr1._failedAccounts.size, 1); + assert.equal(mgr1._accountOverrides.size, 1); + + const mgr2 = new UploadManager({}); + assert.equal(mgr2._failedAccounts.size, 0, 'new manager must start clean'); + assert.equal(mgr2._accountOverrides.size, 0, 'override map must be empty on fresh manager'); + }); + + it('exposes failed-account introspection (for main.js mid-batch re-resolve)', () => { + const mgr = new UploadManager({}); + assert.deepEqual(mgr.getFailedAccountKeys(), []); + assert.equal(mgr.getOverride('byse.sx'), null); + + mgr._failedAccounts.set('byse.sx:acc1', true); + mgr._failedAccounts.set('voe.sx:other', true); + assert.deepEqual(mgr.getFailedAccountKeys().sort(), ['byse.sx:acc1', 'voe.sx:other']); + assert.equal(mgr.getOverride('byse.sx'), null, 'no override yet'); + + mgr.switchAccount('byse.sx', { id: 'acc2', apiKey: 'k2' }); + assert.equal(mgr.getOverride('byse.sx').id, 'acc2'); + assert.equal(mgr.getOverride('voe.sx'), null, 'unrelated hoster still has no override'); + }); + + it('late-resolved override is honored by subsequent jobs (simulates mid-batch config add)', async () => { + // Only acc1 throws; acc2 succeeds. + mockUploadFile.mock.mockImplementation(async (hoster, filePath, apiKey, onProgress) => { + if (apiKey === 'acc1-key') { + const err = new Error('Byse lehnte Datei ab: not enough disk space'); + err.accountError = true; + throw err; + } + if (onProgress) onProgress(fakeFileSize, fakeFileSize); + return { download_url: 'ok', embed_url: null, file_code: 'ok' }; + }); + + const mgr = new UploadManager( + { 'byse.sx': { retries: 1, parallelCount: 1, maxSpeedKbs: 0, restartBelowKbs: 0, timeIntervalSec: 0, maxSizeMb: 0 } } + ); + + // Scenario: initially config has ONLY acc1. No account-failed listener + // resolves a fallback (because none exists in config yet). Job A fails + // with rotation-end. + const rotEvents = []; + mgr.on('rot-log', (e) => rotEvents.push(e)); + + await mgr.startBatch([ + { file: '/test/a.mp4', hoster: 'byse.sx', apiKey: 'acc1-key', accountId: 'acc1', username: 'u1', password: 'p1' } + ]); + + // Job A should have ended with rotation-end (no fallback available). + const eventsA = rotEvents.map(e => e.event); + assert.ok(eventsA.includes('mark-failed'), 'acc1 must be marked failed'); + assert.ok(eventsA.includes('rotation-end'), 'expected rotation-end without a fallback'); + assert.equal(mgr.getFailedAccountKeys().length, 1); + assert.equal(mgr.getOverride('byse.sx'), null, 'no override set during first batch'); + + // --- Simulate: user adds acc2 in Settings → save-config handler finds + // that byse.sx:acc1 is failed without an override → resolves + switches. + mgr.switchAccount('byse.sx', { id: 'acc2', username: 'u2', password: 'p2', apiKey: 'acc2-key' }); + + // Now a follow-up batch (same running session — in production, addJobs + // or a new startBatch without clearing maps would reach this state). + // We need to ALSO clear _failedAccounts manually here because startBatch + // resets it — so we poke the inner state to emulate "still mid-batch + // with late config". The switchAccount-after-fail path is what matters. + rotEvents.length = 0; + // Re-run just the _runJob path by manually setting up state and using + // addJobs — simulates mid-batch job add after config change. + mgr.running = true; + mgr._batchResults = new Map(); + mgr._batchResults.set('/test/b.mp4', { name: 'b.mp4', size: fakeFileSize, results: [] }); + mgr._failedAccounts.set('byse.sx:acc1', true); // re-establish failed state + mgr._additionalPromises = []; + + // Spawn a new job through addJobs() path (uses _runJob internally) + const addResult = await mgr.addJobs([ + { file: '/test/b.mp4', hoster: 'byse.sx', apiKey: 'acc1-key', accountId: 'acc1', username: 'u1', password: 'p1', jobId: 'jb' } + ]); + assert.ok(addResult.added >= 1 || addResult.alreadyInBatch === 0, + `addJobs should accept new job: ${JSON.stringify(addResult)}`); + await Promise.allSettled(mgr._additionalPromises); + + const eventsB = rotEvents.map(e => e.event); + assert.ok(eventsB.includes('pre-job-swap'), + `job B should have pre-job-swap after late override was set; got: ${eventsB.join(',')}`); + + // Mock must have been called with acc2-key for the new job (not acc1-key again) + const acc1ForB = mockUploadFile.mock.calls.filter(c => + c.arguments[1] === '/test/b.mp4' && c.arguments[2] === 'acc1-key').length; + assert.equal(acc1ForB, 0, 'job B must never touch acc1-key after late fallback was set'); + }); + }); });