fix(rotation): session-learning for account failures is now complete

Three related gaps closed so one full byse account stops wasting
attempts on every subsequent job and later-added accounts get picked
up without an app restart.

1. Pre-job-swap moved BEHIND the semaphore acquire. At scale (500 jobs
   / 1 slot) every worker was checking _failedAccounts at spawn time
   before the first upload had even tried — so none of them saw the
   failed state. Now each worker re-checks right before its first
   upload attempt.

2. save-config IPC handler re-resolves fallbacks for any account that
   is already in _failedAccounts but has no override set. Previously
   account-failed only fired once per account, so a config change
   after the first mark-failed was silently ignored and the batch
   stayed stuck on the dead account until the app restarted.

3. UploadManager exposes getFailedAccountKeys() and getOverride(hoster)
   so main.js can drive the late re-resolve without poking private
   fields.

4 new tests: pre-job-swap after semaphore, getters contract, fresh
manager resets learned state, late-added fallback is honored by
subsequent jobs. 80/80 green.
This commit is contained in:
Administrator 2026-04-21 17:03:59 +02:00
parent e5f9f91f4e
commit bf806cb069
5 changed files with 261 additions and 44 deletions

View File

@ -52,6 +52,18 @@ class UploadManager extends EventEmitter {
}); });
} }
// Introspection helpers used by main.js to re-resolve fallbacks when the
// config changes mid-batch (e.g. user adds a new account after their only
// one ran out of space). Without this, an account that got marked failed
// before a fallback existed stays stuck until the app restarts.
getFailedAccountKeys() {
return Array.from(this._failedAccounts.keys());
}
getOverride(hoster) {
return this._accountOverrides.get(hoster) || null;
}
_rotLog(event, data) { _rotLog(event, data) {
this.emit('rot-log', { ts: Date.now(), event, ...data }); this.emit('rot-log', { ts: Date.now(), event, ...data });
} }
@ -354,27 +366,6 @@ class UploadManager extends EventEmitter {
return; return;
} }
// If this account already failed in this batch, switch to fallback immediately
// instead of wasting retries on a known-bad account
if (task.accountId && this._failedAccounts.has(task.hoster + ':' + task.accountId)) {
const override = this._accountOverrides.get(task.hoster);
if (override && !this._failedAccounts.has(task.hoster + ':' + override.id)) {
this._rotLog('pre-job-swap', {
hoster: task.hoster, fileName, fromAccountId: task.accountId, toAccountId: override.id
});
task.accountId = override.id;
task.username = override.username;
task.password = override.password;
task.apiKey = override.apiKey;
} else {
this._rotLog('pre-job-swap-blocked', {
hoster: task.hoster, fileName, accountId: task.accountId,
hasOverride: !!override,
overrideAlsoFailed: override ? this._failedAccounts.has(task.hoster + ':' + override.id) : false
});
}
}
this._emitProgress(uploadId, fileName, task.hoster, { this._emitProgress(uploadId, fileName, task.hoster, {
jobId, jobId,
status: 'queued', status: 'queued',
@ -404,6 +395,30 @@ class UploadManager extends EventEmitter {
await this._waitForInterval(task.hoster, settings.timeIntervalSec * 1000, signal); await this._waitForInterval(task.hoster, settings.timeIntervalSec * 1000, signal);
} }
// Pre-job-swap: if this account was marked failed WHILE this task was
// waiting in the semaphore queue, jump straight to the override instead
// of burning a guaranteed-to-fail upload attempt. Critical at scale:
// with 500 queued jobs and 1 parallel slot, without this check every
// job still hits the original dead account first.
if (task.accountId && this._failedAccounts.has(task.hoster + ':' + task.accountId)) {
const override = this._accountOverrides.get(task.hoster);
if (override && !this._failedAccounts.has(task.hoster + ':' + override.id)) {
this._rotLog('pre-job-swap', {
hoster: task.hoster, fileName, fromAccountId: task.accountId, toAccountId: override.id
});
task.accountId = override.id;
task.username = override.username;
task.password = override.password;
task.apiKey = override.apiKey;
} else {
this._rotLog('pre-job-swap-blocked', {
hoster: task.hoster, fileName, accountId: task.accountId,
hasOverride: !!override,
overrideAlsoFailed: override ? this._failedAccounts.has(task.hoster + ':' + override.id) : false
});
}
}
for (let attempt = 1; attempt <= maxAttempts; attempt++) { for (let attempt = 1; attempt <= maxAttempts; attempt++) {
if (signal.aborted || this.stopAfterActive) break; if (signal.aborted || this.stopAfterActive) break;

29
main.js
View File

@ -920,6 +920,35 @@ ipcMain.handle('get-config', () => {
ipcMain.handle('save-config', async (_event, config) => { ipcMain.handle('save-config', async (_event, config) => {
await configStore.save(config); await configStore.save(config);
// If a batch is running and some accounts got marked failed before any
// fallback existed, re-resolve now — the user may have just added one.
// Without this re-probe, those accounts stay stuck with no override until
// the app restarts, and every subsequent job wastes an attempt on them.
if (uploadManager && typeof uploadManager.getFailedAccountKeys === 'function') {
try {
const cfg = configStore.load();
const keys = uploadManager.getFailedAccountKeys();
for (const key of keys) {
const sep = key.indexOf(':');
if (sep < 0) continue;
const hoster = key.slice(0, sep);
const failedAccountId = key.slice(sep + 1);
if (uploadManager.getOverride(hoster)) continue; // already has a fallback
const fallback = getNextFallbackAccount(cfg, hoster, failedAccountId);
if (fallback) {
rotLog(`main: config-updated → late fallback ${fallback.id} for ${hoster} (was stuck on ${failedAccountId})`);
uploadManager.switchAccount(hoster, fallback);
if (mainWindow && !mainWindow.isDestroyed()) {
mainWindow.webContents.send('account-switched', {
hoster, fromAccountId: failedAccountId, toAccountId: fallback.id
});
}
}
}
} catch (err) {
debugLog(`save-config re-resolve failed: ${err && err.message ? err.message : err}`);
}
}
return true; return true;
}); });

View File

@ -6,6 +6,16 @@
**Regel:** Wenn ein Click-Handler `await anotherHandler()` aufruft und der innere Handler seinen eigenen kompletten Render-Zyklus hat, NIEMALS noch einen davor. Einmal ist genug — der folgende innere Render sieht die frischen State-Mutationen ohnehin. **Regel:** Wenn ein Click-Handler `await anotherHandler()` aufruft und der innere Handler seinen eigenen kompletten Render-Zyklus hat, NIEMALS noch einen davor. Einmal ist genug — der folgende innere Render sieht die frischen State-Mutationen ohnehin.
**Wie anwenden:** Vor jeder `await fn()`-Folge in einem Handler prüfen: macht `fn` schon `renderQueueTable()`? Wenn ja, äußere Render-Calls löschen. **Wie anwenden:** Vor jeder `await fn()`-Folge in einem Handler prüfen: macht `fn` schon `renderQueueTable()`? Wenn ja, äußere Render-Calls löschen.
## 2026-04-21 — State-Checks MÜSSEN hinter die Semaphore-Queue
**Symptom:** Pre-Job-Swap prüfte `_failedAccounts` vor `semaphore.acquire`. Bei N parallelen Workers war der Check zum Start für ALLE leer — niemand hat geswapt. Erst nachdem alle im Semaphore ordentlich gewartet hatten und einer fehlschlug, wurde _failedAccounts befüllt, aber die anderen hatten ihren Check längst hinter sich.
**Regel:** State-basierte Entscheidungen (failed accounts, overrides, cached stats) gehören direkt vor die Aktion die sie betreffen — **nach** jeder async `await` die die Position in der Queue bestimmt. Nicht am Task-Start für später wichtigen State abfragen.
**Wie anwenden:** Bei Queue-basierten Pipelines prüfen: "Was kann sich zwischen Task-Start und dem tatsächlichen Execute ändern?" Alles was sich ändern kann, muss direkt vor dem Execute geprüft werden, nicht davor.
## 2026-04-21 — Reaktive Config-Updates für laufende State-Maschinen
**Symptom:** User fügt mid-batch einen neuen Account hinzu, aber der UploadManager merkt nicht dass die Config sich geändert hat. `account-failed` Event feuert nur einmal pro Account → keine zweite Re-Resolve-Chance.
**Regel:** Wenn ein State nur bei Events neu evaluiert wird und Events "nur einmal" feuern, muss jede externe Zustandsänderung (Config-Save, User-Action) den State explizit triggern.
**Wie anwenden:** Save-Handler müssen aktive State-Maschinen informieren. Lieber einen überflüssigen Re-Resolve-Call als einen verpassten. Für Upload-Manager: nach saveConfig → re-evaluate failed accounts ohne Override.
## 2026-04-21 — Error-Klassifikation: fileRejected vs accountError ## 2026-04-21 — Error-Klassifikation: fileRejected vs accountError
**Symptom:** Voller Byse-Account wurde nicht rotiert — `skip-rotation-file-rejected` geloggt für jede Datei. **Symptom:** Voller Byse-Account wurde nicht rotiert — `skip-rotation-file-rejected` geloggt für jede Datei.
**Root cause:** Generisches Match auf Prefix-String (`"lehnte Datei ab"`) klassifizierte ALLE Byse-Errors als file-level, inklusive Account-voll-Meldungen. **Root cause:** Generisches Match auf Prefix-String (`"lehnte Datei ab"`) klassifizierte ALLE Byse-Errors als file-level, inklusive Account-voll-Meldungen.

View File

@ -1,30 +1,28 @@
# Rotation-Bug: Byse "not enough disk space" wird als file-rejected klassifiziert # Account-Rotation: vollständiger Session-Lernprozess
## Problem ## Problem
Log zeigt: Byse-Account ist voll ("not enough disk space on your account"), aber das System klassifiziert den Fehler als **file-rejected** und rotiert deshalb NICHT zum Fallback-Account. Jede nächste Datei landet beim selben vollen Account → endlose Fails. 1. Byse "disk space full" wurde als file-rejected klassifiziert → keine Rotation. (3.1.4 ✓)
2. `pre-job-swap` checkte `_failedAccounts` VOR Semaphore-Acquire → alle parallelen Jobs probierten weiter den vollen Account. (3.1.5 ✓)
## Root Cause 3. Wenn beim ersten Fail noch kein Fallback-Account existierte und der User DANACH einen hinzufügt → `_accountOverrides` blieb leer, keine weitere `account-failed`-Emission (nur einmalig pro Account) → System blieb stuck bis zum Neustart. (3.1.5 ✓)
- `lib/hosters.js:223-227` — Byse-Parser setzt `err.fileRejected = true` für JEDEN status-String der nicht `ok/success/done` ist.
- `lib/upload-manager.js:67``_isFileRejectedError` regex matcht generisch `"lehnte Datei ab"` → gilt für ALLE Byse-Errors unabhängig vom eigentlichen Grund.
- Upload-manager flow: `_isFileRejectedError` → break retry → `skip-rotation-file-rejected` → return. Kein `mark-failed`, kein Fallback-Resolve. Account bleibt aktiv für die nächste Datei.
## Fix ## Fix
- [x] `lib/hosters.js`: Byse-Parser erkennt account-level phrases (disk space / storage / quota / insufficient / account full) → setzt `err.accountError = true` statt `fileRejected`. - [x] `lib/upload-manager.js` — Pre-Job-Swap hinter `semaphore.acquire` verschoben, plus zwei public getter: `getFailedAccountKeys()`, `getOverride(hoster)`.
- [x] `lib/upload-manager.js``_isFileRejectedError`: generischen `lehnte Datei ab` Match entfernt. Explicit: `accountError === true` → früher out (ist NICHT file-rejected). - [x] `main.js``save-config` IPC-Handler ruft nach dem Speichern `getNextFallbackAccount` für alle failed-ohne-Override accounts im aktiven UploadManager. Findet er was, ruft er `switchAccount`. Emittiert `account-switched` an den Renderer.
- [x] `lib/upload-manager.js``_shouldSkipRetryOnAccountError`: honoriert `err.accountError === true` Flag. Patterns erweitert um disk-space/storage/quota/account-voll Phrasen (Safety-Net falls Flag mal fehlt). - [x] Tests (4 neue):
- [x] `tests/upload-manager.test.js`: 5 neue Tests für die Klassifikation (disk-space ist account-level; Duplicate bleibt file-rejected; accountError gewinnt gegen fileRejected). - pre-job-swap greift für parallele Worker nach Semaphore-Queue.
- [x] `npm test` — 76/76 grün. - getFailedAccountKeys + getOverride als public API.
- [ ] Release als 3.1.4 (auf User-OK). - Neustart-Reset: fresh Manager hat keine gelernte failed-Historie.
- Late-resolved Override wird von Folgejobs genutzt (mid-batch config add).
- [x] `npm test` — 80/80 grün.
- [ ] Release als 3.1.5 (auf User-OK).
## Expected Behavior nach Fix ## Behavior-Matrix
Log-Pattern ab Fix: | Szenario | Verhalten |
``` |---|---|
[retries-exhausted] hoster=byse.sx ... lastError=Byse lehnte Datei ab: 0:0:0:not enough disk space... | acc1 + acc2 gleich aktiv, acc1 wird voll | acc1→acc2 Rotation, alle weiteren Jobs direkt acc2 ✓ |
[mark-failed] hoster=byse.sx accountId=byse.sx-1773722669098-qc45 | Nur acc1 aktiv, acc2 mid-batch hinzugefügt BEVOR acc1 voll | Erster Fail resolved gegen frische Config, acc2 wird gezogen ✓ |
[switchAccount] hoster=byse.sx → fallback byse.sx-XXXXX | Nur acc1 aktiv, acc1 wird voll, DANACH acc2 hinzugefügt | `save-config` trigert Late-Resolve, `switchAccount(acc2)` wird aufgerufen, Folgejobs swappen pre-job ✓ |
[rotate] hoster=byse.sx → nächster Account | App-Neustart | `_failedAccounts` in-memory → fresh, acc1 wird wieder probiert ✓ |
```
Bei Fast-Fail (über `_shouldSkipRetryOnAccountError`) entfällt der 5×3s Retry-Wait → rotation setzt sofort ein.
## Review ## Review
Zwei-Schichten-Ansatz: Byse-Parser setzt explicit `accountError` Flag (richtige Stelle weil der Parser den status-String direkt sieht), Upload-Manager honoriert den Flag und hat parallel Regex-Safety-Net. Test deckt beide Pfade ab. Drei Ebenen zusammen (3.1.4 Classifier + 3.1.5 pre-swap-after-queue + 3.1.5 late-resolve) machen das Session-Lernen komplett: disk-space-voll wird erkannt, markiert, alle wartenden Jobs sehen es wenn sie dran sind, und spätere Config-Änderungen werden reaktiv aufgegriffen.

View File

@ -554,4 +554,169 @@ describe('UploadManager', () => {
assert.equal(mgr._shouldSkipRetryOnAccountError(err), true); assert.equal(mgr._shouldSkipRetryOnAccountError(err), true);
}); });
}); });
describe('session-level account memory', () => {
// Scenario: user has 2 byse accounts. Account 1 is full ("not enough
// disk space"). First job fails on acc1 → rotation to acc2. Second job
// must NOT re-probe acc1; pre-job-swap has to kick in.
it('after account is marked failed, next job swaps straight to override without retrying acc1', async () => {
// Only acc1 throws disk-space; acc2 succeeds. Mock decides by apiKey.
mockUploadFile.mock.mockImplementation(async (hoster, filePath, apiKey, onProgress) => {
if (apiKey === 'acc1-key') {
const err = new Error('Byse lehnte Datei ab: 0:0:0:not enough disk space on your account');
err.accountError = true;
throw err;
}
if (onProgress) onProgress(fakeFileSize, fakeFileSize);
return { download_url: 'https://byse.sx/ok', embed_url: null, file_code: 'ok' };
});
const mgr = new UploadManager(
{ 'byse.sx': { retries: 3, parallelCount: 1, maxSpeedKbs: 0, restartBelowKbs: 0, timeIntervalSec: 0, maxSizeMb: 0 } }
);
// Simulate main.js: on account-failed, resolve fallback → switchAccount
mgr.on('account-failed', ({ hoster, accountId }) => {
mgr.switchAccount(hoster, { id: 'acc2', username: 'u2', password: 'p2', apiKey: 'acc2-key' });
});
const rotEvents = [];
mgr.on('rot-log', (e) => rotEvents.push(e));
const progress = [];
mgr.on('progress', (d) => progress.push({ fileName: d.fileName, status: d.status, error: d.error }));
await mgr.startBatch([
{ file: '/test/a.mp4', hoster: 'byse.sx', apiKey: 'acc1-key', accountId: 'acc1', username: 'u1', password: 'p1' },
{ file: '/test/b.mp4', hoster: 'byse.sx', apiKey: 'acc1-key', accountId: 'acc1', username: 'u1', password: 'p1' }
]);
// Event sequence we expect:
// - job A: fast-fail on acc1 → mark-failed → switchAccount → rotate → upload with acc2 → done
// - job B: pre-job-swap from acc1 → acc2 (no attempts on acc1!) → done
const events = rotEvents.map(e => e.event);
assert.ok(events.includes('fast-fail'), `expected fast-fail, got: ${events.join(',')}`);
assert.ok(events.includes('mark-failed'), `expected mark-failed, got: ${events.join(',')}`);
assert.ok(events.includes('switchAccount'), `expected switchAccount, got: ${events.join(',')}`);
assert.ok(events.includes('pre-job-swap'), `expected pre-job-swap for 2nd job, got: ${events.join(',')}`);
// job B's pre-job-swap MUST predate any upload attempt for /test/b.mp4.
// If acc1 was probed for B, the mock would have thrown and we'd see
// another fast-fail or retrying event for b.mp4.
const bProgressErrors = progress
.filter(p => p.fileName && p.fileName.includes('b.mp4') && p.error)
.map(p => p.error);
assert.equal(bProgressErrors.length, 0,
`job B should never have touched acc1; got errors: ${bProgressErrors.join(' | ')}`);
// Both jobs should be done at the end.
const doneFiles = progress.filter(p => p.status === 'done').map(p => p.fileName);
assert.ok(doneFiles.some(f => f && f.includes('a.mp4')), 'a.mp4 should finish via rotation');
assert.ok(doneFiles.some(f => f && f.includes('b.mp4')), 'b.mp4 should finish via pre-job-swap');
// Sanity: mock was called with acc2-key more often than acc1-key.
const byKey = { acc1: 0, acc2: 0 };
for (const call of mockUploadFile.mock.calls) {
if (call.arguments[2] === 'acc1-key') byKey.acc1++;
else if (call.arguments[2] === 'acc2-key') byKey.acc2++;
}
assert.ok(byKey.acc1 <= 1, `acc1 should only be tried once (for job A); got ${byKey.acc1}`);
assert.ok(byKey.acc2 >= 2, `acc2 should handle both jobs after rotation; got ${byKey.acc2}`);
});
it('on fresh UploadManager (simulates app restart), failed-account memory is gone', () => {
const mgr1 = new UploadManager({});
mgr1._failedAccounts.set('byse.sx:acc1', true);
mgr1.switchAccount('byse.sx', { id: 'acc2' });
assert.equal(mgr1._failedAccounts.size, 1);
assert.equal(mgr1._accountOverrides.size, 1);
const mgr2 = new UploadManager({});
assert.equal(mgr2._failedAccounts.size, 0, 'new manager must start clean');
assert.equal(mgr2._accountOverrides.size, 0, 'override map must be empty on fresh manager');
});
it('exposes failed-account introspection (for main.js mid-batch re-resolve)', () => {
const mgr = new UploadManager({});
assert.deepEqual(mgr.getFailedAccountKeys(), []);
assert.equal(mgr.getOverride('byse.sx'), null);
mgr._failedAccounts.set('byse.sx:acc1', true);
mgr._failedAccounts.set('voe.sx:other', true);
assert.deepEqual(mgr.getFailedAccountKeys().sort(), ['byse.sx:acc1', 'voe.sx:other']);
assert.equal(mgr.getOverride('byse.sx'), null, 'no override yet');
mgr.switchAccount('byse.sx', { id: 'acc2', apiKey: 'k2' });
assert.equal(mgr.getOverride('byse.sx').id, 'acc2');
assert.equal(mgr.getOverride('voe.sx'), null, 'unrelated hoster still has no override');
});
it('late-resolved override is honored by subsequent jobs (simulates mid-batch config add)', async () => {
// Only acc1 throws; acc2 succeeds.
mockUploadFile.mock.mockImplementation(async (hoster, filePath, apiKey, onProgress) => {
if (apiKey === 'acc1-key') {
const err = new Error('Byse lehnte Datei ab: not enough disk space');
err.accountError = true;
throw err;
}
if (onProgress) onProgress(fakeFileSize, fakeFileSize);
return { download_url: 'ok', embed_url: null, file_code: 'ok' };
});
const mgr = new UploadManager(
{ 'byse.sx': { retries: 1, parallelCount: 1, maxSpeedKbs: 0, restartBelowKbs: 0, timeIntervalSec: 0, maxSizeMb: 0 } }
);
// Scenario: initially config has ONLY acc1. No account-failed listener
// resolves a fallback (because none exists in config yet). Job A fails
// with rotation-end.
const rotEvents = [];
mgr.on('rot-log', (e) => rotEvents.push(e));
await mgr.startBatch([
{ file: '/test/a.mp4', hoster: 'byse.sx', apiKey: 'acc1-key', accountId: 'acc1', username: 'u1', password: 'p1' }
]);
// Job A should have ended with rotation-end (no fallback available).
const eventsA = rotEvents.map(e => e.event);
assert.ok(eventsA.includes('mark-failed'), 'acc1 must be marked failed');
assert.ok(eventsA.includes('rotation-end'), 'expected rotation-end without a fallback');
assert.equal(mgr.getFailedAccountKeys().length, 1);
assert.equal(mgr.getOverride('byse.sx'), null, 'no override set during first batch');
// --- Simulate: user adds acc2 in Settings → save-config handler finds
// that byse.sx:acc1 is failed without an override → resolves + switches.
mgr.switchAccount('byse.sx', { id: 'acc2', username: 'u2', password: 'p2', apiKey: 'acc2-key' });
// Now a follow-up batch (same running session — in production, addJobs
// or a new startBatch without clearing maps would reach this state).
// We need to ALSO clear _failedAccounts manually here because startBatch
// resets it — so we poke the inner state to emulate "still mid-batch
// with late config". The switchAccount-after-fail path is what matters.
rotEvents.length = 0;
// Re-run just the _runJob path by manually setting up state and using
// addJobs — simulates mid-batch job add after config change.
mgr.running = true;
mgr._batchResults = new Map();
mgr._batchResults.set('/test/b.mp4', { name: 'b.mp4', size: fakeFileSize, results: [] });
mgr._failedAccounts.set('byse.sx:acc1', true); // re-establish failed state
mgr._additionalPromises = [];
// Spawn a new job through addJobs() path (uses _runJob internally)
const addResult = await mgr.addJobs([
{ file: '/test/b.mp4', hoster: 'byse.sx', apiKey: 'acc1-key', accountId: 'acc1', username: 'u1', password: 'p1', jobId: 'jb' }
]);
assert.ok(addResult.added >= 1 || addResult.alreadyInBatch === 0,
`addJobs should accept new job: ${JSON.stringify(addResult)}`);
await Promise.allSettled(mgr._additionalPromises);
const eventsB = rotEvents.map(e => e.event);
assert.ok(eventsB.includes('pre-job-swap'),
`job B should have pre-job-swap after late override was set; got: ${eventsB.join(',')}`);
// Mock must have been called with acc2-key for the new job (not acc1-key again)
const acc1ForB = mockUploadFile.mock.calls.filter(c =>
c.arguments[1] === '/test/b.mp4' && c.arguments[2] === 'acc1-key').length;
assert.equal(acc1ForB, 0, 'job B must never touch acc1-key after late fallback was set');
});
});
}); });