Overhaul download retry pipeline: circuit breaker, escalating backoff, shelve logic

- Provider circuit breaker: track consecutive failures per provider with
  escalating cooldowns (30s/60s/120s/300s), auto-invalidate Mega-Debrid
  session on cooldown
- Escalating backoff: retry delays now scale up to 120s (was 30s max);
  unrestrict backoff is now exponential instead of linear with a 15s cap
- Shelve logic: after 15 consecutive failures, item pauses 5 min with
  counter halving for gradual recovery
- Periodic soft-reset: every 10 min, reset stale retry counters (>10 min
  queued) and old provider failures (>15 min); this acts like a mini-restart
- Mega-Debrid queue timeout: 90s wait limit in runExclusive to prevent
  cascade blocking behind stuck calls
- Provider-cooldown-aware retry delays: items wait for provider cooldown
  instead of retrying against broken service
- Fix: reconnect/package_toggle now persist retry counters (previously
  lost on interruption, defeating shelve logic)
- Mega-Debrid generate: tighter timeouts, progressive reload backoff,
  hoster retry limit (5x max)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sucukdeluxe 2026-03-02 15:00:17 +01:00
parent a9c8ee2ff4
commit 550942aad7
4 changed files with 197 additions and 24 deletions

View File

@ -1,6 +1,6 @@
{
"name": "real-debrid-downloader",
"version": "1.4.87",
"version": "1.4.88",
"description": "Real-Debrid Downloader Desktop (Electron + React + TypeScript)",
"main": "build/main/main/main.js",
"author": "Sucukdeluxe",

View File

@ -58,7 +58,8 @@ export class AppController {
password: this.settings.megaPassword
}));
this.manager = new DownloadManager(this.settings, session, this.storagePaths, {
megaWebUnrestrict: (link: string, signal?: AbortSignal) => this.megaWebFallback.unrestrict(link, signal)
megaWebUnrestrict: (link: string, signal?: AbortSignal) => this.megaWebFallback.unrestrict(link, signal),
invalidateMegaSession: () => this.megaWebFallback.invalidateSession()
});
this.manager.on("state", (snapshot: UiSnapshot) => {
this.onStateHandler?.(snapshot);

View File

@ -134,6 +134,7 @@ function retryLimitToMaxRetries(retryLimit: number): number {
/** Optional hooks handed to the DownloadManager constructor. */
type DownloadManagerOptions = {
/** Forwarded to the DebridService as its `megaWebUnrestrict` option. */
megaWebUnrestrict?: MegaWebUnrestrictor;
/**
 * Stored by the manager and invoked (best-effort) whenever the "megadebrid"
 * provider is put on a circuit-breaker cooldown, so the next call logs in fresh.
 */
invalidateMegaSession?: () => void;
};
function cloneSession(session: SessionState): SessionState {
@ -680,8 +681,8 @@ function resolveArchiveItemsFromList(archiveName: string, items: DownloadItem[])
}
/**
 * Computes a randomized retry delay that grows exponentially with the attempt number.
 *
 * The raw delay is `baseMs * 1.5^(attempt - 1)`, with the exponent limited to 14
 * and the result limited to 120 000 ms. The returned value is a random point in
 * the upper half of that capped delay (50%–100%), floored to whole milliseconds.
 *
 * @param attempt 1-based retry attempt number.
 * @param baseMs Delay for the first attempt before jitter, in milliseconds.
 * @returns The jittered delay in milliseconds.
 */
function retryDelayWithJitter(attempt: number, baseMs: number): number {
  // Only the post-change values are kept; the block previously declared
  // `exponential` and `capped` twice (old 8/30000 and new 14/120000).
  const exponential = baseMs * Math.pow(1.5, Math.min(attempt - 1, 14));
  const capped = Math.min(exponential, 120000);
  // Jitter spreads simultaneous retries across the upper half of the window.
  const jittered = capped * (0.5 + Math.random() * 0.5);
  return Math.floor(jittered);
}
@ -695,6 +696,8 @@ export class DownloadManager extends EventEmitter {
private debridService: DebridService;
private invalidateMegaSessionFn?: () => void;
private activeTasks = new Map<string, ActiveTask>();
private scheduleRunning = false;
@ -762,6 +765,10 @@ export class DownloadManager extends EventEmitter {
unrestrictRetries: number;
}>();
private providerFailures = new Map<string, { count: number; lastFailAt: number; cooldownUntil: number }>();
private lastStaleResetAt = 0;
public constructor(settings: AppSettings, session: SessionState, storagePaths: StoragePaths, options: DownloadManagerOptions = {}) {
super();
this.settings = settings;
@ -769,6 +776,7 @@ export class DownloadManager extends EventEmitter {
this.itemCount = Object.keys(this.session.items).length;
this.storagePaths = storagePaths;
this.debridService = new DebridService(settings, { megaWebUnrestrict: options.megaWebUnrestrict });
this.invalidateMegaSessionFn = options.invalidateMegaSession;
this.applyOnStartCleanupPolicy();
this.normalizeSessionStatuses();
void this.recoverRetryableItems("startup").catch((err) => logger.warn(`recoverRetryableItems Fehler (startup): ${compactErrorText(err)}`));
@ -2943,6 +2951,79 @@ export class DownloadManager extends EventEmitter {
this.resetSessionTotalsIfQueueEmpty();
}
// ── Provider Circuit Breaker ──────────────────────────────────────────
/**
 * Registers one more consecutive failure for `provider` and, from the fifth
 * failure onwards, (re)arms an escalating cooldown for that provider.
 *
 * Cooldown tiers by failure count: 5–9 → 30s, 10–14 → 60s, 15–19 → 120s, 20+ → 300s.
 * When the provider is "megadebrid" and an invalidation hook was supplied, the
 * hook is called each time a cooldown is armed so the next attempt logs in fresh.
 *
 * @param provider Provider key the failure is attributed to.
 */
private recordProviderFailure(provider: string): void {
  const now = nowMs();
  const entry = this.providerFailures.get(provider) || { count: 0, lastFailAt: 0, cooldownUntil: 0 };
  entry.count += 1;
  entry.lastFailAt = now;

  // Escalating cooldown: 5 failures→30s, 10→60s, 15→120s, 20+→300s
  if (entry.count >= 5) {
    const cooldownTiersMs = [30000, 60000, 120000, 300000];
    const tier = Math.min(Math.floor((entry.count - 5) / 5), cooldownTiersMs.length - 1);
    const cooldownMs = cooldownTiersMs[tier];
    entry.cooldownUntil = now + cooldownMs;
    logger.warn(`Provider Circuit-Breaker: ${provider} ${entry.count} konsekutive Fehler, Cooldown ${cooldownMs / 1000}s`);

    // Invalidate mega-debrid session on cooldown to force fresh login
    if (provider === "megadebrid" && this.invalidateMegaSessionFn) {
      try {
        this.invalidateMegaSessionFn();
      } catch (err) {
        // Still best-effort: a failing hook must not break failure accounting,
        // but it should be visible in the log instead of vanishing.
        const detail = err instanceof Error ? err.message : String(err);
        logger.warn(`Provider Circuit-Breaker: Mega-Debrid Session-Invalidierung fehlgeschlagen: ${detail}`);
      }
    }
  }

  this.providerFailures.set(provider, entry);
}
/**
 * Clears the failure record of `provider` after a successful call, which also
 * drops any cooldown stored with it.
 *
 * @param provider Provider key whose failure streak ended.
 */
private recordProviderSuccess(provider: string): void {
  // Map.delete is a no-op for unknown keys, so no prior existence check is needed.
  this.providerFailures.delete(provider);
}
/**
 * Reports how long `provider` is still on cooldown.
 *
 * @param provider Provider key to look up.
 * @returns Remaining cooldown in milliseconds, or 0 when the provider has no
 *          failure record, no cooldown set, or the cooldown already elapsed.
 */
private getProviderCooldownRemaining(provider: string): number {
  const cooldownUntil = this.providerFailures.get(provider)?.cooldownUntil ?? 0;
  if (cooldownUntil <= 0) {
    return 0;
  }
  const msLeft = cooldownUntil - nowMs();
  if (msLeft > 0) {
    return msLeft;
  }
  return 0;
}
/**
 * Periodic soft-reset of retry bookkeeping.
 *
 * 1. For every tracked retry state whose item is still "queued", has no active
 *    task, has no retry time in the future, and was last updated more than
 *    10 minutes ago, all retry counters are zeroed and `freshRetryUsed` is cleared.
 *    Entries whose counters are already clean are skipped, so an idle queued
 *    item is not re-logged as "reset" on every pass.
 * 2. Provider failure records whose last failure is older than 15 minutes are
 *    removed, which also lifts any cooldown stored with them.
 */
private resetStaleRetryState(): void {
  const now = nowMs();
  const staleItemThresholdMs = 600000;
  const staleProviderThresholdMs = 900000;

  // Reset retry counters for items queued >10 min without progress
  for (const [itemId, retryState] of this.retryStateByItem) {
    const item = this.session.items[itemId];
    if (!item || item.status !== "queued") {
      continue;
    }
    if (this.activeTasks.has(itemId)) {
      continue;
    }
    // A retry time still in the future means the item is deliberately waiting.
    const retryAfter = this.retryAfterByItem.get(itemId) || 0;
    if (retryAfter > now) {
      continue;
    }
    const staleMs = now - item.updatedAt;
    if (staleMs <= staleItemThresholdMs) {
      continue;
    }
    const hasAnythingToReset = retryState.freshRetryUsed
      || retryState.stallRetries !== 0
      || retryState.unrestrictRetries !== 0
      || retryState.genericErrorRetries !== 0;
    if (!hasAnythingToReset) {
      // Already clean: resetting would be a no-op and the log line misleading.
      continue;
    }
    retryState.stallRetries = 0;
    retryState.unrestrictRetries = 0;
    retryState.genericErrorRetries = 0;
    retryState.freshRetryUsed = false;
    logger.info(`Soft-Reset: Retry-Counter zurückgesetzt für ${item.fileName || itemId} (${Math.floor(staleMs / 60000)} min stale)`);
  }

  // Reset provider failures older than 15 min
  // (deleting the current key while iterating a Map is well-defined)
  for (const [provider, entry] of this.providerFailures) {
    if (now - entry.lastFailAt > staleProviderThresholdMs) {
      this.providerFailures.delete(provider);
      logger.info(`Soft-Reset: Provider-Failures zurückgesetzt für ${provider}`);
    }
  }
}
// ── Scheduler ──────────────────────────────────────────────────────────
private async ensureScheduler(): Promise<void> {
if (this.scheduleRunning) {
return;
@ -2956,6 +3037,11 @@ export class DownloadManager extends EventEmitter {
this.lastSchedulerHeartbeatAt = now;
logger.info(`Scheduler Heartbeat: active=${this.activeTasks.size}, queued=${this.countQueuedItems()}, reconnect=${this.reconnectActive()}, paused=${this.session.paused}, postProcess=${this.packagePostProcessTasks.size}`);
}
// Periodic soft-reset every 10 min: clear stale retry counters & provider failures
if (now - this.lastStaleResetAt >= 600000) {
this.lastStaleResetAt = now;
this.resetStaleRetryState();
}
if (this.session.paused) {
await sleep(120);
@ -3330,6 +3416,23 @@ export class DownloadManager extends EventEmitter {
const maxStallRetries = maxItemRetries;
while (true) {
try {
// Check provider cooldown before attempting unrestrict
const lastProvider = item.provider || "";
const cooldownProviders = lastProvider ? [lastProvider] : ["realdebrid", "megadebrid", "bestdebrid", "alldebrid", "unknown"];
let maxCooldownMs = 0;
for (const prov of cooldownProviders) {
const cd = this.getProviderCooldownRemaining(prov);
if (cd > maxCooldownMs) {
maxCooldownMs = cd;
}
}
if (maxCooldownMs > 0) {
const delayMs = Math.min(maxCooldownMs + 1000, 310000);
this.queueRetry(item, active, delayMs, `Provider-Cooldown (${Math.ceil(delayMs / 1000)}s)`);
this.persistSoon();
this.emitState();
return;
}
const unrestrictTimeoutSignal = AbortSignal.timeout(getUnrestrictTimeoutMs());
const unrestrictedSignal = AbortSignal.any([active.abortController.signal, unrestrictTimeoutSignal]);
let unrestricted;
@ -3337,13 +3440,22 @@ export class DownloadManager extends EventEmitter {
unrestricted = await this.debridService.unrestrictLink(item.url, unrestrictedSignal);
} catch (unrestrictError) {
if (!active.abortController.signal.aborted && unrestrictTimeoutSignal.aborted) {
// Record failure for all providers since we don't know which one timed out
this.recordProviderFailure(lastProvider || "unknown");
throw new Error(`Unrestrict Timeout nach ${Math.ceil(getUnrestrictTimeoutMs() / 1000)}s`);
}
// Record failure for the provider that errored
const errText = compactErrorText(unrestrictError);
if (isUnrestrictFailure(errText)) {
this.recordProviderFailure(lastProvider || "unknown");
}
throw unrestrictError;
}
if (active.abortController.signal.aborted) {
throw new Error(`aborted:${active.abortReason}`);
}
// Unrestrict succeeded - reset provider failure counter
this.recordProviderSuccess(unrestricted.provider);
item.provider = unrestricted.provider;
item.retries += unrestricted.retriesUsed;
item.fileName = sanitizeFilename(unrestricted.fileName || filenameFromUrl(item.url));
@ -3528,17 +3640,47 @@ export class DownloadManager extends EventEmitter {
item.status = "queued";
item.speedBps = 0;
item.fullStatus = "Wartet auf Reconnect";
// Persist retry counters so shelve logic survives reconnect interruption
this.retryStateByItem.set(item.id, {
freshRetryUsed: Boolean(active.freshRetryUsed),
stallRetries: Number(active.stallRetries || 0),
genericErrorRetries: Number(active.genericErrorRetries || 0),
unrestrictRetries: Number(active.unrestrictRetries || 0)
});
} else if (reason === "package_toggle") {
item.status = "queued";
item.speedBps = 0;
item.fullStatus = "Paket gestoppt";
this.retryStateByItem.set(item.id, {
freshRetryUsed: Boolean(active.freshRetryUsed),
stallRetries: Number(active.stallRetries || 0),
genericErrorRetries: Number(active.genericErrorRetries || 0),
unrestrictRetries: Number(active.unrestrictRetries || 0)
});
} else if (reason === "stall") {
const stallErrorText = compactErrorText(error);
const isSlowThroughput = stallErrorText.includes("slow_throughput");
const wasValidating = item.status === "validating";
active.stallRetries += 1;
const stallDelayMs = retryDelayWithJitter(active.stallRetries, 500);
// Record provider failure if stall during validation
if (wasValidating && item.provider) {
this.recordProviderFailure(item.provider);
}
logger.warn(`Stall erkannt: item=${item.fileName || item.id}, phase=${wasValidating ? "validating" : "downloading"}, retry=${active.stallRetries}/${retryDisplayLimit}, bytes=${item.downloadedBytes}, error=${stallErrorText || "none"}, provider=${item.provider || "?"}`);
// Shelve check: too many consecutive failures → long pause
const totalFailures = (active.stallRetries || 0) + (active.unrestrictRetries || 0) + (active.genericErrorRetries || 0);
if (totalFailures >= 15) {
item.retries += 1;
active.stallRetries = Math.floor((active.stallRetries || 0) / 2);
active.unrestrictRetries = Math.floor((active.unrestrictRetries || 0) / 2);
active.genericErrorRetries = Math.floor((active.genericErrorRetries || 0) / 2);
logger.warn(`Item shelved: ${item.fileName || item.id}, totalFailures=${totalFailures}`);
this.queueRetry(item, active, 300000, `Viele Fehler (${totalFailures}x), Pause 5 min`);
item.lastError = stallErrorText;
this.persistSoon();
this.emitState();
return;
}
if (active.stallRetries <= maxStallRetries) {
item.retries += 1;
// Reset partial download so next attempt uses a fresh link
@ -3553,6 +3695,14 @@ export class DownloadManager extends EventEmitter {
item.totalBytes = null;
this.dropItemContribution(item.id);
}
let stallDelayMs = retryDelayWithJitter(active.stallRetries, 500);
// Respect provider cooldown
if (item.provider) {
const providerCooldown = this.getProviderCooldownRemaining(item.provider);
if (providerCooldown > stallDelayMs) {
stallDelayMs = providerCooldown + 1000;
}
}
const retryText = wasValidating
? `Link-Umwandlung hing, Retry ${active.stallRetries}/${retryDisplayLimit}`
: isSlowThroughput
@ -3615,11 +3765,32 @@ export class DownloadManager extends EventEmitter {
return;
}
// Shelve check for non-stall errors
const totalNonStallFailures = (active.stallRetries || 0) + (active.unrestrictRetries || 0) + (active.genericErrorRetries || 0);
if (totalNonStallFailures >= 15) {
item.retries += 1;
active.stallRetries = Math.floor((active.stallRetries || 0) / 2);
active.unrestrictRetries = Math.floor((active.unrestrictRetries || 0) / 2);
active.genericErrorRetries = Math.floor((active.genericErrorRetries || 0) / 2);
logger.warn(`Item shelved (error path): ${item.fileName || item.id}, totalFailures=${totalNonStallFailures}, error=${errorText}`);
this.queueRetry(item, active, 300000, `Viele Fehler (${totalNonStallFailures}x), Pause 5 min`);
item.lastError = errorText;
this.persistSoon();
this.emitState();
return;
}
if (isUnrestrictFailure(errorText) && active.unrestrictRetries < maxUnrestrictRetries) {
active.unrestrictRetries += 1;
item.retries += 1;
// Longer backoff for unrestrict: 5s, 10s, 15s (capped at 15s) to let API cache expire
const unrestrictDelayMs = Math.min(15000, 5000 * active.unrestrictRetries);
this.recordProviderFailure(item.provider || "unknown");
// Escalating backoff: 5s, 7.5s, 11s, 17s, 25s, 38s, ... up to 120s
let unrestrictDelayMs = Math.min(120000, Math.floor(5000 * Math.pow(1.5, active.unrestrictRetries - 1)));
// Respect provider cooldown
const providerCooldown = this.getProviderCooldownRemaining(item.provider || "unknown");
if (providerCooldown > unrestrictDelayMs) {
unrestrictDelayMs = providerCooldown + 1000;
}
logger.warn(`Unrestrict-Fehler: item=${item.fileName || item.id}, retry=${active.unrestrictRetries}/${retryDisplayLimit}, delay=${unrestrictDelayMs}ms, error=${errorText}, link=${item.url.slice(0, 80)}`);
// Reset partial download so next attempt starts fresh
if (item.downloadedBytes > 0) {

View File

@ -196,7 +196,7 @@ export class MegaWebFallback {
return null;
}
if (!this.cookie || Date.now() - this.cookieSetAt > 10 * 60 * 1000) {
if (!this.cookie || Date.now() - this.cookieSetAt > 20 * 60 * 1000) {
await this.login(creds.login, creds.password, signal);
}
@ -225,9 +225,20 @@ export class MegaWebFallback {
}, signal);
}
/**
 * Forgets the cached login: clears the stored cookie and resets its timestamp.
 * The `!this.cookie` check before login then triggers a fresh login on the next call.
 */
public invalidateSession(): void {
  this.cookieSetAt = 0;
  this.cookie = "";
}
private async runExclusive<T>(job: () => Promise<T>, signal?: AbortSignal): Promise<T> {
const queuedAt = Date.now();
const QUEUE_WAIT_TIMEOUT_MS = 90000;
const guardedJob = async (): Promise<T> => {
throwIfAborted(signal);
const waited = Date.now() - queuedAt;
if (waited > QUEUE_WAIT_TIMEOUT_MS) {
throw new Error(`Mega-Web Queue-Timeout (${Math.floor(waited / 1000)}s gewartet)`);
}
return job();
};
const run = this.queue.then(guardedJob, guardedJob);
@ -278,8 +289,6 @@ export class MegaWebFallback {
private async generate(link: string, signal?: AbortSignal): Promise<{ directUrl: string; fileName: string } | null> {
throwIfAborted(signal);
// Overall timeout for the entire generate operation (45s)
const generateSignal = withTimeoutSignal(signal, 45000);
const page = await fetch(DEBRID_URL, {
method: "POST",
headers: {
@ -293,7 +302,7 @@ export class MegaWebFallback {
password: "",
showLinks: "1"
}),
signal: withTimeoutSignal(generateSignal, 20000)
signal: withTimeoutSignal(signal, 30000)
});
const html = await page.text();
@ -302,10 +311,8 @@ export class MegaWebFallback {
return null;
}
let reloadCount = 0;
let hosterRetryCount = 0;
for (let attempt = 1; attempt <= 30; attempt += 1) {
throwIfAborted(generateSignal);
for (let attempt = 1; attempt <= 60; attempt += 1) {
throwIfAborted(signal);
const res = await fetch(DEBRID_AJAX_URL, {
method: "POST",
headers: {
@ -318,14 +325,12 @@ export class MegaWebFallback {
code,
autodl: "0"
}),
signal: withTimeoutSignal(generateSignal, 12000)
signal: withTimeoutSignal(signal, 15000)
});
const text = (await res.text()).trim();
if (text === "reload") {
reloadCount += 1;
// Back off progressively: 500ms, 700ms, 900ms...
await sleepWithSignal(Math.min(2000, 500 + reloadCount * 200), generateSignal);
await sleepWithSignal(650, signal);
continue;
}
if (text === "false") {
@ -339,11 +344,7 @@ export class MegaWebFallback {
if (!parsed.link) {
if (/hoster does not respond correctly|could not be done for this moment/i.test(parsed.text || "")) {
hosterRetryCount += 1;
if (hosterRetryCount > 5) {
return null;
}
await sleepWithSignal(Math.min(3000, 800 + hosterRetryCount * 400), generateSignal);
await sleepWithSignal(1200, signal);
continue;
}
return null;