From cdec0029feabdd787c5cba3ddb2faf7faf6f3365 Mon Sep 17 00:00:00 2001 From: Sucukdeluxe Date: Sun, 19 Apr 2026 22:11:35 +0200 Subject: [PATCH] Debrid-Link stability: stop double-blocking, shorter transport cooldown, max-wins User reported Debrid-Link "often jumps into provider-cooldown" and feels unstable. Root cause was four cooperating bugs that turned isolated key-level failures into provider-wide multi-minute outages. Fix 1: Skip provider-wide circuit breaker for ALL Debrid-Link errors (download-manager.ts ~8689) Previously only the explicit `debrid_link_cooldown:` sentinel was bypassed; every other Debrid-Link error (terminal failures, timeouts, parse errors) still went through recordProviderFailure() + applyProviderBusyBackoff(), applying a provider-wide cooldown ON TOP of the per-key cooldown debrid.ts already managed. Now any error whose message contains "debrid-link" or "debrid_link" (case-insensitive), or whose failure key is "debridlink", skips the provider-level circuit breaker entirely. Per-key cooldowns alone are the right granularity. Fix 2: Transport errors get a short 15s cooldown, not 2 min (debrid.ts ~2684) A single network timeout / ECONNRESET was parking the key for 2 full minutes. With 9 keys all of which might experience the same transient issue at different moments, this could cascade into all keys cooling down for 2 min each. Now isolated transport hiccups get 15s while real API/server problems still get the full 2 min. Fix 3: HTTP 200 with success:false (no error code) is now temporary, not fatal (debrid.ts ~2691) Previously these went through to the fallthrough "fatal: true" which permanently failed the item. Now they get a 30s temporary cooldown and the item retries on the next key. Fix 4: setDebridLinkKeyCooldownState is max-wins under concurrent calls (debrid.ts ~135) When 8 parallel items all hit the same key with floodDetected, each computes its own cooldown duration and calls setDebridLinkKeyCooldownState. 
Without max-wins, the LAST setter could shorten the cooldown (e.g. one item read a 1h Retry-After header, another defaulted to 2 min; the 2 min would then overwrite the 1h). Now the longer cooldown wins, with rate_limit/quota/invalid categories also winning over plain temporary regardless of duration. Tests: 201/201 (debrid + download-manager) green. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/main/debrid.ts | 46 ++++++++++++++++++++++++++++++++++-- src/main/download-manager.ts | 21 +++++++++++----- 2 files changed, 59 insertions(+), 8 deletions(-) diff --git a/src/main/debrid.ts b/src/main/debrid.ts index 3940613..812448c 100644 --- a/src/main/debrid.ts +++ b/src/main/debrid.ts @@ -142,7 +142,30 @@ function setDebridLinkKeyCooldownState( clearDebridLinkKeyCooldownState(keyId); return; } - debridLinkKeyCooldowns.set(keyId, Date.now() + Math.max(1000, Math.floor(cooldownMs))); + // Cooldown set: max-wins. When 8 parallel items hit floodDetected on the + // same key, each computes its own retry-after and calls setDebridLinkKey + // CooldownState. Without max-wins, the LAST setter could shorten the + // cooldown (e.g. one item got a 1h Retry-After header, another got the + // default 2 min — without max-wins the 2 min would overwrite the 1h). + // Quota and rate_limit categories take priority over generic temporary + // cooldowns regardless of duration to preserve the more-specific signal. + const newUntil = Date.now() + Math.max(1000, Math.floor(cooldownMs)); + const existingUntil = Number(debridLinkKeyCooldowns.get(keyId) || 0); + const existingDetail = debridLinkKeyCooldownDetails.get(keyId); + const newIsStrongCategory = category === "rate_limit" || category === "quota" || category === "invalid"; + const existingIsStrongCategory = existingDetail + ? 
(existingDetail.category === "rate_limit" || existingDetail.category === "quota" || existingDetail.category === "invalid") + : false; + // Keep existing if it's still active and either lasts longer or has a stronger category + if (existingUntil > Date.now()) { + if (existingUntil >= newUntil && (!newIsStrongCategory || existingIsStrongCategory)) { + return; + } + if (existingIsStrongCategory && !newIsStrongCategory) { + return; + } + } + debridLinkKeyCooldowns.set(keyId, newUntil); debridLinkKeyCooldownDetails.set(keyId, { message, category }); setDebridLinkKeyRuntimeStatus(keyId, mapDebridLinkCooldownCategoryToRuntimeState(category), message); } @@ -2682,13 +2705,32 @@ class DebridLinkClient { } if (isRetryableErrorText(errorText) || /debrid-link.*(json|html)/i.test(errorText)) { + // Distinguish a single transient transport error (timeout, network blip, + // ECONNRESET) from a real API/server problem. Single timeouts shouldn't + // park a key for 2 full minutes — that just delays parallel work for + // no reason. Use a short 15s cooldown for transport, full 2min only + // for things that look like server-side faults (5xx HTML pages, etc). + const isTransport = /timeout|network|fetch failed|aborted|econnreset|enotfound|etimedout|socket/i.test(errorText) + && !(error instanceof DebridLinkApiError); return { fatal: false, - cooldownMs: DEBRID_LINK_KEY_COOLDOWN_MS, + cooldownMs: isTransport ? 15_000 : DEBRID_LINK_KEY_COOLDOWN_MS, message: errorText || "temporärer Transportfehler" }; } + // HTTP 200 with success:false but no recognizable error code: don't kill + // the item permanently. Treat as a temporary blip — same key can be tried + // again after a short cooldown, or another key picked up. 
+ if (errorText && /success.*false|kein.*json|empty.*response/i.test(errorText)) { + return { + fatal: false, + cooldownMs: 30_000, + message: errorText, + category: "temporary" + }; + } + return { fatal: true, cooldownMs: 0, diff --git a/src/main/download-manager.ts b/src/main/download-manager.ts index 16c3945..21d2d85 100644 --- a/src/main/download-manager.ts +++ b/src/main/download-manager.ts @@ -8687,12 +8687,21 @@ export class DownloadManager extends EventEmitter { active.unrestrictRetries += 1; item.retries += 1; const failureProvider = this.getProviderFailureKeyForItem(item); - this.recordProviderFailure(failureProvider); - if (isProviderBusyUnrestrictError(errorText) || isTemporaryUnrestrictError(errorText)) { - const busyCooldownMs = isTemporaryUnrestrictError(errorText) - ? Math.min(180000, 20000 + Number(active.unrestrictRetries || 0) * 10000) - : Math.min(60000, 12000 + Number(active.unrestrictRetries || 0) * 3000); - this.applyProviderBusyBackoff(failureProvider, busyCooldownMs); + // Debrid-Link manages its own per-key cooldowns in debrid.ts. The + // provider-wide circuit breaker would double-block all Debrid-Link + // keys when only one key (or a transient transport hiccup) failed. + // Skip recordProviderFailure / applyProviderBusyBackoff entirely + // for any Debrid-Link-flavoured error message, not just the + // debrid_link_cooldown sentinel that's caught above. + const isDebridLinkError = /debrid-link|debrid_link/i.test(errorText) || failureProvider === "debridlink"; + if (!isDebridLinkError) { + this.recordProviderFailure(failureProvider); + if (isProviderBusyUnrestrictError(errorText) || isTemporaryUnrestrictError(errorText)) { + const busyCooldownMs = isTemporaryUnrestrictError(errorText) + ? 
Math.min(180000, 20000 + Number(active.unrestrictRetries || 0) * 10000) + : Math.min(60000, 12000 + Number(active.unrestrictRetries || 0) * 3000); + this.applyProviderBusyBackoff(failureProvider, busyCooldownMs); + } } // Escalating backoff: 5s, 7.5s, 11s, 17s, 25s, 38s, ... up to 120s let unrestrictDelayMs = Math.min(120000, Math.floor(5000 * Math.pow(1.5, active.unrestrictRetries - 1)));