From 020f3dacf1368940b13e026afae113a4daf63db6 Mon Sep 17 00:00:00 2001 From: xRangerDE Date: Sun, 3 May 2026 15:54:40 +0200 Subject: [PATCH] harden: GQL retry on transient errors + consolidate shutdown cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two server-side changes touching different paths. 1. fetchPublicTwitchGql now retries on transient HTTP statuses (408/429/5xx) and on network-layer failures (no response). Up to 3 attempts, with exponential backoff between them (400ms * 2^(n-1) after failed attempt n) plus up to 250ms of random jitter. The previous catch (e) { return null; } swallowed network blips on the public fallback path, which is what every user without a client_id hits on each VOD list load — a single TCP RST produced an empty list and the user had to click refresh. A response carrying GraphQL errors[] still makes the function return null immediately, without retry (these are application-level query rejections). Every failed attempt and every recovery after a retry is logged via appendDebugLog so we can later see whether the retries actually pay off in production. 2. shutdownCleanup() consolidates window-all-closed and before-quit. The two handlers ran nearly identical cleanup blocks but had drifted: only window-all-closed killed children and was platform-aware. The helper stops the metadata-cache cleanup and auto-update polling, kills activeDownloads + activeClipProcesses + currentEditorProcess (each kill wrapped in try/catch), persists config + queue, and only then stops the debug-log flush timer (moved AFTER persistence so any save error reaches the log before the timer is gone). A shutdownCleanupDone guard makes the helper idempotent, so a second event during the same quit is a no-op. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/main.ts | 139 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 104 insertions(+), 35 deletions(-) diff --git a/src/main.ts b/src/main.ts index 501763b..a0787de 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1598,30 +1598,78 @@ function formatTwitchDurationFromSeconds(totalSeconds: number): string { return `${s}s`; } -async function fetchPublicTwitchGql(query: string, variables: Record): Promise { - try { - const response = await axios.post<{ data?: T; errors?: Array<{ message: string }> }>( - 'https://gql.twitch.tv/gql', - { query, variables }, - { - headers: { - 'Client-ID': TWITCH_WEB_CLIENT_ID, - 'Content-Type': 'application/json' - }, - timeout: API_TIMEOUT - } - ); - - if (response.data.errors?.length) { - console.error('Public Twitch GQL errors:', response.data.errors.map((err) => err.message).join('; ')); - return null; - } - - return response.data.data || null; - } catch (e) { - console.error('Public Twitch GQL request failed:', e); - return null; +// Transient HTTP errors that warrant a retry (5xx, 408 timeout, 429 rate limit). +// 4xx (other than 408/429) are application errors and not retried. +function isTransientAxiosError(err: unknown): boolean { + if (!axios.isAxiosError(err)) { + // Anything that is not an AxiosError is treated as transient and retried too. + // NOTE(review): axios normally reports DNS/ECONNRESET failures as AxiosError without a response — confirm this branch is still wanted. + return true; } + const status = err.response?.status; + if (status === undefined) { + // No response means the request never reached / never returned — + // treat as transient (network blip, timeout). 
+ return true; + } + return status === 408 || status === 429 || (status >= 500 && status < 600); +} + +const TWITCH_GQL_RETRY_ATTEMPTS = 3; +const TWITCH_GQL_RETRY_BASE_DELAY_MS = 400; + +async function fetchPublicTwitchGql(query: string, variables: Record): Promise { + let lastError: unknown = null; + + for (let attempt = 1; attempt <= TWITCH_GQL_RETRY_ATTEMPTS; attempt++) { + try { + const response = await axios.post<{ data?: T; errors?: Array<{ message: string }> }>( + 'https://gql.twitch.tv/gql', + { query, variables }, + { + headers: { + 'Client-ID': TWITCH_WEB_CLIENT_ID, + 'Content-Type': 'application/json' + }, + timeout: API_TIMEOUT + } + ); + + // GraphQL errors (in `errors[]`) are application-level and not + // retried — the query itself is rejected. + if (response.data.errors?.length) { + const messages = response.data.errors.map((err) => err.message).join('; '); + appendDebugLog('public-gql-errors', { messages, attempt }); + console.error('Public Twitch GQL errors:', messages); + return null; + } + + if (attempt > 1) { + appendDebugLog('public-gql-recovered', { attempt }); + } + return response.data.data || null; + } catch (e) { + lastError = e; + const transient = isTransientAxiosError(e); + const willRetry = transient && attempt < TWITCH_GQL_RETRY_ATTEMPTS; + appendDebugLog('public-gql-failed', { + attempt, + maxAttempts: TWITCH_GQL_RETRY_ATTEMPTS, + transient, + willRetry, + error: String(e) + }); + if (!willRetry) { + break; + } + // Exponential backoff (base delay * 2^(attempt - 1)) plus 0-249 ms of random jitter + const delay = TWITCH_GQL_RETRY_BASE_DELAY_MS * Math.pow(2, attempt - 1) + Math.floor(Math.random() * 250); + await sleep(delay); + } + } + + console.error('Public Twitch GQL request failed:', lastError); + return null; } async function getPublicUserId(username: string): Promise { @@ -4050,38 +4098,59 @@ app.whenReady().then(() => { }); }); -app.on('window-all-closed', () => { +// Both window-all-closed and before-quit ran nearly identical cleanup blocks +// before, with slight drift 
(only window-all-closed killed children, only +// window-all-closed did anything platform-specific). Consolidating them into +// one helper means a future tweak lands once on every quit path. The done-flag +// latches only on a real quit: on macOS the app outlives its last window, so a later before-quit must clean up and persist again. +let shutdownCleanupDone = false; + +function shutdownCleanup(reason: 'window-all-closed' | 'before-quit'): void { + if (shutdownCleanupDone) return; + shutdownCleanupDone = reason === 'before-quit' || process.platform !== 'darwin'; + + appendDebugLog('shutdown-cleanup', { reason }); + stopMetadataCacheCleanup(); cleanupMetadataCaches('shutdown'); - stopDebugLogFlushTimer(true); stopAutoUpdatePolling(); // Kill all active children: queue downloads, standalone clip downloads, - // and any in-flight cutter/merger/splitter ffmpeg. + // and any in-flight cutter/merger/splitter ffmpeg. before-quit used to + // skip this entirely; window-all-closed did it but only via direct + // kill() (no try/catch around the queue process kill). for (const [, tracking] of activeDownloads) { if (tracking.process) { - tracking.process.kill(); + try { tracking.process.kill(); } catch { /* already exited */ } } } + activeDownloads.clear(); + for (const [, proc] of activeClipProcesses) { - try { proc.kill(); } catch { } + try { proc.kill(); } catch { /* already exited */ } } + activeClipProcesses.clear(); + if (currentEditorProcess) { - currentEditorProcess.kill(); + try { currentEditorProcess.kill(); } catch { /* already exited */ } + currentEditorProcess = null; } + saveConfig(config); flushQueueSave(); + // Flush debug log AFTER persisting state so any errors saving config / + // queue land in the log before the timer is gone. 
+ stopDebugLogFlushTimer(true); +} + +app.on('window-all-closed', () => { + shutdownCleanup('window-all-closed'); if (process.platform !== 'darwin') { app.quit(); } }); app.on('before-quit', () => { - stopMetadataCacheCleanup(); - cleanupMetadataCaches('shutdown'); - stopDebugLogFlushTimer(true); - stopAutoUpdatePolling(); - saveConfig(config); - flushQueueSave(); + shutdownCleanup('before-quit'); });