harden: GQL retry on transient errors + consolidate shutdown cleanup
Two server-side changes touching different paths.
1. fetchPublicTwitchGql now retries on transient HTTP errors (408/429/5xx)
and network-layer failures (no response). Up to 3 attempts with
exponential backoff (400ms * 2^(n-1)) plus 0-249ms of random jitter. The previous
catch (e) { return null; } swallowed network blips on the public
fallback path, which is what every user without a client_id hits
on each VOD list load — a single TCP RST produced an empty list
and the user had to click refresh. A response carrying GraphQL errors[]
is still not retried (application-level query rejection): it is logged
and the function returns null.
Recovery is logged via appendDebugLog so we can later see whether
the retries actually pay off in production.
2. shutdownCleanup() consolidates window-all-closed and before-quit.
The two handlers ran nearly identical cleanup blocks but had
drifted: only window-all-closed killed children and was
platform-aware. The helper stops metadata-cache cleanup and auto-update
polling, kills activeDownloads + activeClipProcesses +
currentEditorProcess (each kill wrapped in try/catch), persists
config + queue, and only then stops the debug-log flush timer (moved
AFTER persistence so any save error reaches the log before the timer
is gone). A shutdownCleanupDone flag makes the helper idempotent, so a
follow-on event is a no-op.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
81a1f914b4
commit
020f3dacf1
99
src/main.ts
99
src/main.ts
@ -1598,7 +1598,30 @@ function formatTwitchDurationFromSeconds(totalSeconds: number): string {
|
||||
return `${s}s`;
|
||||
}
|
||||
|
||||
/**
 * Decides whether a failed request is worth retrying.
 *
 * Retryable: HTTP 408 (timeout), 429 (rate limit) and any 5xx, plus every
 * failure that carries no HTTP status at all — either because the thrown value
 * is not an AxiosError, or because the AxiosError has no response attached
 * (the request never reached the server / never came back).
 * Any other status (e.g. the remaining 4xx codes) is treated as an
 * application error and is not retried.
 *
 * @param err The value caught from the failed request.
 * @returns `true` when the failure looks transient.
 */
function isTransientAxiosError(err: unknown): boolean {
    // Non-axios errors and response-less axios errors both end up without a
    // status code; they are handled as network-layer blips.
    const httpStatus = axios.isAxiosError(err) ? err.response?.status : undefined;
    if (httpStatus === undefined) {
        return true;
    }

    if (httpStatus === 408 || httpStatus === 429) {
        return true;
    }
    return httpStatus >= 500 && httpStatus < 600;
}
|
||||
|
||||
const TWITCH_GQL_RETRY_ATTEMPTS = 3;
|
||||
const TWITCH_GQL_RETRY_BASE_DELAY_MS = 400;
|
||||
|
||||
async function fetchPublicTwitchGql<T>(query: string, variables: Record<string, unknown>): Promise<T | null> {
|
||||
let lastError: unknown = null;
|
||||
|
||||
for (let attempt = 1; attempt <= TWITCH_GQL_RETRY_ATTEMPTS; attempt++) {
|
||||
try {
|
||||
const response = await axios.post<{ data?: T; errors?: Array<{ message: string }> }>(
|
||||
'https://gql.twitch.tv/gql',
|
||||
@ -1612,16 +1635,41 @@ async function fetchPublicTwitchGql<T>(query: string, variables: Record<string,
|
||||
}
|
||||
);
|
||||
|
||||
// GraphQL errors (in `errors[]`) are application-level and not
|
||||
// retried — the query itself is rejected.
|
||||
if (response.data.errors?.length) {
|
||||
console.error('Public Twitch GQL errors:', response.data.errors.map((err) => err.message).join('; '));
|
||||
const messages = response.data.errors.map((err) => err.message).join('; ');
|
||||
appendDebugLog('public-gql-errors', { messages, attempt });
|
||||
console.error('Public Twitch GQL errors:', messages);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (attempt > 1) {
|
||||
appendDebugLog('public-gql-recovered', { attempt });
|
||||
}
|
||||
return response.data.data || null;
|
||||
} catch (e) {
|
||||
console.error('Public Twitch GQL request failed:', e);
|
||||
return null;
|
||||
lastError = e;
|
||||
const transient = isTransientAxiosError(e);
|
||||
const willRetry = transient && attempt < TWITCH_GQL_RETRY_ATTEMPTS;
|
||||
appendDebugLog('public-gql-failed', {
|
||||
attempt,
|
||||
maxAttempts: TWITCH_GQL_RETRY_ATTEMPTS,
|
||||
transient,
|
||||
willRetry,
|
||||
error: String(e)
|
||||
});
|
||||
if (!willRetry) {
|
||||
break;
|
||||
}
|
||||
// Exponential backoff with jitter
|
||||
const delay = TWITCH_GQL_RETRY_BASE_DELAY_MS * Math.pow(2, attempt - 1) + Math.floor(Math.random() * 250);
|
||||
await sleep(delay);
|
||||
}
|
||||
}
|
||||
|
||||
console.error('Public Twitch GQL request failed:', lastError);
|
||||
return null;
|
||||
}
|
||||
|
||||
async function getPublicUserId(username: string): Promise<string | null> {
|
||||
@ -4050,38 +4098,59 @@ app.whenReady().then(() => {
|
||||
});
|
||||
});
|
||||
|
||||
// window-all-closed and before-quit share this one cleanup routine, so a
// future tweak (e.g. flushing a new debug stream) lands once and applies on
// every quit path.
let shutdownCleanupDone = false;

/**
 * Stops background timers, kills child processes and persists config + queue.
 *
 * The run is latched through `shutdownCleanupDone` only when the app is really
 * on its way out. On macOS the window-all-closed handler below does not call
 * app.quit(), so the process can outlive that event; latching there would turn
 * the eventual before-quit cleanup into a no-op and skip the final child kill
 * and the final config/queue save.
 *
 * @param reason App event that triggered the cleanup; written to the debug log.
 */
function shutdownCleanup(reason: 'window-all-closed' | 'before-quit'): void {
    if (shutdownCleanupDone) return;

    const appStaysAlive = reason === 'window-all-closed' && process.platform === 'darwin';
    if (!appStaysAlive) {
        shutdownCleanupDone = true;
    }

    appendDebugLog('shutdown-cleanup', { reason });

    stopMetadataCacheCleanup();
    cleanupMetadataCaches('shutdown');
    stopAutoUpdatePolling();

    // Kill all active children: queue downloads, standalone clip downloads,
    // and any in-flight cutter/merger/splitter ffmpeg. Every kill() is wrapped
    // so that one failing call (e.g. the process already exited) cannot abort
    // the rest of the cleanup.
    for (const [, tracking] of activeDownloads) {
        if (tracking.process) {
            try { tracking.process.kill(); } catch { /* already exited */ }
        }
    }
    activeDownloads.clear();

    for (const [, proc] of activeClipProcesses) {
        try { proc.kill(); } catch { /* already exited */ }
    }
    activeClipProcesses.clear();

    if (currentEditorProcess) {
        try { currentEditorProcess.kill(); } catch { /* already exited */ }
        currentEditorProcess = null;
    }

    saveConfig(config);
    flushQueueSave();

    // Flush debug log AFTER persisting state so any errors saving config /
    // queue land in the log before the timer is gone.
    stopDebugLogFlushTimer(true);
}

app.on('window-all-closed', () => {
    shutdownCleanup('window-all-closed');
    // macOS apps conventionally stay alive without windows; everywhere else
    // closing the last window quits the app.
    if (process.platform !== 'darwin') {
        app.quit();
    }
});

app.on('before-quit', () => {
    shutdownCleanup('before-quit');
});
|
||||
|
||||
Loading…
Reference in New Issue
Block a user