harden: GQL retry on transient errors + consolidate shutdown cleanup

Two server-side changes touching different paths.

1. fetchPublicTwitchGql now retries on transient HTTP errors (408/429/5xx)
   and network-layer failures (no response). Up to 3 attempts with
   exponential backoff (400ms * 2^(n-1) after attempt n) plus up to
   250ms of random jitter. The previous
   catch (e) { return null; } swallowed network blips on the public
   fallback path, which is what every user without a client_id hits
   on each VOD list load — a single TCP RST produced an empty list
   and the user had to click refresh. GraphQL errors[] are still
   returned without retry (application-level query rejections).
   Recovery is logged via appendDebugLog so we can later see whether
   the retries actually pay off in production.

2. shutdownCleanup() consolidates window-all-closed and before-quit.
   The two handlers ran nearly identical cleanup blocks but had
   drifted: only window-all-closed killed children and was
   platform-aware. The helper first calls stopMetadataCacheCleanup,
   cleanupMetadataCaches and stopAutoUpdatePolling, then kills
   activeDownloads + activeClipProcesses + currentEditorProcess with
   try/catch, persists config + queue, and only then stops the
   debug-log flush timer (moved AFTER persistence so any save error
   reaches the log before the timer is gone). A shutdownCleanupDone
   guard flag makes the helper idempotent, so a follow-on event is a
   no-op.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
xRangerDE 2026-05-03 15:54:40 +02:00
parent 81a1f914b4
commit 020f3dacf1

View File

@ -1598,30 +1598,78 @@ function formatTwitchDurationFromSeconds(totalSeconds: number): string {
return `${s}s`;
}
async function fetchPublicTwitchGql<T>(query: string, variables: Record<string, unknown>): Promise<T | null> {
try {
const response = await axios.post<{ data?: T; errors?: Array<{ message: string }> }>(
'https://gql.twitch.tv/gql',
{ query, variables },
{
headers: {
'Client-ID': TWITCH_WEB_CLIENT_ID,
'Content-Type': 'application/json'
},
timeout: API_TIMEOUT
}
);
if (response.data.errors?.length) {
console.error('Public Twitch GQL errors:', response.data.errors.map((err) => err.message).join('; '));
return null;
}
return response.data.data || null;
} catch (e) {
console.error('Public Twitch GQL request failed:', e);
return null;
/**
 * Classifies a failure from an axios request as retryable or not.
 *
 * Retryable: anything axios does not recognise as an AxiosError, an
 * AxiosError that carries no response, and HTTP 408, 429 or 5xx.
 * Not retryable: every other HTTP status.
 *
 * @param err The value caught from the failed request.
 * @returns true when the caller should retry, false otherwise.
 */
function isTransientAxiosError(err: unknown): boolean {
  if (axios.isAxiosError(err)) {
    const httpStatus = err.response?.status;
    // No status means no response was received (network blip, timeout).
    if (httpStatus === undefined) return true;
    // Request timeout and rate limiting are worth another try.
    if (httpStatus === 408 || httpStatus === 429) return true;
    // Server-side errors are retried; remaining statuses are treated as
    // application errors.
    return httpStatus >= 500 && httpStatus < 600;
  }
  // Not an AxiosError: treated as a network-layer failure and retried.
  return true;
}
/** Total number of tries (first request included) for one public GQL call. */
const TWITCH_GQL_RETRY_ATTEMPTS = 3;
/** Backoff base in ms: the wait after try n is base * 2^(n-1) plus random jitter. */
const TWITCH_GQL_RETRY_BASE_DELAY_MS = 400;
/**
 * POSTs a GraphQL query to the public Twitch GQL endpoint, retrying failures
 * that isTransientAxiosError() classifies as transient.
 *
 * A response containing a non-empty `errors[]` is logged and returns null
 * immediately without a retry. A thrown failure is logged via appendDebugLog
 * and retried with exponential backoff plus jitter until
 * TWITCH_GQL_RETRY_ATTEMPTS tries have been used.
 *
 * @param query GraphQL query text sent in the request body.
 * @param variables Variables object sent alongside the query.
 * @returns The response's `data` payload, or null when it is missing/falsy,
 *          when GraphQL errors were returned, or when every try failed.
 */
async function fetchPublicTwitchGql<T>(query: string, variables: Record<string, unknown>): Promise<T | null> {
  type GqlEnvelope = { data?: T; errors?: Array<{ message: string }> };
  let lastError: unknown = null;
  let attempt = 0;
  while (attempt < TWITCH_GQL_RETRY_ATTEMPTS) {
    attempt += 1;
    try {
      const { data: envelope } = await axios.post<GqlEnvelope>(
        'https://gql.twitch.tv/gql',
        { query, variables },
        {
          headers: {
            'Client-ID': TWITCH_WEB_CLIENT_ID,
            'Content-Type': 'application/json'
          },
          timeout: API_TIMEOUT
        }
      );
      // Application-level rejection of the query: report it, never retry.
      const gqlErrors = envelope.errors;
      if (gqlErrors?.length) {
        const messages = gqlErrors.map((gqlError) => gqlError.message).join('; ');
        appendDebugLog('public-gql-errors', { messages, attempt });
        console.error('Public Twitch GQL errors:', messages);
        return null;
      }
      // Record that an earlier failure was recovered by retrying.
      if (attempt > 1) appendDebugLog('public-gql-recovered', { attempt });
      return envelope.data || null;
    } catch (e) {
      lastError = e;
      const transient = isTransientAxiosError(e);
      const willRetry = transient && attempt < TWITCH_GQL_RETRY_ATTEMPTS;
      appendDebugLog('public-gql-failed', {
        attempt,
        maxAttempts: TWITCH_GQL_RETRY_ATTEMPTS,
        transient,
        willRetry,
        error: String(e)
      });
      if (willRetry) {
        // Exponential backoff (base * 2^(attempt-1)) plus 0-249 ms of jitter.
        const backoffMs = TWITCH_GQL_RETRY_BASE_DELAY_MS * 2 ** (attempt - 1);
        const jitterMs = Math.floor(Math.random() * 250);
        await sleep(backoffMs + jitterMs);
        continue;
      }
      break;
    }
  }
  console.error('Public Twitch GQL request failed:', lastError);
  return null;
}
async function getPublicUserId(username: string): Promise<string | null> {
@ -4050,38 +4098,59 @@ app.whenReady().then(() => {
});
});
app.on('window-all-closed', () => {
// Both window-all-closed and before-quit ran nearly identical cleanup blocks
// before, with slight drift (only window-all-closed killed children, only
// window-all-closed did anything platform-specific). Consolidating them into
// a single idempotent helper means any future tweak (e.g. flushing a new
// debug stream) lands once and applies on every quit path.
//
// NOTE(review): this listing is a rendered diff without +/- markers, so a few
// statements below appear twice (a bare kill() directly above its try/catch
// form, and stopDebugLogFlushTimer(true) both before and after persistence).
// The first of each pair is presumably the removed pre-change line — confirm
// against the committed file.
// Guard flag: set by the first shutdownCleanup() call so later calls return early.
let shutdownCleanupDone = false;
/**
 * Shared shutdown sequence for the app's quit-path events.
 *
 * Runs at most once: the first call sets shutdownCleanupDone and every later
 * call returns immediately. In order, it writes a 'shutdown-cleanup' debug-log
 * entry, calls the metadata-cache and auto-update stop/cleanup helpers, kills
 * the tracked child processes and clears their collections, calls
 * saveConfig(config) and flushQueueSave(), and finally calls
 * stopDebugLogFlushTimer(true).
 *
 * @param reason Name of the event that triggered the cleanup; only used in
 *               the debug-log entry.
 */
function shutdownCleanup(reason: 'window-all-closed' | 'before-quit'): void {
// A previous quit event already ran the cleanup — nothing left to do.
if (shutdownCleanupDone) return;
shutdownCleanupDone = true;
appendDebugLog('shutdown-cleanup', { reason });
stopMetadataCacheCleanup();
cleanupMetadataCaches('shutdown');
// NOTE(review): presumably the removed pre-change position of this call; the
// comment near the end of the function says the flush belongs after persistence.
stopDebugLogFlushTimer(true);
stopAutoUpdatePolling();
// Kill all active children: queue downloads, standalone clip downloads,
// and any in-flight cutter/merger/splitter ffmpeg. before-quit used to
// skip this entirely; window-all-closed did it but only via direct
// kill() (no try/catch around the queue process kill).
for (const [, tracking] of activeDownloads) {
if (tracking.process) {
tracking.process.kill();
try { tracking.process.kill(); } catch { /* already exited */ }
}
}
activeDownloads.clear();
for (const [, proc] of activeClipProcesses) {
try { proc.kill(); } catch { }
try { proc.kill(); } catch { /* already exited */ }
}
activeClipProcesses.clear();
if (currentEditorProcess) {
currentEditorProcess.kill();
try { currentEditorProcess.kill(); } catch { /* already exited */ }
// Drop the reference so the editor process is not tracked any longer.
currentEditorProcess = null;
}
// Persist state while the debug-log flush timer is still meant to be running.
saveConfig(config);
flushQueueSave();
// Flush debug log AFTER persisting state so any errors saving config /
// queue land in the log before the timer is gone.
stopDebugLogFlushTimer(true);
}
// Last window closed: run the shared cleanup, then call app.quit() on every
// platform except 'darwin'.
app.on('window-all-closed', () => {
  shutdownCleanup('window-all-closed');
  const isDarwin = process.platform === 'darwin';
  if (isDarwin) return;
  app.quit();
});
// Quit-path handler for the app's 'before-quit' event.
// NOTE(review): this listing is a rendered diff without +/- markers. The six
// direct cleanup calls below look like the removed pre-change body, and the
// shutdownCleanup('before-quit') call looks like their replacement — confirm
// against the committed file before relying on this listing.
app.on('before-quit', () => {
stopMetadataCacheCleanup();
cleanupMetadataCaches('shutdown');
stopDebugLogFlushTimer(true);
stopAutoUpdatePolling();
saveConfig(config);
flushQueueSave();
// Delegates to the shared helper, which returns early if it already ran.
shutdownCleanup('before-quit');
});