real-debrid-downloader/src/main/extractor.ts

1359 lines
44 KiB
TypeScript

import fs from "node:fs";
import path from "node:path";
import os from "node:os";
import { spawn } from "node:child_process";
import AdmZip from "adm-zip";
import { CleanupMode, ConflictMode } from "../shared/types";
import { logger } from "./logger";
import { removeDownloadLinkArtifacts, removeSampleArtifacts } from "./cleanup";
// Passwords that are always tried in addition to user- and environment-supplied ones
// (the empty string stands for "no password").
const DEFAULT_ARCHIVE_PASSWORDS = ["", "serienfans.org", "serienjunkies.org"];
// Error message (German) thrown when no extractor binary could be resolved.
const NO_EXTRACTOR_MESSAGE = "WinRAR/UnRAR nicht gefunden. Bitte WinRAR installieren.";
// Extractor command cached after a successful probe; reset to null when spawning it
// reports a missing command.
let resolvedExtractorCommand: string | null = null;
// Message and timestamp (ms since epoch) of the last failed resolution; used to
// throttle re-probing for EXTRACTOR_RETRY_AFTER_MS.
let resolveFailureReason = "";
let resolveFailureAt = 0;
// Set to false once the extractor rejected the performance switches, so later runs skip them.
let externalExtractorSupportsPerfFlags = true;
// Resolution promise currently in flight, shared by concurrent callers.
let resolveExtractorCommandInFlight: Promise<string> | null = null;
// Minimum time after a failed resolution before the candidates are probed again.
const EXTRACTOR_RETRY_AFTER_MS = 30_000;
// Default per-entry size limit (in MB) of the in-process ZIP extractor;
// overridable via RD_ZIP_ENTRY_MEMORY_LIMIT_MB.
const DEFAULT_ZIP_ENTRY_MEMORY_LIMIT_MB = 256;
// Watchdog timeout for a single extractor probe invocation.
const EXTRACTOR_PROBE_TIMEOUT_MS = 8_000;
// Default percentage of CPU cores used to derive the extractor thread count;
// overridable via RD_EXTRACT_CPU_BUDGET_PERCENT.
const DEFAULT_EXTRACT_CPU_BUDGET_PERCENT = 80;
/** Options accepted by the package extraction entry point. */
export interface ExtractOptions {
// Directory that is scanned for archive files.
packageDir: string;
// Directory the archive contents are extracted into.
targetDir: string;
// What happens to source archives after extraction ("none" keeps them, "trash" moves
// them into a ".rd-trash" subfolder; any other value deletes them).
cleanupMode: CleanupMode;
// Conflict handling for existing output files; anything other than "rename" or
// "overwrite" is treated as "skip".
conflictMode: ConflictMode;
// NOTE(review): presumably toggles removal of download-link artifacts after extraction — usage not visible here.
removeLinks: boolean;
// NOTE(review): presumably toggles removal of sample files after extraction — usage not visible here.
removeSamples: boolean;
// Newline-separated list of additional archive passwords.
passwordList?: string;
// Abort signal; an aborted extraction surfaces as an "aborted:extract" error.
signal?: AbortSignal;
// Receives progress updates; exceptions thrown by the callback are logged and suppressed.
onProgress?: (update: ExtractProgressUpdate) => void;
// When set, only archives whose resolved path (lower-cased on Windows) is contained are processed.
onlyArchives?: Set<string>;
// NOTE(review): presumably skips the cleanup steps after extraction — usage not visible here.
skipPostCleanup?: boolean;
// When set, the resume-state file name is made specific to this package id.
packageId?: string;
}
/** Progress snapshot passed to `ExtractOptions.onProgress`. */
export interface ExtractProgressUpdate {
// Number of archives counted as processed so far.
current: number;
// Total number of archives in this run (at least 1).
total: number;
// Overall progress in percent (0..100), including the share of the current archive.
percent: number;
// Name of the archive this update refers to (may be empty).
archiveName: string;
// Progress of the current archive in percent, when known.
archivePercent?: number;
// Elapsed time in milliseconds, when provided by the caller.
elapsedMs?: number;
// "extracting" while work is in progress, "done" for a completion update.
phase: "extracting" | "done";
}
// Maximum number of characters of extractor output kept for error analysis (tail is kept).
const MAX_EXTRACT_OUTPUT_BUFFER = 48 * 1024;
// Resume-state file name used when no package id is supplied.
const EXTRACT_PROGRESS_FILE = ".rd_extract_progress.json";
// Extraction watchdog: base timeout, additional time per GiB of archive data, and upper bound.
const EXTRACT_BASE_TIMEOUT_MS = 6 * 60 * 1000;
const EXTRACT_PER_GIB_TIMEOUT_MS = 4 * 60 * 1000;
const EXTRACT_MAX_TIMEOUT_MS = 120 * 60 * 1000;
// Collator for ordering archives: numeric-aware ("2" before "10") and case-insensitive.
const ARCHIVE_SORT_COLLATOR = new Intl.Collator(undefined, { numeric: true, sensitivity: "base" });
/**
 * Resolves the per-entry size limit of the in-process ZIP extractor, in bytes.
 *
 * RD_ZIP_ENTRY_MEMORY_LIMIT_MB overrides the built-in default when it is a
 * finite number between 8 and 4096 (inclusive).
 */
function zipEntryMemoryLimitBytes(): number {
  const bytesPerMb = 1024 * 1024;
  const overrideMb = Number(process.env.RD_ZIP_ENTRY_MEMORY_LIMIT_MB ?? NaN);
  const overrideUsable = Number.isFinite(overrideMb) && overrideMb >= 8 && overrideMb <= 4096;
  return overrideUsable
    ? Math.floor(overrideMb * bytesPerMb)
    : DEFAULT_ZIP_ENTRY_MEMORY_LIMIT_MB * bytesPerMb;
}
/**
 * Normalizes a file path for use as a Set/Map key.
 * On Windows the key is lower-cased; elsewhere the path is returned unchanged.
 */
export function pathSetKey(filePath: string): string {
  if (process.platform !== "win32") {
    return filePath;
  }
  return filePath.toLowerCase();
}
/**
 * Normalizes an archive file name for comparisons: falsy input becomes "",
 * and the name is lower-cased on Windows.
 */
function archiveNameKey(fileName: string): string {
  const name = String(fileName || "");
  if (process.platform === "win32") {
    return name.toLowerCase();
  }
  return name;
}
/**
 * Derives the sort key of an archive: its lower-cased base name with the
 * archive/volume suffix and any trailing separators removed.
 */
function archiveSortKey(filePath: string): string {
  // Applied in this order; each expression strips at most one trailing suffix.
  const strippers: RegExp[] = [
    /\.part0*1\.rar$/i,
    /\.zip\.\d{3}$/i,
    /\.7z\.\d{3}$/i,
    /\.rar$/i,
    /\.zip$/i,
    /\.7z$/i,
    /[._\-\s]+$/g
  ];
  let key = path.basename(filePath).toLowerCase();
  for (const stripper of strippers) {
    key = key.replace(stripper, "");
  }
  return key;
}
/**
 * Ranks an archive by type for tie-breaking during sorting:
 * first RAR volume (0), other RAR (1), ZIP incl. split parts (2),
 * 7z incl. split parts (3), anything else (9).
 */
function archiveTypeRank(filePath: string): number {
  const name = path.basename(filePath).toLowerCase();
  const ranking: Array<[RegExp, number]> = [
    [/\.part0*1\.rar$/i, 0],
    [/\.rar$/i, 1],
    [/\.zip(?:\.\d{3})?$/i, 2],
    [/\.7z(?:\.\d{3})?$/i, 3]
  ];
  const hit = ranking.find(([shape]) => shape.test(name));
  return hit ? hit[1] : 9;
}
// Shape of the JSON resume-state file written into the package directory.
type ExtractResumeState = {
// Normalized names of the archives that were already extracted.
completedArchives: string[];
};
/**
 * Lists the archives in `packageDir` that extraction should start from.
 *
 * Only entry points are returned: the first volume of a multipart RAR, single RARs,
 * the `.001` part of split ZIP/7z sets, and plain ZIP/7z files that have no `.001`
 * sibling. The result is de-duplicated and ordered by sort key, archive type rank,
 * and finally by file name.
 *
 * @param packageDir Directory to scan (not recursive).
 * @returns Sorted archive paths; empty when the directory is missing or unreadable.
 */
export function findArchiveCandidates(packageDir: string): string[] {
  if (!packageDir || !fs.existsSync(packageDir)) {
    return [];
  }

  let filePaths: string[];
  try {
    filePaths = fs.readdirSync(packageDir, { withFileTypes: true })
      .filter((dirent) => dirent.isFile())
      .map((dirent) => path.join(packageDir, dirent.name));
  } catch {
    return [];
  }

  const lowerNames = new Set(filePaths.map((filePath) => path.basename(filePath).toLowerCase()));
  // A plain archive is skipped when a "<name>.001" split part sits next to it.
  const withoutSplitSibling = (extension: RegExp): string[] =>
    filePaths.filter((filePath) => {
      const name = path.basename(filePath);
      return extension.test(name) && !lowerNames.has(`${name}.001`.toLowerCase());
    });

  const groups: string[][] = [
    filePaths.filter((filePath) => /\.part0*1\.rar$/i.test(filePath)),
    filePaths.filter((filePath) => /\.rar$/i.test(filePath) && !/\.part\d+\.rar$/i.test(filePath)),
    filePaths.filter((filePath) => /\.zip\.001$/i.test(filePath)),
    withoutSplitSibling(/\.zip$/i),
    filePaths.filter((filePath) => /\.7z\.001$/i.test(filePath)),
    withoutSplitSibling(/\.7z$/i)
  ];

  // The first occurrence per normalized path wins; Map keeps insertion order.
  const byKey = new Map<string, string>();
  for (const group of groups) {
    for (const candidate of group) {
      const key = pathSetKey(candidate);
      if (!byKey.has(key)) {
        byKey.set(key, candidate);
      }
    }
  }

  const ordered = Array.from(byKey.values());
  ordered.sort((left, right) =>
    ARCHIVE_SORT_COLLATOR.compare(archiveSortKey(left), archiveSortKey(right))
    || archiveTypeRank(left) - archiveTypeRank(right)
    || ARCHIVE_SORT_COLLATOR.compare(path.basename(left), path.basename(right))
  );
  return ordered;
}
/**
 * Reduces the configured conflict mode to one the extractors can act on.
 * "rename" and "overwrite" pass through; every other value becomes "skip".
 */
function effectiveConflictMode(conflictMode: ConflictMode): "overwrite" | "skip" | "rename" {
  switch (conflictMode) {
    case "rename":
      return "rename";
    case "overwrite":
      return "overwrite";
    default:
      return "skip";
  }
}
/**
 * Produces a compact single-line error text: whitespace runs collapse to one
 * space, the result is trimmed and capped at 240 characters.
 */
function cleanErrorText(text: string): string {
  const singleLine = String(text || "").replace(/\s+/g, " ");
  const trimmed = singleLine.trim();
  return trimmed.slice(0, 240);
}
/**
 * Appends `chunk` to `base` and keeps only the trailing `maxLen` characters
 * when the combined text grows beyond that limit.
 */
function appendLimited(base: string, chunk: string, maxLen = MAX_EXTRACT_OUTPUT_BUFFER): string {
  const combined = base + chunk;
  const overflow = combined.length - maxLen;
  return overflow > 0 ? combined.slice(overflow) : combined;
}
/**
 * Extracts the most recent percentage from a chunk of extractor output.
 *
 * @param chunk Raw output text.
 * @returns The last "<n>%" value in the chunk that lies within 0..100, or null when there is none.
 */
function parseProgressPercent(chunk: string): number | null {
  const source = String(chunk || "");
  const scanner = /(?:^|\D)(\d{1,3})%/g;
  let newest: number | null = null;
  for (let hit = scanner.exec(source); hit !== null; hit = scanner.exec(source)) {
    const percent = Number(hit[1]);
    if (Number.isFinite(percent) && percent >= 0 && percent <= 100) {
      newest = percent;
    }
  }
  return newest;
}
/**
 * Decides whether a ZIP should go straight to the external extractor:
 * true for archives of 64 MiB or more, and also when the size cannot be read.
 */
function shouldPreferExternalZip(archivePath: string): boolean {
  const thresholdBytes = 64 * 1024 * 1024;
  let sizeBytes: number;
  try {
    sizeBytes = fs.statSync(archivePath).size;
  } catch {
    return true;
  }
  return sizeBytes >= thresholdBytes;
}
/**
 * Computes the watchdog timeout for extracting one archive.
 *
 * The timeout grows with the combined size of the archive and its related
 * volumes (base + per-GiB share) and is clamped between the base and the
 * maximum timeout. Any failure while measuring falls back to the base timeout.
 */
function computeExtractTimeoutMs(archivePath: string): number {
  const bytesPerGib = 1024 * 1024 * 1024;
  try {
    let totalBytes = 0;
    for (const volumePath of collectArchiveCleanupTargets(archivePath)) {
      try {
        totalBytes += fs.statSync(volumePath).size;
      } catch {
        // a missing volume simply contributes nothing
      }
    }
    if (totalBytes <= 0) {
      totalBytes = fs.statSync(archivePath).size;
    }
    const sizeShareMs = Math.floor((totalBytes / bytesPerGib) * EXTRACT_PER_GIB_TIMEOUT_MS);
    const cappedMs = Math.min(EXTRACT_MAX_TIMEOUT_MS, EXTRACT_BASE_TIMEOUT_MS + sizeShareMs);
    return Math.max(EXTRACT_BASE_TIMEOUT_MS, cappedMs);
  } catch {
    return EXTRACT_BASE_TIMEOUT_MS;
  }
}
/**
 * Returns the path of the resume-state file inside `packageDir`.
 * With a package id the file name is specific to that package; otherwise the
 * shared default name is used.
 */
function extractProgressFilePath(packageDir: string, packageId?: string): string {
  const fileName = packageId
    ? `.rd_extract_progress_${packageId}.json`
    : EXTRACT_PROGRESS_FILE;
  return path.join(packageDir, fileName);
}
/**
 * Loads the set of already-extracted archive names from the resume-state file.
 *
 * @returns Normalized, non-empty archive names; an empty set when the file is
 *          missing, unreadable, or not valid JSON.
 */
function readExtractResumeState(packageDir: string, packageId?: string): Set<string> {
  const progressPath = extractProgressFilePath(packageDir, packageId);
  if (!fs.existsSync(progressPath)) {
    return new Set<string>();
  }
  try {
    const raw = fs.readFileSync(progressPath, "utf8");
    const payload = JSON.parse(raw) as Partial<ExtractResumeState>;
    const restored = new Set<string>();
    if (Array.isArray(payload.completedArchives)) {
      for (const entry of payload.completedArchives) {
        const key = archiveNameKey(String(entry || "").trim());
        if (key) {
          restored.add(key);
        }
      }
    }
    return restored;
  } catch {
    return new Set<string>();
  }
}
/**
 * Persists the set of completed archives to the resume-state file.
 * Names are normalized and sorted; write failures are logged, never thrown.
 */
function writeExtractResumeState(packageDir: string, completedArchives: Set<string>, packageId?: string): void {
  try {
    fs.mkdirSync(packageDir, { recursive: true });
    const names = Array.from(completedArchives, (name) => archiveNameKey(name));
    names.sort((a, b) => a.localeCompare(b));
    const payload: ExtractResumeState = { completedArchives: names };
    const serialized = JSON.stringify(payload, null, 2);
    fs.writeFileSync(extractProgressFilePath(packageDir, packageId), serialized, "utf8");
  } catch (error) {
    logger.warn(`ExtractResumeState schreiben fehlgeschlagen: ${String(error)}`);
  }
}
/** Deletes the resume-state file of a package; any failure is ignored. */
function clearExtractResumeState(packageDir: string, packageId?: string): void {
  try {
    const progressPath = extractProgressFilePath(packageDir, packageId);
    fs.rmSync(progressPath, { force: true });
  } catch {
    // best-effort removal
  }
}
/** Tells whether an error text carries one of the extraction abort markers (case-insensitive). */
function isExtractAbortError(errorText: string): boolean {
  const lowered = String(errorText || "").toLowerCase();
  const abortMarkers = ["aborted:extract", "extract_aborted"];
  return abortMarkers.some((marker) => lowered.includes(marker));
}
/**
 * Builds the ordered, de-duplicated list of passwords to try.
 *
 * Order: no password, the user's list (one per line), RD_ARCHIVE_PASSWORDS
 * (separated by ';', ',' or newline), then the built-in defaults. Entries are
 * trimmed and empty ones dropped.
 */
function archivePasswords(listInput: string): string[] {
  const splitTrimmed = (raw: unknown, separator: RegExp): string[] =>
    String(raw || "")
      .split(separator)
      .map((part) => part.trim())
      .filter(Boolean);

  const custom = splitTrimmed(listInput, /\r?\n/g);
  const fromEnv = splitTrimmed(process.env.RD_ARCHIVE_PASSWORDS, /[;,\n]/g);
  const combined = ["", ...custom, ...fromEnv, ...DEFAULT_ARCHIVE_PASSWORDS];
  return Array.from(new Set(combined));
}
/**
 * Moves a password that just worked to the front of the candidate list.
 *
 * @returns The same array instance when nothing changes (empty password, single
 *          candidate, password missing or already first); otherwise a new array.
 */
function prioritizePassword(passwords: string[], successful: string): string[] {
  const preferred = String(successful || "");
  const position = preferred && passwords.length > 1 ? passwords.indexOf(preferred) : -1;
  if (position <= 0) {
    return passwords;
  }
  return [preferred, ...passwords.slice(0, position), ...passwords.slice(position + 1)];
}
/**
 * Builds the ordered, de-duplicated list of extractor commands to probe.
 *
 * Order: a previously resolved command (if any), the UnRAR.exe locations below
 * the known install roots, then the bare command names "UnRAR.exe" and "unrar"
 * for PATH lookup.
 *
 * The native 64-bit Program Files directory (ProgramW6432) is included so that a
 * 32-bit process on 64-bit Windows also finds a 64-bit WinRAR installation; it
 * replaces a duplicated "Program Files (x86)" entry that had no effect.
 */
function winRarCandidates(): string[] {
  const programFiles = process.env.ProgramFiles || "C:\\Program Files";
  const programFilesX86 = process.env["ProgramFiles(x86)"] || "C:\\Program Files (x86)";
  // Only set for WOW64 processes; there ProgramFiles itself points at the x86 directory.
  const programFilesNative = process.env.ProgramW6432 || "";
  const localAppData = process.env.LOCALAPPDATA || "";

  const installed = [
    path.join(programFiles, "WinRAR", "UnRAR.exe"),
    path.join(programFilesX86, "WinRAR", "UnRAR.exe")
  ];
  if (programFilesNative) {
    installed.push(path.join(programFilesNative, "WinRAR", "UnRAR.exe"));
  }
  if (localAppData) {
    installed.push(path.join(localAppData, "Programs", "WinRAR", "UnRAR.exe"));
  }

  const pathLookups = ["UnRAR.exe", "unrar"];
  const ordered = resolvedExtractorCommand
    ? [resolvedExtractorCommand, ...installed, ...pathLookups]
    : [...installed, ...pathLookups];
  // The Set removes duplicates (e.g. when several roots resolve to the same directory).
  return Array.from(new Set(ordered.filter(Boolean)));
}
/**
 * Tells whether a command refers to a concrete file location (absolute path or
 * any path containing a separator) rather than a bare name resolved via PATH.
 */
function isAbsoluteCommand(command: string): boolean {
  if (path.isAbsolute(command)) {
    return true;
  }
  return /[\\/]/.test(command);
}
/** Tells whether an error text contains the "nicht gefunden" (not found) marker, case-insensitively. */
function isNoExtractorError(errorText: string): boolean {
  const lowered = String(errorText || "").toLowerCase();
  return lowered.indexOf("nicht gefunden") !== -1;
}
/** Tells whether the extractor output says the file is not a RAR archive (German or English wording). */
function isUnsupportedArchiveFormatError(errorText: string): boolean {
  const lowered = String(errorText || "").toLowerCase();
  const markers = ["kein rar-archiv", "not a rar archive", "is not a rar archive"];
  return markers.some((marker) => lowered.includes(marker));
}
/** Tells whether the extractor output indicates a command-line switch it does not understand. */
function isUnsupportedExtractorSwitchError(errorText: string): boolean {
  const lowered = String(errorText || "").toLowerCase();
  const markers = [
    "unknown switch",
    "unknown option",
    "invalid switch",
    "unsupported option",
    "unbekannter schalter",
    "falscher parameter"
  ];
  return markers.some((marker) => lowered.includes(marker));
}
/**
 * Tells whether performance switches may be passed to the extractor.
 * Enabled unless RD_EXTRACT_PERF_FLAGS is "0", "false", "off" or "no"
 * (trimmed, case-insensitive).
 */
function shouldUseExtractorPerformanceFlags(): boolean {
  const setting = String(process.env.RD_EXTRACT_PERF_FLAGS || "").trim().toLowerCase();
  const disablingValues = ["0", "false", "off", "no"];
  return !disablingValues.includes(setting);
}
/**
 * Returns the share of CPU cores (in percent) the extractor may use.
 * RD_EXTRACT_CPU_BUDGET_PERCENT overrides the default when it lies within 40..95;
 * fractional values are rounded down.
 */
function extractCpuBudgetPercent(): number {
  const override = Number(process.env.RD_EXTRACT_CPU_BUDGET_PERCENT ?? NaN);
  const overrideUsable = Number.isFinite(override) && override >= 40 && override <= 95;
  return overrideUsable ? Math.floor(override) : DEFAULT_EXTRACT_CPU_BUDGET_PERCENT;
}
/**
 * Builds the "-mt<n>" thread switch for the extractor.
 *
 * RD_EXTRACT_THREADS (1..32, rounded down) takes precedence. Otherwise the
 * thread count is the CPU count scaled by the CPU budget, clamped to 1..16.
 */
function extractorThreadSwitch(): string {
  const forced = Number(process.env.RD_EXTRACT_THREADS ?? NaN);
  if (Number.isFinite(forced) && forced >= 1 && forced <= 32) {
    return `-mt${Math.floor(forced)}`;
  }
  const cores = Math.max(1, os.cpus().length || 1);
  const budgeted = Math.floor((cores * extractCpuBudgetPercent()) / 100);
  const threads = Math.min(16, Math.max(1, budgeted));
  return `-mt${threads}`;
}
/**
 * Lowers the scheduling priority of the extractor process to "below normal".
 * Only acts on Windows and only for a valid positive pid; failures are ignored.
 */
function lowerExtractProcessPriority(childPid: number | undefined): void {
  const pid = Number(childPid || 0);
  const applicable = process.platform === "win32" && Number.isFinite(pid) && pid > 0;
  if (!applicable) {
    return;
  }
  try {
    os.setPriority(pid, os.constants.priority.PRIORITY_BELOW_NORMAL);
  } catch {
    // best-effort: the extraction works at normal priority too
  }
}
// Outcome of one extractor process invocation.
type ExtractSpawnResult = {
// True when the process succeeded (exit code 0, or exit code 1 without known error markers in the output).
ok: boolean;
// True when spawning failed with an ENOENT error, i.e. the command could not be found.
missingCommand: boolean;
// True when the run was cancelled through the abort signal.
aborted: boolean;
// True when the watchdog timeout killed the process.
timedOut: boolean;
// Cleaned-up error description; empty on success.
errorText: string;
};
/**
 * Terminates a child process, on Windows together with its descendants.
 *
 * With a valid pid on Windows, `taskkill /T /F` is spawned; if that cannot be
 * started, the child's own `kill()` is used. In every other case `kill()` is
 * called directly. Errors thrown by `kill()` are ignored.
 */
function killProcessTree(child: { pid?: number; kill: () => void }): void {
  const killDirectly = (): void => {
    try {
      child.kill();
    } catch {
      // ignore
    }
  };

  const pid = Number(child.pid || 0);
  const hasValidPid = Number.isFinite(pid) && pid > 0;
  if (!hasValidPid || process.platform !== "win32") {
    killDirectly();
    return;
  }

  try {
    const killer = spawn("taskkill", ["/PID", String(pid), "/T", "/F"], {
      windowsHide: true,
      stdio: "ignore"
    });
    killer.on("error", () => killDirectly());
  } catch {
    killDirectly();
  }
}
/**
 * Runs the external extractor once and reports the outcome as a result object.
 *
 * @param command   Executable to spawn.
 * @param args      Command-line arguments.
 * @param onChunk   Optional callback receiving every stdout/stderr chunk as text.
 * @param signal    Optional abort signal; aborting kills the process tree.
 * @param timeoutMs Optional watchdog timeout; when it elapses the process tree is killed.
 * @returns A promise resolving with the outcome; spawn errors, aborts and timeouts are
 *          reported through the result flags.
 */
function runExtractCommand(
command: string,
args: string[],
onChunk?: (chunk: string) => void,
signal?: AbortSignal,
timeoutMs?: number
): Promise<ExtractSpawnResult> {
// Already aborted: do not spawn at all.
if (signal?.aborted) {
return Promise.resolve({ ok: false, missingCommand: false, aborted: true, timedOut: false, errorText: "aborted:extract" });
}
return new Promise((resolve) => {
// Guards against resolving more than once (timeout, abort, error and close can all fire).
let settled = false;
// Combined stdout/stderr, limited to the trailing MAX_EXTRACT_OUTPUT_BUFFER characters.
let output = "";
const child = spawn(command, args, { windowsHide: true });
lowerExtractProcessPriority(child.pid);
let timeoutId: NodeJS.Timeout | null = null;
let timedOutByWatchdog = false;
let abortedBySignal = false;
// Single exit point: clears the watchdog, detaches the abort listener and resolves once.
const finish = (result: ExtractSpawnResult): void => {
if (settled) {
return;
}
settled = true;
if (timeoutId) {
clearTimeout(timeoutId);
timeoutId = null;
}
if (signal && onAbort) {
signal.removeEventListener("abort", onAbort);
}
resolve(result);
};
// Watchdog: kill the process tree and report a timeout without waiting for "close".
if (timeoutMs && timeoutMs > 0) {
timeoutId = setTimeout(() => {
timedOutByWatchdog = true;
killProcessTree(child);
finish({
ok: false,
missingCommand: false,
aborted: false,
timedOut: true,
errorText: `Entpacken Timeout nach ${Math.ceil(timeoutMs / 1000)}s`
});
}, timeoutMs);
}
// Abort handler: kill the process tree and report the abort immediately.
const onAbort = signal
? (): void => {
abortedBySignal = true;
killProcessTree(child);
finish({ ok: false, missingCommand: false, aborted: true, timedOut: false, errorText: "aborted:extract" });
}
: null;
if (signal && onAbort) {
signal.addEventListener("abort", onAbort, { once: true });
}
// stdout and stderr are both buffered for error analysis and forwarded to onChunk.
child.stdout.on("data", (chunk) => {
const text = String(chunk || "");
output = appendLimited(output, text);
onChunk?.(text);
});
child.stderr.on("data", (chunk) => {
const text = String(chunk || "");
output = appendLimited(output, text);
onChunk?.(text);
});
// Process-level error (e.g. spawn failure): an ENOENT text marks the command as missing.
child.on("error", (error) => {
const text = cleanErrorText(String(error));
finish({
ok: false,
missingCommand: text.toLowerCase().includes("enoent"),
aborted: false,
timedOut: false,
errorText: text
});
});
child.on("close", (code) => {
// The abort/timeout flags take precedence over the exit code of the killed process.
if (abortedBySignal) {
finish({ ok: false, missingCommand: false, aborted: true, timedOut: false, errorText: "aborted:extract" });
return;
}
if (timedOutByWatchdog) {
finish({
ok: false,
missingCommand: false,
aborted: false,
timedOut: true,
errorText: `Entpacken Timeout nach ${Math.ceil((timeoutMs || 0) / 1000)}s`
});
return;
}
if (code === 0) {
finish({ ok: true, missingCommand: false, aborted: false, timedOut: false, errorText: "" });
return;
}
// Exit code 1 counts as success when the output contains none of the known error markers.
if (code === 1) {
const lowered = output.toLowerCase();
const warningOnly = !lowered.includes("crc failed")
&& !lowered.includes("checksum error")
&& !lowered.includes("wrong password")
&& !lowered.includes("cannot open")
&& !lowered.includes("fatal error")
&& !lowered.includes("unexpected end of archive")
&& !lowered.includes("error:");
if (warningOnly) {
finish({ ok: true, missingCommand: false, aborted: false, timedOut: false, errorText: "" });
return;
}
}
// Any other outcome is a failure; fall back to the exit code when there was no output.
const cleaned = cleanErrorText(output);
finish({
ok: false,
missingCommand: false,
aborted: false,
timedOut: false,
errorText: cleaned || `Exit Code ${String(code ?? "?")}`
});
});
});
}
/**
 * Builds the command-line arguments for extracting one archive.
 *
 * Commands whose name contains "unrar" or "winrar" get RAR-style switches; every
 * other command gets 7z-style switches.
 *
 * @param command             Extractor executable (decides the switch dialect).
 * @param archivePath         Archive to extract.
 * @param targetDir           Output directory.
 * @param conflictMode        Conflict handling; reduced to overwrite/rename/skip.
 * @param password            Archive password; empty means "no password".
 * @param usePerformanceFlags Allows the RAR performance switches (also gated by the environment).
 * @returns The argument vector for the extractor process.
 */
export function buildExternalExtractArgs(
  command: string,
  archivePath: string,
  targetDir: string,
  conflictMode: ConflictMode,
  password = "",
  usePerformanceFlags = true
): string[] {
  const mode = effectiveConflictMode(conflictMode);
  const commandLower = command.toLowerCase();
  const isRarDialect = commandLower.includes("unrar") || commandLower.includes("winrar");

  if (!isRarDialect) {
    const sevenZipOverwrite = { overwrite: "-aoa", rename: "-aou", skip: "-aos" }[mode];
    // NOTE: The password is passed as a CLI argument here as well, with the same
    // process-listing visibility caveat as in the RAR branch below.
    const sevenZipPassword = password ? `-p${password}` : "-p";
    return ["x", "-y", sevenZipOverwrite, sevenZipPassword, archivePath, `-o${targetDir}`];
  }

  const rarOverwrite = { overwrite: "-o+", rename: "-or", skip: "-o-" }[mode];
  // NOTE: The password is passed as a CLI argument (-p<password>), so it may be visible
  // via process listing tools (e.g. `ps aux` on Unix). This is unavoidable because the
  // WinRAR/UnRAR CLI does not accept passwords via stdin or environment variables.
  // On Windows (the target platform) this is less of a concern than on shared Unix systems.
  const rarPassword = password ? `-p${password}` : "-p-";
  const args = ["x", rarOverwrite, rarPassword, "-y"];
  if (usePerformanceFlags && shouldUseExtractorPerformanceFlags()) {
    args.push("-idc", extractorThreadSwitch());
  }
  args.push(archivePath, `${targetDir}${path.sep}`);
  return args;
}
/**
 * Probes the extractor candidates and caches the first one that responds.
 *
 * A previous failure is re-thrown without probing until EXTRACTOR_RETRY_AFTER_MS
 * has passed. Candidates given as file paths are skipped when the file does not exist.
 *
 * @returns The working extractor command.
 * @throws Error with NO_EXTRACTOR_MESSAGE when no candidate works.
 */
async function resolveExtractorCommandInternal(): Promise<string> {
  if (resolvedExtractorCommand) {
    return resolvedExtractorCommand;
  }

  if (resolveFailureReason) {
    const failureAgeMs = Date.now() - resolveFailureAt;
    if (failureAgeMs < EXTRACTOR_RETRY_AFTER_MS) {
      throw new Error(resolveFailureReason);
    }
    // Retry window elapsed: forget the failure and probe again.
    resolveFailureReason = "";
    resolveFailureAt = 0;
  }

  for (const candidate of winRarCandidates()) {
    const missingOnDisk = isAbsoluteCommand(candidate) && !fs.existsSync(candidate);
    if (missingOnDisk) {
      continue;
    }
    const helpSwitch = candidate.toLowerCase().includes("winrar") ? "-?" : "?";
    const probe = await runExtractCommand(candidate, [helpSwitch], undefined, undefined, EXTRACTOR_PROBE_TIMEOUT_MS);
    if (!probe.ok) {
      continue;
    }
    resolvedExtractorCommand = candidate;
    resolveFailureReason = "";
    resolveFailureAt = 0;
    logger.info(`Entpacker erkannt: ${candidate}`);
    return candidate;
  }

  resolveFailureReason = NO_EXTRACTOR_MESSAGE;
  resolveFailureAt = Date.now();
  throw new Error(resolveFailureReason);
}
/**
 * Returns the extractor command, resolving it at most once at a time.
 * Callers arriving while a resolution is running share that resolution's promise.
 */
async function resolveExtractorCommand(): Promise<string> {
  if (resolvedExtractorCommand) {
    return resolvedExtractorCommand;
  }
  if (resolveExtractorCommandInFlight) {
    return resolveExtractorCommandInFlight;
  }

  const attempt = resolveExtractorCommandInternal();
  resolveExtractorCommandInFlight = attempt;
  try {
    return await attempt;
  } finally {
    // Only clear the slot when it still holds this attempt.
    if (resolveExtractorCommandInFlight === attempt) {
      resolveExtractorCommandInFlight = null;
    }
  }
}
/**
 * Extracts one archive with the external extractor, trying each password in turn.
 *
 * If the extractor rejects the performance switches, the same password is retried
 * once without them and the switches stay disabled afterwards.
 *
 * @param archivePath        Archive to extract.
 * @param targetDir          Output directory (created when missing).
 * @param conflictMode       Conflict handling for existing files.
 * @param passwordCandidates Passwords to try, in order.
 * @param onArchiveProgress  Receives 0 at start, increasing percentages, and 100 on success.
 * @param signal             Optional abort signal.
 * @returns The password that worked.
 * @throws Error "aborted:extract" on abort, NO_EXTRACTOR_MESSAGE when the command is
 *         missing, otherwise the last extractor error.
 */
async function runExternalExtract(
  archivePath: string,
  targetDir: string,
  conflictMode: ConflictMode,
  passwordCandidates: string[],
  onArchiveProgress?: (percent: number) => void,
  signal?: AbortSignal
): Promise<string> {
  const command = await resolveExtractorCommand();
  const timeoutMs = computeExtractTimeoutMs(archivePath);
  fs.mkdirSync(targetDir, { recursive: true });

  // Progress is only reported when it increases, across all attempts.
  let highestPercent = 0;
  const reportChunk = (chunk: string): void => {
    const parsed = parseProgressPercent(chunk);
    if (parsed !== null && parsed > highestPercent) {
      highestPercent = parsed;
      onArchiveProgress?.(highestPercent);
    }
  };
  const attempt = (password: string, withPerformanceFlags: boolean): Promise<ExtractSpawnResult> => {
    const args = buildExternalExtractArgs(command, archivePath, targetDir, conflictMode, password, withPerformanceFlags);
    return runExtractCommand(command, args, reportChunk, signal, timeoutMs);
  };

  let usePerformanceFlags = externalExtractorSupportsPerfFlags && shouldUseExtractorPerformanceFlags();
  let startAnnounced = false;
  let lastError = "";

  for (const password of passwordCandidates) {
    if (signal?.aborted) {
      throw new Error("aborted:extract");
    }
    if (!startAnnounced) {
      startAnnounced = true;
      onArchiveProgress?.(0);
    }

    let result = await attempt(password, usePerformanceFlags);
    if (!result.ok && usePerformanceFlags && isUnsupportedExtractorSwitchError(result.errorText)) {
      usePerformanceFlags = false;
      externalExtractorSupportsPerfFlags = false;
      logger.warn(`Entpacker ohne Performance-Flags fortgesetzt: ${path.basename(archivePath)}`);
      result = await attempt(password, false);
    }

    if (result.ok) {
      onArchiveProgress?.(100);
      return password;
    }
    if (result.aborted) {
      throw new Error("aborted:extract");
    }
    if (result.timedOut) {
      // A timeout will not be cured by another password; stop trying.
      lastError = result.errorText;
      break;
    }
    if (result.missingCommand) {
      // The cached command vanished: invalidate it and record the failure.
      resolvedExtractorCommand = null;
      resolveFailureReason = NO_EXTRACTOR_MESSAGE;
      resolveFailureAt = Date.now();
      throw new Error(NO_EXTRACTOR_MESSAGE);
    }
    lastError = result.errorText;
  }

  throw new Error(lastError || "Entpacken fehlgeschlagen");
}
/**
 * Tells whether an error stems from a ZIP safety guard (path traversal or a
 * suspiciously large entry), based on its text.
 */
function isZipSafetyGuardError(error: unknown): boolean {
  const lowered = String(error || "").toLowerCase();
  const markers = [
    "path traversal",
    "zip-eintrag verdächtig groß",
    "zip-eintrag verdaechtig gross"
  ];
  return markers.some((marker) => lowered.includes(marker));
}
/**
 * Tells whether an error means the in-process ZIP extractor hit one of its own
 * limits (entry too large or without a reliable size), based on its text.
 */
function isZipInternalLimitError(error: unknown): boolean {
  const lowered = String(error || "").toLowerCase();
  const markers = [
    "zip-eintrag zu groß",
    "zip-eintrag komprimiert zu groß",
    "zip-eintrag ohne sichere groessenangabe"
  ];
  return markers.some((marker) => lowered.includes(marker));
}
/**
 * Decides whether a failed in-process ZIP extraction should be retried with the
 * external extractor: never after a safety-guard error or an abort, otherwise always.
 */
function shouldFallbackToExternalZip(error: unknown): boolean {
  if (isZipSafetyGuardError(error)) {
    return false;
  }
  if (isZipInternalLimitError(error)) {
    return true;
  }
  const lowered = String(error || "").toLowerCase();
  const wasAborted = lowered.includes("aborted:extract") || lowered.includes("extract_aborted");
  return !wasAborted;
}
/**
 * Extracts a ZIP archive in-process via AdmZip.
 *
 * Entries resolving outside `targetDir` are skipped with a warning. Entries whose
 * declared or actual size exceeds the memory limit, or that lack usable size
 * information, make the function throw. Existing files are handled according to
 * the effective conflict mode (skip, rename with " (n)" suffix, or overwrite).
 *
 * @param archivePath  ZIP file to extract.
 * @param targetDir    Output directory.
 * @param conflictMode Conflict handling for existing files.
 * @param signal       Optional abort signal, checked between entries and before writes.
 * @throws Error "aborted:extract" on abort, or a size/rename-limit error.
 */
function extractZipArchive(archivePath: string, targetDir: string, conflictMode: ConflictMode, signal?: AbortSignal): void {
const mode = effectiveConflictMode(conflictMode);
const memoryLimitBytes = zipEntryMemoryLimitBytes();
const zip = new AdmZip(archivePath);
const entries = zip.getEntries();
const resolvedTarget = path.resolve(targetDir);
// Output paths written during this run (normalized), so duplicate entries inside the archive are detected.
const usedOutputs = new Set<string>();
// Next rename suffix to try per base output path.
const renameCounters = new Map<string, number>();
for (const entry of entries) {
if (signal?.aborted) {
throw new Error("aborted:extract");
}
// Path traversal guard: the resolved output must stay inside the target directory.
const baseOutputPath = path.resolve(targetDir, entry.entryName);
if (!baseOutputPath.startsWith(resolvedTarget + path.sep) && baseOutputPath !== resolvedTarget) {
logger.warn(`ZIP-Eintrag übersprungen (Path Traversal): ${entry.entryName}`);
continue;
}
if (entry.isDirectory) {
fs.mkdirSync(baseOutputPath, { recursive: true });
continue;
}
// The entry header is read through a structural cast to obtain the declared sizes.
const header = (entry as unknown as {
header?: {
size?: number;
compressedSize?: number;
crc?: number;
dataHeader?: {
size?: number;
compressedSize?: number;
crc?: number;
};
};
}).header;
const uncompressedSize = Number(header?.size ?? header?.dataHeader?.size ?? NaN);
const compressedSize = Number(header?.compressedSize ?? header?.dataHeader?.compressedSize ?? NaN);
// Without trustworthy sizes the memory limit cannot be enforced up front.
if (!Number.isFinite(uncompressedSize) || uncompressedSize < 0) {
throw new Error("ZIP-Eintrag ohne sichere Groessenangabe fur internen Entpacker");
}
if (!Number.isFinite(compressedSize) || compressedSize < 0) {
throw new Error("ZIP-Eintrag ohne sichere Groessenangabe fur internen Entpacker");
}
// Refuse entries whose declared sizes exceed the memory limit before decompressing them.
if (uncompressedSize > memoryLimitBytes) {
const entryMb = Math.ceil(uncompressedSize / (1024 * 1024));
const limitMb = Math.ceil(memoryLimitBytes / (1024 * 1024));
throw new Error(`ZIP-Eintrag zu groß für internen Entpacker (${entryMb} MB > ${limitMb} MB)`);
}
if (compressedSize > memoryLimitBytes) {
const entryMb = Math.ceil(compressedSize / (1024 * 1024));
const limitMb = Math.ceil(memoryLimitBytes / (1024 * 1024));
throw new Error(`ZIP-Eintrag komprimiert zu groß für internen Entpacker (${entryMb} MB > ${limitMb} MB)`);
}
let outputPath = baseOutputPath;
let outputKey = pathSetKey(outputPath);
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
// TOCTOU note: There is a small race between existsSync and writeFileSync below.
// This is acceptable here because zip extraction is single-threaded and we need
// the exists check to implement skip/rename conflict resolution semantics.
if (usedOutputs.has(outputKey) || fs.existsSync(outputPath)) {
if (mode === "skip") {
continue;
}
if (mode === "rename") {
// Find the first free "<name> (n)<ext>" candidate, continuing from the last suffix used for this path.
const parsed = path.parse(baseOutputPath);
const counterKey = pathSetKey(baseOutputPath);
let n = renameCounters.get(counterKey) || 1;
let candidate = baseOutputPath;
let candidateKey = outputKey;
while (n <= 10000) {
candidate = path.join(parsed.dir, `${parsed.name} (${n})${parsed.ext}`);
candidateKey = pathSetKey(candidate);
if (!usedOutputs.has(candidateKey) && !fs.existsSync(candidate)) {
break;
}
n += 1;
}
if (n > 10000) {
throw new Error(`ZIP-Rename-Limit erreicht für ${entry.entryName}`);
}
renameCounters.set(counterKey, n + 1);
if (signal?.aborted) {
throw new Error("aborted:extract");
}
outputPath = candidate;
outputKey = candidateKey;
}
// In "overwrite" mode the original output path is kept and overwritten below.
}
if (signal?.aborted) {
throw new Error("aborted:extract");
}
const data = entry.getData();
// Re-check against the actual decompressed size, since the declared sizes come from the archive itself.
if (data.length > memoryLimitBytes) {
const entryMb = Math.ceil(data.length / (1024 * 1024));
const limitMb = Math.ceil(memoryLimitBytes / (1024 * 1024));
throw new Error(`ZIP-Eintrag zu groß für internen Entpacker (${entryMb} MB > ${limitMb} MB)`);
}
// Reject data more than 20x larger than the bigger of the two declared sizes.
if (data.length > Math.max(uncompressedSize, compressedSize) * 20) {
throw new Error(`ZIP-Eintrag verdächtig groß nach Entpacken (${entry.entryName})`);
}
fs.writeFileSync(outputPath, data);
usedOutputs.add(outputKey);
}
}
/** Escapes every regular-expression metacharacter in `value` with a backslash. */
function escapeRegex(value: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (character) => `\\${character}`);
}
/**
 * Collects all files that belong to the same archive set as `sourceArchivePath`.
 *
 * Recognized sets: multipart RAR (`.partN.rar`), RAR with old-style volumes
 * (`.rNN`/`.rNNN`), ZIP with `.zNN` volumes, split ZIP (`.zip.NNN`) and
 * 7z with `.7z.NNN` parts. Matching is case-insensitive.
 *
 * @param sourceArchivePath Archive whose related files are wanted.
 * @param directoryFiles    Optional file names of the archive's directory; read from disk when omitted.
 * @returns The source path plus every matching sibling; only the source path when the
 *          directory cannot be read or the file type is not recognized.
 */
export function collectArchiveCleanupTargets(sourceArchivePath: string, directoryFiles?: string[]): string[] {
  const dir = path.dirname(sourceArchivePath);
  const fileName = path.basename(sourceArchivePath);
  const targets = new Set<string>([sourceArchivePath]);

  let siblingNames: string[];
  if (directoryFiles) {
    siblingNames = directoryFiles;
  } else {
    try {
      siblingNames = fs.readdirSync(dir, { withFileTypes: true })
        .filter((dirent) => dirent.isFile())
        .map((dirent) => dirent.name);
    } catch {
      return Array.from(targets);
    }
  }

  // Stem extraction: either cut a known suffix off, or capture the part before a volume suffix.
  const stripSuffix = (suffix: RegExp): string | null =>
    suffix.test(fileName) ? fileName.replace(suffix, "") : null;
  const captureStem = (shape: RegExp): string | null => {
    const match = fileName.match(shape);
    return match ? match[1] : null;
  };

  // Checked top to bottom; the first rule yielding a stem decides which siblings belong to the set.
  const rules: Array<{ stem: () => string | null; tails: string[] }> = [
    { stem: () => captureStem(/^(.*)\.part\d+\.rar$/i), tails: ["\\.part\\d+\\.rar"] },
    { stem: () => stripSuffix(/\.rar$/i), tails: ["\\.rar", "\\.r\\d{2,3}"] },
    { stem: () => stripSuffix(/\.zip$/i), tails: ["\\.zip", "\\.z\\d{2}"] },
    { stem: () => captureStem(/^(.*)\.zip\.\d{3}$/i), tails: ["\\.zip", "\\.zip\\.\\d{3}"] },
    { stem: () => stripSuffix(/\.7z$/i), tails: ["\\.7z", "\\.7z\\.\\d{3}"] },
    { stem: () => captureStem(/^(.*)\.7z\.\d{3}$/i), tails: ["\\.7z", "\\.7z\\.\\d{3}"] }
  ];

  for (const rule of rules) {
    const stem = rule.stem();
    if (stem === null) {
      continue;
    }
    const escapedStem = escapeRegex(stem);
    for (const tail of rule.tails) {
      const matcher = new RegExp(`^${escapedStem}${tail}$`, "i");
      for (const candidate of siblingNames) {
        if (matcher.test(candidate)) {
          targets.add(path.join(dir, candidate));
        }
      }
    }
    break;
  }

  return Array.from(targets);
}
/**
 * Removes the source archives (including all related volumes) after extraction.
 *
 * @param sourceFiles Archives that were extracted.
 * @param cleanupMode "none" keeps everything; "trash" moves files into a ".rd-trash"
 *                    folder next to them; any other mode deletes them.
 * @returns Number of files removed or moved. Individual failures are ignored.
 */
function cleanupArchives(sourceFiles: string[], cleanupMode: CleanupMode): number {
  if (cleanupMode === "none") {
    return 0;
  }

  // One directory listing per directory, shared by all archives in it.
  const listingByDir = new Map<string, string[]>();
  const listDirectory = (dir: string): string[] => {
    const cached = listingByDir.get(dir);
    if (cached) {
      return cached;
    }
    let names: string[];
    try {
      names = fs.readdirSync(dir, { withFileTypes: true })
        .filter((dirent) => dirent.isFile())
        .map((dirent) => dirent.name);
    } catch {
      names = [];
    }
    listingByDir.set(dir, names);
    return names;
  };

  const targets = new Set<string>();
  for (const sourceFile of sourceFiles) {
    const related = collectArchiveCleanupTargets(sourceFile, listDirectory(path.dirname(sourceFile)));
    for (const target of related) {
      targets.add(target);
    }
  }

  // Moves a file into "<dir>/.rd-trash" under a timestamped name that is not taken yet.
  const moveToTrashLike = (filePath: string): boolean => {
    try {
      const parsed = path.parse(filePath);
      const trashDir = path.join(parsed.dir, ".rd-trash");
      fs.mkdirSync(trashDir, { recursive: true });
      for (let attempt = 0; attempt <= 10000; attempt += 1) {
        const suffix = attempt === 0 ? "" : `-${attempt}`;
        const destination = path.join(trashDir, `${parsed.base}.${Date.now()}${suffix}`);
        if (!fs.existsSync(destination)) {
          fs.renameSync(filePath, destination);
          return true;
        }
      }
    } catch {
      // ignore
    }
    return false;
  };

  let removed = 0;
  for (const filePath of targets) {
    try {
      if (!fs.existsSync(filePath)) {
        continue;
      }
      if (cleanupMode === "trash") {
        if (moveToTrashLike(filePath)) {
          removed += 1;
        }
      } else {
        fs.rmSync(filePath, { force: true });
        removed += 1;
      }
    } catch {
      // ignore
    }
  }
  return removed;
}
/**
 * Tells whether a directory tree contains at least one regular file.
 *
 * The walk is bounded (8000 directories or 220 ms); when a bound is hit, the
 * answer falls back to "does the root directory have any entries at all".
 * Unreadable directories are skipped.
 */
function hasAnyFilesRecursive(rootDir: string): boolean {
  if (!fs.existsSync(rootDir)) {
    return false;
  }
  const giveUpAt = Date.now() + 220;
  const pending: string[] = [rootDir];
  for (let visited = 1; pending.length > 0; visited += 1) {
    if (visited > 8000 || Date.now() > giveUpAt) {
      return hasAnyEntries(rootDir);
    }
    const dir = pending.pop() as string;
    let children: fs.Dirent[];
    try {
      children = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      continue;
    }
    if (children.some((child) => child.isFile())) {
      return true;
    }
    for (const child of children) {
      if (child.isDirectory()) {
        pending.push(path.join(dir, child.name));
      }
    }
  }
  return false;
}
/**
 * Tells whether a directory has at least one entry (file or subdirectory).
 * A missing path, an empty argument, or a read error all yield false.
 */
function hasAnyEntries(rootDir: string): boolean {
  if (!rootDir || !fs.existsSync(rootDir)) {
    return false;
  }
  let names: string[];
  try {
    names = fs.readdirSync(rootDir);
  } catch {
    return false;
  }
  return names.length > 0;
}
/**
 * Removes every empty directory in the tree rooted at `rootDir`, the root included.
 *
 * Directories are processed longest path first, so a parent that only contained
 * empty directories becomes empty and is removed as well. Errors are ignored.
 *
 * @returns Number of directories removed.
 */
function removeEmptyDirectoryTree(rootDir: string): number {
  if (!fs.existsSync(rootDir)) {
    return 0;
  }

  // Collect the root and all nested directories.
  const collected: string[] = [rootDir];
  const pending: string[] = [rootDir];
  while (pending.length > 0) {
    const dir = pending.pop() as string;
    let children: fs.Dirent[];
    try {
      children = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      continue;
    }
    for (const child of children) {
      if (!child.isDirectory()) {
        continue;
      }
      const childPath = path.join(dir, child.name);
      collected.push(childPath);
      pending.push(childPath);
    }
  }

  // A child path is always longer than its parent, so children come first.
  collected.sort((left, right) => right.length - left.length);

  let removedCount = 0;
  for (const dir of collected) {
    try {
      if (fs.readdirSync(dir).length === 0) {
        fs.rmdirSync(dir);
        removedCount += 1;
      }
    } catch {
      // ignore
    }
  }
  return removedCount;
}
/**
 * Extracts the archives found in `options.packageDir` into `options.targetDir`.
 *
 * Flow:
 * 1. Collect archive candidates; when `options.onlyArchives` is set, keep only
 *    candidates whose resolved path (lower-cased on win32) is in that set.
 * 2. Load the persisted resume state, drop entries that no longer match any
 *    candidate, and skip archives that are already recorded as completed.
 * 3. Extract the remaining archives one at a time, emitting progress updates
 *    and persisting the resume state after every success.
 * 4. Unless `options.skipPostCleanup` is set, clean up archives, link
 *    artifacts, samples and empty directories according to the options.
 *
 * A failing archive does not stop the run; it is counted in `failed` and its
 * error text is kept in `lastError`. The exception is a "no extractor" error:
 * all remaining archives are then counted as failed and the loop ends.
 *
 * @param options Extraction settings, see {@link ExtractOptions}.
 * @returns Counts of extracted and failed archives plus the text of the last
 *          recorded error (empty string when there was none).
 * @throws Error with message `"aborted:extract"` when `options.signal` is
 *         aborted at entry or before an archive starts, or when an extraction
 *         error is classified as an abort by `isExtractAbortError`.
 */
export async function extractPackageArchives(options: ExtractOptions): Promise<{ extracted: number; failed: number; lastError: string }> {
  if (options.signal?.aborted) {
    throw new Error("aborted:extract");
  }

  const allCandidates = findArchiveCandidates(options.packageDir);
  const candidates = options.onlyArchives
    ? allCandidates.filter((archivePath) => {
      const key = process.platform === "win32" ? path.resolve(archivePath).toLowerCase() : path.resolve(archivePath);
      return options.onlyArchives!.has(key);
    })
    : allCandidates;
  logger.info(`Entpacken gestartet: packageDir=${options.packageDir}, targetDir=${options.targetDir}, archives=${candidates.length}${options.onlyArchives ? ` (hybrid, gesamt=${allCandidates.length})` : ""}, cleanupMode=${options.cleanupMode}, conflictMode=${options.conflictMode}`);

  if (candidates.length === 0) {
    if (!options.onlyArchives) {
      const existingResume = readExtractResumeState(options.packageDir, options.packageId);
      // Resume entries exist but no archives are left while the target has
      // content: report the recorded archives as extracted instead of "nothing to do".
      if (existingResume.size > 0 && hasAnyEntries(options.targetDir)) {
        clearExtractResumeState(options.packageDir, options.packageId);
        logger.info(`Entpacken übersprungen (Archive bereinigt, Ziel hat Dateien): ${options.packageDir}`);
        options.onProgress?.({
          current: existingResume.size,
          total: existingResume.size,
          percent: 100,
          archiveName: "",
          phase: "done"
        });
        return { extracted: existingResume.size, failed: 0, lastError: "" };
      }
      clearExtractResumeState(options.packageDir, options.packageId);
    }
    logger.info(`Entpacken übersprungen (keine Archive gefunden): ${options.packageDir}`);
    return { extracted: 0, failed: 0, lastError: "" };
  }

  // NOTE(review): the normalized mode is only used for the empty-output check
  // below; the extractors receive the raw options.conflictMode and presumably
  // normalize it themselves — confirm.
  const conflictMode = effectiveConflictMode(options.conflictMode);
  if (options.conflictMode === "ask") {
    logger.warn("Extract-ConflictMode 'ask' wird ohne Prompt als 'skip' behandelt");
  }

  let passwordCandidates = archivePasswords(options.passwordList || "");

  // Resume handling: forget completed entries whose archive is no longer
  // present, then persist (or clear) the pruned state.
  const resumeCompleted = readExtractResumeState(options.packageDir, options.packageId);
  const resumeCompletedAtStart = resumeCompleted.size;
  const allCandidateNames = new Set(allCandidates.map((archivePath) => archiveNameKey(path.basename(archivePath))));
  for (const archiveName of Array.from(resumeCompleted.values())) {
    if (!allCandidateNames.has(archiveName)) {
      resumeCompleted.delete(archiveName);
    }
  }
  if (resumeCompleted.size > 0) {
    writeExtractResumeState(options.packageDir, resumeCompleted, options.packageId);
  } else {
    clearExtractResumeState(options.packageDir, options.packageId);
  }

  const pendingCandidates = candidates.filter((archivePath) => !resumeCompleted.has(archiveNameKey(path.basename(archivePath))));
  // Archives already completed in a previous run count as extracted.
  let extracted = candidates.length - pendingCandidates.length;
  let failed = 0;
  let lastError = "";

  const extractedArchives = new Set<string>();
  for (const archivePath of candidates) {
    if (resumeCompleted.has(archiveNameKey(path.basename(archivePath)))) {
      extractedArchives.add(archivePath);
    }
  }

  /**
   * Reports progress to `options.onProgress`; errors thrown by the callback
   * are logged and suppressed.
   *
   * @param current Number of archives processed so far.
   * @param archiveName Name of the archive the update refers to.
   * @param phase `"extracting"` while working, `"done"` for the final update.
   * @param archivePercent Progress of the active archive (0-100).
   * @param elapsedMs Time spent on the active archive.
   * @param archiveAlreadyCounted Set when `current` already includes the
   *        archive described by `archivePercent`, so its fraction must not be
   *        added to the overall percentage a second time.
   */
  const emitProgress = (
    current: number,
    archiveName: string,
    phase: "extracting" | "done",
    archivePercent?: number,
    elapsedMs?: number,
    archiveAlreadyCounted = false
  ): void => {
    if (!options.onProgress) {
      return;
    }
    const total = Math.max(1, candidates.length);
    let percent = Math.max(0, Math.min(100, Math.floor((current / total) * 100)));
    if (phase !== "done") {
      const boundedCurrent = Math.max(0, Math.min(total, current));
      const boundedArchivePercent = Math.max(0, Math.min(100, Number(archivePercent ?? 0)));
      // Only an archive that is still in flight contributes a fractional part.
      const activeFraction = archiveAlreadyCounted ? 0 : boundedArchivePercent / 100;
      percent = Math.max(0, Math.min(100, Math.floor(((boundedCurrent + activeFraction) / total) * 100)));
    }
    try {
      options.onProgress({
        current,
        total,
        percent,
        archiveName,
        archivePercent,
        elapsedMs,
        phase
      });
    } catch (error) {
      logger.warn(`onProgress callback Fehler unterdrückt: ${cleanErrorText(String(error))}`);
    }
  };

  emitProgress(extracted, "", "extracting");

  for (const archivePath of pendingCandidates) {
    if (options.signal?.aborted) {
      throw new Error("aborted:extract");
    }
    const archiveName = path.basename(archivePath);
    const archiveResumeKey = archiveNameKey(archiveName);
    const archiveStartedAt = Date.now();
    let archivePercent = 0;
    emitProgress(extracted + failed, archiveName, "extracting", archivePercent, 0);
    // Periodic update so elapsedMs keeps advancing even when the extractor
    // reports no percentage change.
    const pulseTimer = setInterval(() => {
      emitProgress(extracted + failed, archiveName, "extracting", archivePercent, Date.now() - archiveStartedAt);
    }, 1100);
    logger.info(`Entpacke Archiv: ${path.basename(archivePath)} -> ${options.targetDir}`);
    try {
      const ext = path.extname(archivePath).toLowerCase();
      if (ext === ".zip") {
        const preferExternal = shouldPreferExternalZip(archivePath);
        if (preferExternal) {
          // External extractor first; fall back to the built-in ZIP extraction
          // only when no external extractor is available.
          try {
            const usedPassword = await runExternalExtract(archivePath, options.targetDir, options.conflictMode, passwordCandidates, (value) => {
              archivePercent = Math.max(archivePercent, value);
              emitProgress(extracted + failed, archiveName, "extracting", archivePercent, Date.now() - archiveStartedAt);
            }, options.signal);
            passwordCandidates = prioritizePassword(passwordCandidates, usedPassword);
          } catch (error) {
            if (isNoExtractorError(String(error))) {
              extractZipArchive(archivePath, options.targetDir, options.conflictMode, options.signal);
            } else {
              throw error;
            }
          }
        } else {
          // Built-in ZIP extraction first; fall back to the external extractor
          // when the error qualifies for it.
          try {
            extractZipArchive(archivePath, options.targetDir, options.conflictMode, options.signal);
            archivePercent = 100;
          } catch (error) {
            if (!shouldFallbackToExternalZip(error)) {
              throw error;
            }
            try {
              const usedPassword = await runExternalExtract(archivePath, options.targetDir, options.conflictMode, passwordCandidates, (value) => {
                archivePercent = Math.max(archivePercent, value);
                emitProgress(extracted + failed, archiveName, "extracting", archivePercent, Date.now() - archiveStartedAt);
              }, options.signal);
              passwordCandidates = prioritizePassword(passwordCandidates, usedPassword);
            } catch (externalError) {
              // If the fallback could not even attempt the archive, surface
              // the original built-in error rather than the fallback's.
              if (isNoExtractorError(String(externalError)) || isUnsupportedArchiveFormatError(String(externalError))) {
                throw error;
              }
              throw externalError;
            }
          }
        }
      } else {
        const usedPassword = await runExternalExtract(archivePath, options.targetDir, options.conflictMode, passwordCandidates, (value) => {
          archivePercent = Math.max(archivePercent, value);
          emitProgress(extracted + failed, archiveName, "extracting", archivePercent, Date.now() - archiveStartedAt);
        }, options.signal);
        passwordCandidates = prioritizePassword(passwordCandidates, usedPassword);
      }
      extracted += 1;
      extractedArchives.add(archivePath);
      resumeCompleted.add(archiveResumeKey);
      writeExtractResumeState(options.packageDir, resumeCompleted, options.packageId);
      logger.info(`Entpacken erfolgreich: ${path.basename(archivePath)}`);
      archivePercent = 100;
      // `extracted` already includes this archive, so its 100% must not be
      // added again to the overall percentage.
      emitProgress(extracted + failed, archiveName, "extracting", archivePercent, Date.now() - archiveStartedAt, true);
    } catch (error) {
      failed += 1;
      const errorText = String(error);
      if (isExtractAbortError(errorText)) {
        throw new Error("aborted:extract");
      }
      lastError = errorText;
      logger.error(`Entpack-Fehler ${path.basename(archivePath)}: ${errorText}`);
      // `failed` already includes this archive; do not add its partial
      // progress on top of the processed count.
      emitProgress(extracted + failed, archiveName, "extracting", archivePercent, Date.now() - archiveStartedAt, true);
      if (isNoExtractorError(errorText)) {
        // Without an extractor, count every remaining archive as failed and stop.
        const remaining = candidates.length - (extracted + failed);
        if (remaining > 0) {
          failed += remaining;
          emitProgress(candidates.length, archiveName, "extracting", 0, Date.now() - archiveStartedAt);
        }
        break;
      }
    } finally {
      clearInterval(pulseTimer);
    }
  }

  if (extracted > 0) {
    const hasOutputAfter = hasAnyEntries(options.targetDir);
    const hadResumeProgress = resumeCompletedAtStart > 0;
    // Extractions reported success but the target is empty, with no skip mode
    // or earlier resume progress to explain it: count them as failures and
    // keep the source archives.
    if (!hasOutputAfter && conflictMode !== "skip" && !hadResumeProgress) {
      lastError = "Keine entpackten Dateien erkannt";
      failed += extracted;
      extracted = 0;
      logger.error(`Entpacken ohne neue Ausgabe erkannt: ${options.targetDir}. Cleanup wird NICHT ausgeführt.`);
    } else {
      if (!options.skipPostCleanup) {
        // With failures, only clean up archives that were actually extracted.
        const cleanupSources = failed === 0 ? candidates : Array.from(extractedArchives.values());
        const sourceAndTargetEqual = pathSetKey(path.resolve(options.packageDir)) === pathSetKey(path.resolve(options.targetDir));
        const removedArchives = sourceAndTargetEqual
          ? 0
          : cleanupArchives(cleanupSources, options.cleanupMode);
        if (sourceAndTargetEqual && options.cleanupMode !== "none") {
          logger.warn(`Archive-Cleanup übersprungen (Quelle=Ziel): ${options.packageDir}`);
        }
        if (options.cleanupMode !== "none") {
          logger.info(`Archive-Cleanup abgeschlossen: ${removedArchives} Datei(en) entfernt`);
        }
        if (options.removeLinks) {
          const removedLinks = removeDownloadLinkArtifacts(options.targetDir);
          logger.info(`Link-Artefakt-Cleanup: ${removedLinks} Datei(en) entfernt`);
        }
        if (options.removeSamples) {
          const removedSamples = removeSampleArtifacts(options.targetDir);
          logger.info(`Sample-Cleanup: ${removedSamples.files} Datei(en), ${removedSamples.dirs} Ordner entfernt`);
        }
      }

      // Everything is done: the resume state is no longer needed.
      if (failed === 0 && resumeCompleted.size >= allCandidates.length) {
        clearExtractResumeState(options.packageDir, options.packageId);
      }

      if (!options.skipPostCleanup && options.cleanupMode === "delete" && !hasAnyFilesRecursive(options.packageDir)) {
        const removedDirs = removeEmptyDirectoryTree(options.packageDir);
        if (removedDirs > 0) {
          logger.info(`Leere Download-Ordner entfernt: ${removedDirs} (root=${options.packageDir})`);
        }
      }
    }
  } else if (!options.skipPostCleanup) {
    // Nothing was extracted: remove the target directory if it exists and is
    // empty (best effort).
    try {
      if (fs.existsSync(options.targetDir) && fs.readdirSync(options.targetDir).length === 0) {
        fs.rmSync(options.targetDir, { recursive: true, force: true });
      }
    } catch {
      // ignore
    }
  }

  // Keep the resume state in sync when some archives failed.
  if (failed > 0) {
    if (resumeCompleted.size > 0) {
      writeExtractResumeState(options.packageDir, resumeCompleted, options.packageId);
    } else {
      clearExtractResumeState(options.packageDir, options.packageId);
    }
  }

  emitProgress(extracted, "", "done");
  logger.info(`Entpacken beendet: extracted=${extracted}, failed=${failed}, targetDir=${options.targetDir}`);
  return { extracted, failed, lastError };
}