Harden hybrid extract readiness for partial archives

Require near-complete file-size checks in Item-Recovery and in hybrid ready-set detection, so that partially downloaded RAR parts are not marked as completed and extracted prematurely.
This commit is contained in:
Sucukdeluxe 2026-03-03 16:52:16 +01:00
parent 87e0a986e6
commit 30ac5bf9db
2 changed files with 299 additions and 27 deletions

View File

@ -218,6 +218,35 @@ function isArchiveLikePath(filePath: string): boolean {
return /\.(?:part\d+\.rar|rar|r\d{2,3}|zip(?:\.\d+)?|z\d{1,3}|7z(?:\.\d+)?)$/i.test(lower);
}
// Absolute floor for any recovered file: never accept anything below 10 KiB
// (filters out tiny error-response bodies saved in place of real downloads).
const ITEM_RECOVERY_MIN_BYTES = 10 * 1024;
// Archive-like files (rar/zip/7z parts) must be near-complete to be recovered:
// at least 99.5% of the known total AND within 4 MiB of it.
const ARCHIVE_RECOVERY_MIN_RATIO = 0.995;
const ARCHIVE_RECOVERY_MAX_SLACK_BYTES = 4 * 1024 * 1024;
// Non-archive files get a slightly looser bound: 98% of the total, 8 MiB slack.
const FILE_RECOVERY_MIN_RATIO = 0.98;
const FILE_RECOVERY_MAX_SLACK_BYTES = 8 * 1024 * 1024;
/**
 * Minimum on-disk size a file must reach before item recovery may treat it
 * as complete.
 *
 * With no usable total size only the absolute floor applies. Otherwise the
 * strictest of three bounds wins: the absolute floor, a ratio of the known
 * total, and the total minus a fixed slack — with tighter ratio/slack values
 * for archive-like paths.
 *
 * @param filePath   Path (or bare file name) used to classify archive-likeness.
 * @param totalBytes Expected total size; null/undefined/0/NaN means unknown.
 * @returns Minimum acceptable size in bytes.
 */
function recoveryExpectedMinSize(filePath: string, totalBytes: number | null | undefined): number {
  const total = Number(totalBytes || 0);
  if (!Number.isFinite(total) || total <= 0) {
    // Unknown total size: fall back to the absolute floor only.
    return ITEM_RECOVERY_MIN_BYTES;
  }
  const archiveLike = isArchiveLikePath(filePath);
  const ratio = archiveLike ? ARCHIVE_RECOVERY_MIN_RATIO : FILE_RECOVERY_MIN_RATIO;
  const slack = archiveLike ? ARCHIVE_RECOVERY_MAX_SLACK_BYTES : FILE_RECOVERY_MAX_SLACK_BYTES;
  const byRatio = Math.floor(total * ratio);
  const bySlack = Math.max(0, Math.floor(total) - slack);
  // Math.max is variadic; this is equivalent to the nested-max form.
  return Math.max(ITEM_RECOVERY_MIN_BYTES, byRatio, bySlack);
}
/**
 * Decides whether a file found on disk is large enough to be accepted as the
 * recovered (completed) download for the given item.
 *
 * @param item     Download item; targetPath is preferred over fileName for
 *                 archive-likeness classification.
 * @param fileSize Observed on-disk size in bytes.
 * @returns true when the size meets the recovery threshold for this item.
 */
function isRecoveredFileSizeSufficient(item: Pick<DownloadItem, "targetPath" | "fileName" | "totalBytes">, fileSize: number): boolean {
  // Reject NaN/Infinity/zero-length files outright.
  const hasUsableSize = Number.isFinite(fileSize) && fileSize > 0;
  if (!hasUsableSize) {
    return false;
  }
  const classificationPath = String(item.targetPath || item.fileName || "");
  return fileSize >= recoveryExpectedMinSize(classificationPath, item.totalBytes);
}
function isFetchFailure(errorText: string): boolean {
const text = String(errorText || "").toLowerCase();
return text.includes("fetch failed") || text.includes("socket hang up") || text.includes("econnreset") || text.includes("network error");
@ -4949,6 +4978,7 @@ export class DownloadManager extends EventEmitter {
}
const completedPaths = new Set<string>();
const completedItemsByPath = new Map<string, DownloadItem>();
const pendingPaths = new Set<string>();
for (const itemId of pkg.itemIds) {
const item = this.session.items[itemId];
@ -4956,7 +4986,9 @@ export class DownloadManager extends EventEmitter {
continue;
}
if (item.status === "completed" && item.targetPath) {
completedPaths.add(pathKey(item.targetPath));
const key = pathKey(item.targetPath);
completedPaths.add(key);
completedItemsByPath.set(key, item);
} else if (item.targetPath) {
pendingPaths.add(pathKey(item.targetPath));
}
@ -4992,6 +5024,30 @@ export class DownloadManager extends EventEmitter {
const partsOnDisk = collectArchiveCleanupTargets(candidate, dirFiles);
const allPartsCompleted = partsOnDisk.every((part) => completedPaths.has(pathKey(part)));
if (allPartsCompleted) {
let allPartsLikelyComplete = true;
for (const part of partsOnDisk) {
const completedItem = completedItemsByPath.get(pathKey(part));
if (!completedItem) {
continue;
}
try {
const stat = fs.statSync(part);
if (isRecoveredFileSizeSufficient(completedItem, stat.size)) {
continue;
}
const minSize = recoveryExpectedMinSize(completedItem.targetPath || completedItem.fileName, completedItem.totalBytes);
logger.info(`Hybrid-Extract: ${path.basename(candidate)} übersprungen ${path.basename(part)} zu klein (${humanSize(stat.size)}, erwartet mind. ${humanSize(minSize)})`);
allPartsLikelyComplete = false;
break;
} catch {
allPartsLikelyComplete = false;
break;
}
}
if (!allPartsLikelyComplete) {
continue;
}
const candidateBase = path.basename(candidate).toLowerCase();
// For multi-part archives (.part1.rar), check if parts of THIS SPECIFIC archive
@ -5225,17 +5281,9 @@ export class DownloadManager extends EventEmitter {
if (progress.phase === "done") {
return;
}
// When a new archive starts, mark the previous archive's items as done
// Track only currently active archive items; final statuses are set
// after extraction result is known.
if (progress.archiveName && progress.archiveName !== lastHybridArchiveName) {
if (lastHybridArchiveName && currentArchiveItems.length > 0) {
const doneAt = nowMs();
for (const entry of currentArchiveItems) {
if (!isExtractedLabel(entry.fullStatus)) {
entry.fullStatus = "Entpackt - Done";
entry.updatedAt = doneAt;
}
}
}
lastHybridArchiveName = progress.archiveName;
const resolved = resolveArchiveItems(progress.archiveName);
currentArchiveItems = resolved;
@ -5310,12 +5358,8 @@ export class DownloadManager extends EventEmitter {
}
try {
const stat = fs.statSync(item.targetPath);
// Require file to be either ≥50% of expected size or at least 10 KB to avoid
// recovering tiny error-response files (e.g. 9-byte "Forbidden" pages).
const minSize = item.totalBytes && item.totalBytes > 0
? Math.max(10240, Math.floor(item.totalBytes * 0.5))
: 10240;
if (stat.size >= minSize) {
const minSize = recoveryExpectedMinSize(item.targetPath || item.fileName, item.totalBytes);
if (isRecoveredFileSizeSufficient(item, stat.size)) {
logger.info(`Item-Recovery: ${item.fileName} war "${item.status}" aber Datei existiert (${humanSize(stat.size)}), setze auf completed`);
item.status = "completed";
item.fullStatus = this.settings.autoExtract ? "Entpacken - Ausstehend" : `Fertig (${humanSize(stat.size)})`;
@ -5449,17 +5493,9 @@ export class DownloadManager extends EventEmitter {
signal: extractAbortController.signal,
packageId,
onProgress: (progress) => {
// When a new archive starts, mark the previous archive's items as done
// Track only currently active archive items; final statuses are set
// after extraction result is known.
if (progress.archiveName && progress.archiveName !== lastExtractArchiveName) {
if (lastExtractArchiveName && currentArchiveItems.length > 0) {
const doneAt = nowMs();
for (const entry of currentArchiveItems) {
if (!isExtractedLabel(entry.fullStatus)) {
entry.fullStatus = "Entpackt - Done";
entry.updatedAt = doneAt;
}
}
}
lastExtractArchiveName = progress.archiveName;
currentArchiveItems = resolveArchiveItems(progress.archiveName);
}

View File

@ -3636,6 +3636,242 @@ describe("download manager", () => {
expect(snapshot.session.items[itemId]?.fullStatus).toBe("Entpacken abgebrochen (wird fortgesetzt)");
});
// Guards against premature recovery: a queued item whose on-disk archive part
// is only 86% of the expected size (below ARCHIVE_RECOVERY_MIN_RATIO) must
// NOT be flipped to "completed" by package post-processing.
it("does not recover partial archive files as completed", async () => {
const root = fs.mkdtempSync(path.join(os.tmpdir(), "rd-dm-"));
tempDirs.push(root);
const outputDir = path.join(root, "downloads", "partial-recovery");
const extractDir = path.join(root, "extract", "partial-recovery");
fs.mkdirSync(outputDir, { recursive: true });
const archivePath = path.join(outputDir, "partial.repack.part1.rar");
const totalBytes = 1_000_000;
// Write only 860 KB of the expected 1 MB — clearly below the 99.5% ratio.
fs.writeFileSync(archivePath, Buffer.alloc(860_000, 1));
const session = emptySession();
const packageId = "partial-recovery-pkg";
const itemId = "partial-recovery-item";
const createdAt = Date.now() - 20_000;
session.packageOrder = [packageId];
session.packages[packageId] = {
id: packageId,
name: "partial-recovery",
outputDir,
extractDir,
status: "downloading",
itemIds: [itemId],
cancelled: false,
enabled: true,
createdAt,
updatedAt: createdAt
};
// Item is still queued; recovery would have to promote it based on file size.
session.items[itemId] = {
id: itemId,
packageId,
url: "https://dummy/partial-recovery",
provider: "megadebrid",
status: "queued",
retries: 0,
speedBps: 0,
downloadedBytes: 0,
totalBytes,
progressPercent: 0,
fileName: path.basename(archivePath),
targetPath: archivePath,
resumable: true,
attempts: 0,
lastError: "",
fullStatus: "Wartet",
createdAt,
updatedAt: createdAt
};
const manager = new DownloadManager(
{
...defaultSettings(),
token: "rd-token",
outputDir: path.join(root, "downloads"),
extractDir: path.join(root, "extract"),
autoExtract: false
},
session,
createStoragePaths(path.join(root, "state"))
);
// Reach into the private post-processing entry point under test.
const internal = manager as unknown as {
handlePackagePostProcessing: (packageId: string) => Promise<void>;
};
await internal.handlePackagePostProcessing(packageId);
const item = manager.getSnapshot().session.items[itemId];
// The undersized partial file must leave the item untouched.
expect(item?.status).toBe("queued");
expect(item?.fullStatus).toBe("Wartet");
});
// Positive counterpart to the partial-file guard: a file at 99.6% of the
// known total (above ARCHIVE_RECOVERY_MIN_RATIO = 0.995) IS recovered as
// completed, and downloadedBytes is synced to the actual on-disk size.
it("recovers near-complete archive files with known size", async () => {
const root = fs.mkdtempSync(path.join(os.tmpdir(), "rd-dm-"));
tempDirs.push(root);
const outputDir = path.join(root, "downloads", "near-complete-recovery");
const extractDir = path.join(root, "extract", "near-complete-recovery");
fs.mkdirSync(outputDir, { recursive: true });
const archivePath = path.join(outputDir, "near.complete.part1.rar");
const totalBytes = 1_000_000;
// 996 KB of 1 MB — above the 995 KB ratio threshold, so recovery applies.
const fileSize = 996_000;
fs.writeFileSync(archivePath, Buffer.alloc(fileSize, 2));
const session = emptySession();
const packageId = "near-complete-recovery-pkg";
const itemId = "near-complete-recovery-item";
const createdAt = Date.now() - 20_000;
session.packageOrder = [packageId];
session.packages[packageId] = {
id: packageId,
name: "near-complete-recovery",
outputDir,
extractDir,
status: "downloading",
itemIds: [itemId],
cancelled: false,
enabled: true,
createdAt,
updatedAt: createdAt
};
// Item is queued; the near-complete on-disk file should promote it.
session.items[itemId] = {
id: itemId,
packageId,
url: "https://dummy/near-complete-recovery",
provider: "megadebrid",
status: "queued",
retries: 0,
speedBps: 0,
downloadedBytes: 0,
totalBytes,
progressPercent: 0,
fileName: path.basename(archivePath),
targetPath: archivePath,
resumable: true,
attempts: 0,
lastError: "",
fullStatus: "Wartet",
createdAt,
updatedAt: createdAt
};
const manager = new DownloadManager(
{
...defaultSettings(),
token: "rd-token",
outputDir: path.join(root, "downloads"),
extractDir: path.join(root, "extract"),
autoExtract: false
},
session,
createStoragePaths(path.join(root, "state"))
);
// Reach into the private post-processing entry point under test.
const internal = manager as unknown as {
handlePackagePostProcessing: (packageId: string) => Promise<void>;
};
await internal.handlePackagePostProcessing(packageId);
const item = manager.getSnapshot().session.items[itemId];
expect(item?.status).toBe("completed");
// downloadedBytes should reflect the real file size, not the expected total.
expect(item?.downloadedBytes).toBe(fileSize);
});
// Hybrid-extract guard: both parts are marked "completed" in the session, but
// part1 is only 900 KB of an expected 1 MB (below the 99.5% archive ratio).
// findReadyArchiveSets must therefore report no ready archive set, preventing
// extraction of a still-truncated multipart archive.
it("skips hybrid-ready multipart archives when a completed part is still too small", async () => {
const root = fs.mkdtempSync(path.join(os.tmpdir(), "rd-dm-"));
tempDirs.push(root);
const outputDir = path.join(root, "downloads", "hybrid-size-guard");
const extractDir = path.join(root, "extract", "hybrid-size-guard");
fs.mkdirSync(outputDir, { recursive: true });
const part1 = path.join(outputDir, "show.s01e01.part1.rar");
const part2 = path.join(outputDir, "show.s01e01.part2.rar");
// part1 is undersized relative to its item's totalBytes; part2 is exact.
fs.writeFileSync(part1, Buffer.alloc(900_000, 3));
fs.writeFileSync(part2, Buffer.alloc(700_000, 4));
const session = emptySession();
const packageId = "hybrid-size-guard-pkg";
const createdAt = Date.now() - 20_000;
session.packageOrder = [packageId];
session.packages[packageId] = {
id: packageId,
name: "hybrid-size-guard",
outputDir,
extractDir,
status: "downloading",
itemIds: ["hybrid-size-guard-item-1", "hybrid-size-guard-item-2"],
cancelled: false,
enabled: true,
createdAt,
updatedAt: createdAt
};
// Item 1: claims completed, but on-disk size (900 KB) < 99.5% of 1 MB.
session.items["hybrid-size-guard-item-1"] = {
id: "hybrid-size-guard-item-1",
packageId,
url: "https://dummy/hybrid-size-guard/1",
provider: "megadebrid",
status: "completed",
retries: 0,
speedBps: 0,
downloadedBytes: 900_000,
totalBytes: 1_000_000,
progressPercent: 100,
fileName: path.basename(part1),
targetPath: part1,
resumable: true,
attempts: 1,
lastError: "",
fullStatus: "Entpacken - Ausstehend",
createdAt,
updatedAt: createdAt
};
// Item 2: genuinely complete (file size equals totalBytes).
session.items["hybrid-size-guard-item-2"] = {
id: "hybrid-size-guard-item-2",
packageId,
url: "https://dummy/hybrid-size-guard/2",
provider: "megadebrid",
status: "completed",
retries: 0,
speedBps: 0,
downloadedBytes: 700_000,
totalBytes: 700_000,
progressPercent: 100,
fileName: path.basename(part2),
targetPath: part2,
resumable: true,
attempts: 1,
lastError: "",
fullStatus: "Entpacken - Ausstehend",
createdAt,
updatedAt: createdAt
};
const manager = new DownloadManager(
{
...defaultSettings(),
token: "rd-token",
outputDir: path.join(root, "downloads"),
extractDir: path.join(root, "extract"),
autoExtract: true,
hybridExtract: true
},
session,
createStoragePaths(path.join(root, "state"))
);
// Access the private ready-set detection used by hybrid extraction.
const internal = manager as unknown as {
session: ReturnType<typeof emptySession>;
findReadyArchiveSets: (pkg: ReturnType<typeof emptySession>["packages"][string]) => Promise<Set<string>>;
};
const ready = await internal.findReadyArchiveSets(internal.session.packages[packageId]);
// The undersized part1 must keep the whole archive set out of the ready set.
expect(ready.size).toBe(0);
});
it("recovers pending extraction on startup for completed package", async () => {
const root = fs.mkdtempSync(path.join(os.tmpdir(), "rd-dm-"));
tempDirs.push(root);