Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 24 additions & 28 deletions src/mergeAxeResults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import printMessage from 'print-message';
import path from 'path';
import ejs from 'ejs';
import { fileURLToPath } from 'url';
import { Dataset, RequestQueue, Configuration } from 'crawlee';
import constants, {
BrowserTypes,
ScannerTypes,
Expand Down Expand Up @@ -1020,36 +1021,34 @@ const generateArtifacts = async (
1,
);

// Suppress uncaught EPERM errors from lingering Crawlee async lock-file operations
// (Windows holds mandatory file locks; Crawlee may still attempt mkdir on .json.lock
// files after the crawl has finished). Without this, Node crashes with uncaughtException.
const crawleeEpermHandler = (err: Error & { code?: string }) => {
if (err.code === 'EPERM' && err.message?.includes('crawlee')) {
consoleLogger.info(`Suppressed lingering Crawlee storage error: ${err.message}`);
return;
}
// Re-throw non-crawlee EPERM errors so they aren't silently swallowed
throw err;
};
process.on('uncaughtException', crawleeEpermHandler);
process.on('unhandledRejection', crawleeEpermHandler);
// Flush pending background storage operations (metadata writes, lock-file ops)
const storageClient = Configuration.getStorageClient();
if (storageClient.teardown) {
await storageClient.teardown();
}

// Gracefully drop Dataset and RequestQueue — releases locks and removes files
const crawleeDir = path.join(storagePath, 'crawlee');
try {
const dataset = await Dataset.open(crawleeDir);
await dataset.drop();
} catch (error) {
consoleLogger.info(`Dataset drop: ${error.message}`);
}

// Brief delay to allow lingering async crawlee storage operations to flush
await new Promise(resolve => setTimeout(resolve, process.platform === 'win32' ? 5000 : 3000));
try {
const requestQueue = await RequestQueue.open(crawleeDir);
await requestQueue.drop();
} catch (error) {
consoleLogger.info(`RequestQueue drop: ${error.message}`);
}

// Fallback rm for any leftover files not managed by Crawlee's storage API
const crawleePath = path.join(storagePath, 'crawlee');
try {
await fs.promises.rm(crawleePath, { recursive: true, force: true });
} catch (error) {
// On Windows, retry once after a delay if the folder is still locked
if (process.platform === 'win32') {
await new Promise(resolve => setTimeout(resolve, 3000));
try {
await fs.promises.rm(crawleePath, { recursive: true, force: true });
} catch {
// Best-effort cleanup — leave the folder; report generation continues
}
}
} catch {
// Best-effort; storage was already dropped via API
}

try {
Expand Down Expand Up @@ -1135,9 +1134,6 @@ const generateArtifacts = async (
if (process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE)
console.log('Report generated successfully');

process.removeListener('uncaughtException', crawleeEpermHandler);
process.removeListener('unhandledRejection', crawleeEpermHandler);

return ruleIdJson;
};

Expand Down
14 changes: 14 additions & 0 deletions src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import axe, { Rule } from 'axe-core';
import { v4 as uuidv4 } from 'uuid';
import { getDomain } from 'tldts';
import { normalizeUrl } from '@apify/utilities';
import { Dataset, RequestQueue, Configuration } from 'crawlee';
import constants, {
BrowserTypes,
destinationPath,
Expand Down Expand Up @@ -390,6 +391,19 @@ export const cleanUp = async (randomToken?: string, isError: boolean = false): P
if (randomToken !== undefined) {
const storagePath = getStoragePath(randomToken);

try {
const storageClient = Configuration.getStorageClient();
if (storageClient.teardown) {
await storageClient.teardown();
}
const crawleeDir = path.join(storagePath, 'crawlee');
const dataset = await Dataset.open(crawleeDir);
await dataset.drop();
const requestQueue = await RequestQueue.open(crawleeDir);
await requestQueue.drop();
} catch (error) {
consoleLogger.info(`Crawlee storage drop in cleanUp: ${error.message}`);
}
try {
fs.rmSync(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
} catch (error) {
Expand Down