diff --git a/src/mergeAxeResults.ts b/src/mergeAxeResults.ts index f771194f..a78fc67f 100644 --- a/src/mergeAxeResults.ts +++ b/src/mergeAxeResults.ts @@ -5,6 +5,7 @@ import printMessage from 'print-message'; import path from 'path'; import ejs from 'ejs'; import { fileURLToPath } from 'url'; +import { Dataset, RequestQueue, Configuration } from 'crawlee'; import constants, { BrowserTypes, ScannerTypes, @@ -1020,36 +1021,34 @@ const generateArtifacts = async ( 1, ); - // Suppress uncaught EPERM errors from lingering Crawlee async lock-file operations - // (Windows holds mandatory file locks; Crawlee may still attempt mkdir on .json.lock - // files after the crawl has finished). Without this, Node crashes with uncaughtException. - const crawleeEpermHandler = (err: Error & { code?: string }) => { - if (err.code === 'EPERM' && err.message?.includes('crawlee')) { - consoleLogger.info(`Suppressed lingering Crawlee storage error: ${err.message}`); - return; - } - // Re-throw non-crawlee EPERM errors so they aren't silently swallowed - throw err; - }; - process.on('uncaughtException', crawleeEpermHandler); - process.on('unhandledRejection', crawleeEpermHandler); + // Flush pending background storage operations (metadata writes, lock-file ops) + const storageClient = Configuration.getStorageClient(); + if (storageClient.teardown) { + await storageClient.teardown(); + } + + // Gracefully drop Dataset and RequestQueue — releases locks and removes files + const crawleeDir = path.join(storagePath, 'crawlee'); + try { + const dataset = await Dataset.open(crawleeDir); + await dataset.drop(); + } catch (error) { + consoleLogger.info(`Dataset drop: ${error.message}`); + } - // Brief delay to allow lingering async crawlee storage operations to flush - await new Promise(resolve => setTimeout(resolve, process.platform === 'win32' ? 5000 : 3000)); + try { + const requestQueue = await RequestQueue.open(crawleeDir); + await requestQueue.drop(); + } catch (error) { + consoleLogger.info(`RequestQueue drop: ${error.message}`); + } + // Fallback rm for any leftover files not managed by Crawlee's storage API const crawleePath = path.join(storagePath, 'crawlee'); try { await fs.promises.rm(crawleePath, { recursive: true, force: true }); - } catch (error) { - // On Windows, retry once after a delay if the folder is still locked - if (process.platform === 'win32') { - await new Promise(resolve => setTimeout(resolve, 3000)); - try { - await fs.promises.rm(crawleePath, { recursive: true, force: true }); - } catch { - // Best-effort cleanup — leave the folder; report generation continues - } - } + } catch { + // Best-effort; storage was already dropped via API } try { @@ -1135,9 +1134,6 @@ const generateArtifacts = async ( if (process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE) console.log('Report generated successfully'); - process.removeListener('uncaughtException', crawleeEpermHandler); - process.removeListener('unhandledRejection', crawleeEpermHandler); - return ruleIdJson; }; diff --git a/src/utils.ts b/src/utils.ts index 769033fb..59e8b825 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -6,6 +6,7 @@ import axe, { Rule } from 'axe-core'; import { v4 as uuidv4 } from 'uuid'; import { getDomain } from 'tldts'; import { normalizeUrl } from '@apify/utilities'; +import { Dataset, RequestQueue, Configuration } from 'crawlee'; import constants, { BrowserTypes, destinationPath, @@ -390,6 +391,19 @@ export const cleanUp = async (randomToken?: string, isError: boolean = false): P if (randomToken !== undefined) { const storagePath = getStoragePath(randomToken); + try { + const storageClient = Configuration.getStorageClient(); + if (storageClient.teardown) { + await storageClient.teardown(); + } + const crawleeDir = path.join(storagePath, 'crawlee'); + const dataset = await Dataset.open(crawleeDir); + await dataset.drop(); + const requestQueue = await RequestQueue.open(crawleeDir); + await requestQueue.drop(); + } catch (error) { + consoleLogger.info(`Crawlee storage drop in cleanUp: ${error.message}`); + } try { fs.rmSync(path.join(storagePath, 'crawlee'), { recursive: true, force: true }); } catch (error) {