From 27c7901905714abf5d153dc7f97aa6190055b51d Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Tue, 19 Aug 2025 08:29:10 +0200 Subject: [PATCH 1/8] fix(general): adding custom headers to client()=> preserve whole config to be passed to Axios later --- components/apify/apify.app.mjs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/components/apify/apify.app.mjs b/components/apify/apify.app.mjs index e9f5e2042fd33..506ab5bff6f48 100644 --- a/components/apify/apify.app.mjs +++ b/components/apify/apify.app.mjs @@ -140,9 +140,12 @@ export default { return new ApifyClient({ token: this.$auth.api_token, requestInterceptors: [ - ({ headers }) => ({ - ...headers, - "x-apify-integration-platform": "pipedream", + (config) => ({ + ...config, + headers: { + ...(config.headers || {}), + "x-apify-integration-platform": "pipedream", + }, }), ], }); From b57f07f0f42ba7f4229b528504d8f211f96cce6d Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Tue, 19 Aug 2025 08:31:06 +0200 Subject: [PATCH 2/8] feat(general): add linter script --- components/apify/package.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/components/apify/package.json b/components/apify/package.json index 34c16a25b517f..dd65a3d28a1b9 100644 --- a/components/apify/package.json +++ b/components/apify/package.json @@ -16,5 +16,8 @@ "@apify/consts": "^2.41.0", "@pipedream/platform": "^3.1.0", "apify-client": "^2.12.6" + }, + "scripts": { + "lint:fix": "eslint . --fix" } } From 53ebd540b8d50fbac75cedc6b35e33c17d41ceca Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Tue, 19 Aug 2025 10:39:12 +0200 Subject: [PATCH 3/8] fix(apify-get-dataset-items): a function for getting items and parsing of a result --- .../get-dataset-items/get-dataset-items.mjs | 2 +- components/apify/apify.app.mjs | 27 +++++++------------ 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/components/apify/actions/get-dataset-items/get-dataset-items.mjs b/components/apify/actions/get-dataset-items/get-dataset-items.mjs index 72dffe6a703ef..eab1243482cfc 100644 --- a/components/apify/actions/get-dataset-items/get-dataset-items.mjs +++ b/components/apify/actions/get-dataset-items/get-dataset-items.mjs @@ -60,7 +60,7 @@ export default { let total; do { - const items = await this.apify.listDatasetItems({ + const { items } = await this.apify.listDatasetItems({ datasetId: this.datasetId, params, }); diff --git a/components/apify/apify.app.mjs b/components/apify/apify.app.mjs index 506ab5bff6f48..9ff5154cb9dea 100644 --- a/components/apify/apify.app.mjs +++ b/components/apify/apify.app.mjs @@ -11,11 +11,9 @@ export default { description: "The Id of the key-value store.", async options({ page }) { const { items } = await this.listKeyValueStores({ - params: { - offset: LIMIT * page, - limit: LIMIT, - unnamed: true, - }, + offset: LIMIT * page, + limit: LIMIT, + unnamed: true, }); return items.map(({ @@ -53,11 +51,9 @@ export default { label: "Task ID", description: "The ID of the task to monitor.", async options({ page }) { - const { data: { items } } = await this.listTasks({ - params: { - offset: LIMIT * page, - limit: LIMIT, - }, + const { items } = await this.listTasks({ + offset: LIMIT * page, + limit: LIMIT, }); return items.map((task) => ({ @@ -72,10 +68,8 @@ export default { description: "The ID of the dataset to retrieve items within", async options({ page }) { const { items } = await this.listDatasets({ - params: { - offset: LIMIT * page, - limit: LIMIT, - }, + offset: LIMIT * page, + limit: LIMIT, }); return items?.map(({ id: value, name: label, @@ -226,11 +220,10 @@ export default { .list(opts); }, listDatasetItems({ - datasetId, ...opts + datasetId, params, }) { return this._client().dataset(datasetId) - .items() - .list(opts); + .listItems(params); }, async runTaskSynchronously({ taskId, params, From cfe9ab28ec11823d2189c1e3425d1525d8d6353b Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Tue, 19 Aug 2025 16:54:39 +0200 Subject: [PATCH 4/8] fix(apify-run-actor): working sync and async, dynamic input schema injection, KVS output retrieval tested only string --- .../apify/actions/run-actor/run-actor.mjs | 248 ++++++++++++------ components/apify/apify.app.mjs | 28 +- 2 files changed, 186 insertions(+), 90 deletions(-) diff --git a/components/apify/actions/run-actor/run-actor.mjs b/components/apify/actions/run-actor/run-actor.mjs index d897602fc1f71..9105b81a76333 100644 --- a/components/apify/actions/run-actor/run-actor.mjs +++ b/components/apify/actions/run-actor/run-actor.mjs @@ -36,6 +36,7 @@ export default { actorSource: c.actorSource, }), ], + reloadProps: true, }, buildTag: { propDefinition: [ @@ -57,13 +58,13 @@ export default { }, timeout: { type: "string", - label: "Timeout", + label: "Timeout (seconds)", description: "Optional timeout for the run, in seconds. By default, the run uses a timeout specified in the default run configuration for the Actor.", optional: true, }, memory: { type: "string", - label: "Memory", + label: "Memory (MB)", description: "Memory limit for the run, in megabytes. The amount of memory can be set to a power of 2 with a minimum of 128. By default, the run uses a memory limit specified in the default run configuration for the Actor.", optional: true, }, @@ -81,7 +82,7 @@ export default { }, webhook: { type: "string", - label: "Webhook", + label: "Webhook URL", description: "Specifies optional webhook associated with the Actor run, which can be used to receive a notification e.g. when the Actor finished or failed.", optional: true, reloadProps: true, @@ -99,73 +100,83 @@ export default { : "string[]"; }, async getSchema(actorId, buildId) { - const { data: { inputSchema } } = await this.apify.getBuild(actorId, buildId); - return JSON.parse(inputSchema); + const build = await this.apify.getBuild(actorId, buildId); + if (!build) { + throw new Error(`No build found for actor ${actorId}`); + } + + // Case 1: schema is already an object + if (build.actorDefinition && build.actorDefinition.input) { + return build.actorDefinition.input; + } + + // Case 2: schema is a string in inputSchema + if (build.inputSchema) { + try { + return typeof build.inputSchema === "string" + ? JSON.parse(build.inputSchema) + : build.inputSchema; + } catch (err) { + throw new Error( + `Failed to parse inputSchema for actor ${actorId}: ${err.message}`, + ); + } + } + + // Case 3: no schema at all + throw new Error( + `No input schema found for actor ${actorId}. Has it been built successfully?`, + ); }, async prepareData(data) { const newData = {}; + const { properties } = await this.apify.getSchema(this.actorId, this.buildId); - const { properties } = await this.getSchema(this.actorId, this.buildId); for (const [ key, value, ] of Object.entries(data)) { - let editor; - - if (properties[key]) { - editor = properties[key].editor; - } else { - console.warn(`Property "${key}" is not defined in the schema`); - editor = "hidden"; // This will prevent it from showing up - } - - newData[key] = (Array.isArray(value)) + let editor = properties[key]?.editor || "hidden"; + newData[key] = Array.isArray(value) ? value.map((item) => this.setValue(editor, item)) : value; } return newData; }, prepareOptions(value) { - let options = []; if (value.enum && value.enumTitles) { - for (const [ - index, - val, - ] of value.enum.entries()) { - if (val) { - options.push({ - value: val, - label: value.enumTitles[index], - }); - } - } + return value.enum.map((val, i) => ({ + value: val, + label: value.enumTitles[i], + })); } - return options.length - ? options - : undefined; }, setValue(editor, item) { switch (editor) { - case "requestListSources" : return { - url: item, - }; - case "pseudoUrls" : return { - purl: item, - }; - case "globs" : return { - glob: item, - }; - default: return item; + case "requestListSources": + return { + url: item, + }; + case "pseudoUrls": + return { + purl: item, + }; + case "globs": + return { + glob: item, + }; + default: + return item; } }, }, async additionalProps() { const props = {}; - try { + const schema = await this.getSchema(this.actorId, this.buildId); const { properties, required: requiredProps = [], - } = await this.getSchema(this.actorId, this.buildId); + } = schema; for (const [ key, @@ -179,30 +190,36 @@ export default { description: value.description, optional: !requiredProps.includes(key), }; + const options = this.prepareOptions(value); if (options) props[key].options = options; + if (value.default) { props[key].description += ` Default: \`${JSON.stringify(value.default)}\``; - if (props[key].type !== "object") { // default values don't work properly for object props + if (props[key].type !== "object") { props[key].default = value.default; } } } - } catch { + } catch (e) { props.properties = { type: "object", label: "Properties", - description: "Properties to set for this actor", + description: e.message || "Schema not available, showing fallback.", }; } - if (this.runAsynchronously) { + + if (!this.runAsynchronously) { props.outputRecordKey = { type: "string", label: "Output Record Key", - description: "Key of the record from run's default key-value store to be returned in the response. By default, it is OUTPUT.", + description: + "Key of the record from the run's default key-value store to return. Default is `OUTPUT`.", optional: true, + default: "OUTPUT", }; } + if (this.webhook) { props.eventTypes = { type: "string[]", @@ -211,19 +228,14 @@ export default { options: Object.values(WEBHOOK_EVENT_TYPES), }; } + return props; }, async run({ $ }) { const { - getType, - getSchema, - prepareOptions, - setValue, - prepareData, apify, actorId, buildTag, - properties, runAsynchronously, outputRecordKey, timeout, @@ -235,36 +247,106 @@ export default { ...data } = this; - const fn = runAsynchronously - ? apify.runActorAsynchronously - : apify.runActor; - - const response = await fn({ + // --- Validation step --- + const actorDetails = await apify.getActor({ actorId, - data: properties - ? parseObject(properties) - : await prepareData(data), - params: { - outputRecordKey, - timeout, - memory, - maxItems, - maxTotalChargeUsd, - build: buildTag, - webhooks: webhook - ? btoa(JSON.stringify([ - { - eventTypes, - requestUrl: webhook, - }, - ])) - : undefined, - }, }); - const summary = this.runAsynchronously - ? `Successfully started Actor run with ID: ${response.data.id}` - : `Successfully ran Actor with ID: ${this.actorId}`; - $.export("$summary", `${summary}`); - return response; + + if (!actorDetails) { + throw new Error(`Actor with ID "${actorId}" does not exist.`); + } + + if (!actorDetails.stats?.totalBuilds || actorDetails.stats.totalBuilds === 0) { + throw new Error( + `Actor "${actorDetails.title || actorDetails.name}" has no builds. Please build it first before running.`, + ); + } + + if (buildTag) { + const taggedBuilds = actorDetails.taggedBuilds || {}; + if (!taggedBuilds[buildTag]) { + throw new Error( + `Build with tag "${buildTag}" was not found for actor "${actorDetails.title || actorDetails.name}".`, + ); + } + } + + // Prepare input + const input = this.properties + ? parseObject(this.properties) + : data; + + // Build params safely + const params = { + ...(buildTag && { + build: buildTag, + }), + ...(timeout && { + timeoutSecs: Number(timeout), + }), + ...(memory && { + memoryMbytes: Number(memory), + }), + ...(maxItems && { + maxItems: Number(maxItems), + }), + ...(maxTotalChargeUsd && { + maxTotalCostUsd: Number(maxTotalChargeUsd), + }), + ...(webhook && { + webhooks: [ + { + eventTypes, + requestUrl: webhook, + }, + ], + }), + }; + + let run; + + if (runAsynchronously) { + // async run + run = await apify.runActorAsynchronously({ + actorId, + data: input, + params, + }); + + $.export("$summary", `Successfully started Actor run with ID: ${run.id}`); + return run; + } else { + // sync run + run = await apify.runActor({ + actorId, + data: input, + params, + }); + + // Fetch OUTPUT record manually + let output; + if (run.defaultKeyValueStoreId) { + const record = await apify + ._client() + .keyValueStore(run.defaultKeyValueStoreId) + .getRecord(outputRecordKey); + + output = record?.value; + } + + $.export( + "$summary", + `The ran of an Actor with ID: ${actorId} has finished with status "${run.status}", retrieved ${ + output + ? "OUTPUT" + : "no output" + }`, + ); + + return { + run, + output, + }; + } }, }; diff --git a/components/apify/apify.app.mjs b/components/apify/apify.app.mjs index 9ff5154cb9dea..a36ab58ded2c6 100644 --- a/components/apify/apify.app.mjs +++ b/components/apify/apify.app.mjs @@ -178,16 +178,30 @@ export default { return this._client().actor(actorId) .get(); }, - getBuild(actorId, buildId) { - if (buildId) { - return this._client().build(buildId) + async getBuild(actorId, buildId) { + if (!buildId) { + // Get actor details + const actor = await this._client().actor(actorId) .get(); + + if (!actor) { + throw new Error(`Actor ${actorId} not found.`); + } + + // Ensure "latest" build tag exists + const latestBuild = actor.taggedBuilds?.latest; + if (!latestBuild) { + throw new Error( + `Actor ${actorId} has no build tagged "latest". Please build the actor first.`, + ); + } + + buildId = latestBuild.buildId; } - return this._client().actor(actorId) - .builds() - .list() - .then(({ items }) => items[0]); + // Fetch the build by ID + return this._client().build(buildId) + .get(); }, listActors(opts = {}) { return this._client().store() From c4a57a1d9607e79329d9d70c399f2de84ecdeb01 Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Thu, 21 Aug 2025 10:45:16 +0200 Subject: [PATCH 5/8] fix(general): change maxResults for limit as an input field --- .../actions/get-dataset-items/get-dataset-items.mjs | 10 +++++----- components/apify/apify.app.mjs | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/components/apify/actions/get-dataset-items/get-dataset-items.mjs b/components/apify/actions/get-dataset-items/get-dataset-items.mjs index eab1243482cfc..2b6403485579a 100644 --- a/components/apify/actions/get-dataset-items/get-dataset-items.mjs +++ b/components/apify/actions/get-dataset-items/get-dataset-items.mjs @@ -39,10 +39,10 @@ export default { "flatten", ], }, - maxResults: { + limit: { propDefinition: [ apify, - "maxResults", + "limit", ], }, }, @@ -65,15 +65,15 @@ export default { params, }); results.push(...items); - if (results.length >= this.maxResults) { + if (results.length >= this.limit) { break; } total = items?.length; params.offset += LIMIT; } while (total); - if (results.length > this.maxResults) { - results.length = this.maxResults; + if (results.length > this.limit) { + results.length = this.limit; } if (results.length > 0) { diff --git a/components/apify/apify.app.mjs b/components/apify/apify.app.mjs index a36ab58ded2c6..d17fbec4c2c44 100644 --- a/components/apify/apify.app.mjs +++ b/components/apify/apify.app.mjs @@ -121,9 +121,9 @@ export default { description: "An array of fields which should transform nested objects into flat structures. For example, with `flatten=\"foo\"` the object `{\"foo\":{\"bar\": \"hello\"}}` is turned into `{\"foo.bar\": \"hello\"}`", optional: true, }, - maxResults: { + limit: { type: "integer", - label: "Max Results", + label: "Limit", description: "The maximum number of items to return", default: LIMIT, optional: true, From 8b25cc9f559bd4a8c13e756bbdad8b669a0ff7af Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Thu, 21 Aug 2025 11:34:12 +0200 Subject: [PATCH 6/8] fix(run-task-sync): move items retrieval to the component, add waitSecs determined by input or plan to prevent blunt timeout error, have the item retrieval logic be connected to run status, clean return value --- .../run-task-synchronously.mjs | 85 ++++++++++++++++--- components/apify/apify.app.mjs | 8 +- 2 files changed, 77 insertions(+), 16 deletions(-) diff --git a/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs b/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs index cda8fd8dbc450..1714c5a7009e8 100644 --- a/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs +++ b/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs @@ -1,4 +1,7 @@ import apify from "../../apify.app.mjs"; +import { + ACTOR_JOB_STATUSES, ACTOR_JOB_TERMINAL_STATUSES, +} from "@apify/consts"; export default { key: "apify-run-task-synchronously", @@ -15,6 +18,15 @@ export default { ], description: "The ID of the task to run", }, + // Start task run options + paidPlan: { + type: "boolean", + label: "Paid plan", + description: "Indicates whether the current user is on a paid plan. Paid plans allow longer execution times for this step. If set to true but the user is actually on a Free plan, the step will fail.", + optional: false, + default: false, + reloadProps: true, + }, timeout: { type: "integer", label: "Timeout", @@ -33,6 +45,7 @@ export default { description: "Specifies the Actor build to run. It can be either a build tag or build number. By default, the run uses the build specified in the task settings (typically latest).", optional: true, }, + // Retrieve dataset output option clean: { propDefinition: [ apify, @@ -57,30 +70,82 @@ export default { "flatten", ], }, - maxResults: { + limit: { propDefinition: [ apify, - "maxResults", + "limit", ], }, }, + async additionalProps() { + const props = {}; + if (this.paidPlan) { + props.waitSecs = { + type: "integer", + label: "Waiting time (seconds)", + description: "Specifies how long to wait for the run to complete. If not set, the wait time defaults to the Pipedream’s platform limits (up to 300 seconds for the whole step execution).", + optional: true, + default: 280, + }; + } + return props; + }, async run({ $ }) { - const response = await this.apify.runTaskSynchronously({ + const { + status, + id, + actId, + startedAt, + finishedAt, + options: { build }, + buildId, + defaultKeyValueStoreId, + defaultDatasetId, + defaultRequestQueueId, + consoleUrl, + } = await this.apify.runTaskSynchronously({ taskId: this.taskId, params: { timeout: this.timeout, memory: this.memory, build: this.build, - clean: this.clean, - fields: this.fields && this.fields.join(), - omit: this.omit && this.omit.join(), - flatten: this.flatten && this.flatten.join(), - maxItems: this.maxResults, + waitSecs: this.waitSecs ?? 28, // up to 30 seconds for a whole step execution if not on a paid plan }, }); - $.export("$summary", `Successfully ran task with ID: ${this.taskId}`); + const datasetItems = []; + + if (ACTOR_JOB_TERMINAL_STATUSES.includes(status)) { + if (status !== ACTOR_JOB_STATUSES.SUCCEEDED) { + throw new Error(`Run has finished with status: ${status}. Inspect it here: ${consoleUrl}`); + } + const { items } = await this.apify.listDatasetItems({ + datasetId: defaultDatasetId, + params: { + clean: this.clean, + fields: this.fields && this.fields.join(), + omit: this.omit && this.omit.join(), + flatten: this.flatten && this.flatten.join(), + limit: this.limit, + }, + }); + datasetItems.push(...items); + } else { + throw new Error(`The run did not finish in time (${status}): waiting for run to finish timed out or Pipedream platform limitation were reached. To retrieve the items reliably, chain this step with a Get Dataset Items step.`); + } - return response; + $.export("$summary", `Run with task id ${this.taskId} finished successfully. Retrieved ${datasetItems.length} dataset items.`); + return { + runId: id, + actId, + startedAt, + finishedAt, + build, + buildId, + defaultKeyValueStoreId, + defaultDatasetId, + defaultRequestQueueId, + datasetItems, + }; }, }; diff --git a/components/apify/apify.app.mjs b/components/apify/apify.app.mjs index d17fbec4c2c44..af8bb267037c3 100644 --- a/components/apify/apify.app.mjs +++ b/components/apify/apify.app.mjs @@ -239,15 +239,11 @@ export default { return this._client().dataset(datasetId) .listItems(params); }, - async runTaskSynchronously({ + runTaskSynchronously({ taskId, params, }) { - const run = await this._client().task(taskId) + return this._client().task(taskId) .call({}, params); - - return this.listDatasetItems({ - datasetId: run.defaultDatasetId, - }); }, setKeyValueStoreRecord({ storeId, recordKey, data, From f7847fefe01dc7ea8ffdbffc7da367f23cdeff3b Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Mon, 25 Aug 2025 16:05:54 +0200 Subject: [PATCH 7/8] fix(apify-scrape-single-url): incorporate timeouts, rework the whole API interaction logic --- .../run-task-synchronously.mjs | 14 +- .../scrape-single-url/scrape-single-url.mjs | 142 +++++++++++++----- components/apify/apify.app.mjs | 23 ++- components/apify/common/constants.mjs | 2 +- pnpm-lock.yaml | 106 ++++++++++--- 5 files changed, 215 insertions(+), 72 deletions(-) diff --git a/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs b/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs index 1714c5a7009e8..d69161b78a400 100644 --- a/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs +++ b/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs @@ -20,12 +20,10 @@ export default { }, // Start task run options paidPlan: { - type: "boolean", - label: "Paid plan", - description: "Indicates whether the current user is on a paid plan. Paid plans allow longer execution times for this step. If set to true but the user is actually on a Free plan, the step will fail.", - optional: false, - default: false, - reloadProps: true, + propDefinition: [ + apify, + "paidPlan", + ], }, timeout: { type: "integer", @@ -85,7 +83,7 @@ export default { label: "Waiting time (seconds)", description: "Specifies how long to wait for the run to complete. If not set, the wait time defaults to the Pipedream’s platform limits (up to 300 seconds for the whole step execution).", optional: true, - default: 280, + default: this.apify.getRequestTimeout(true), }; } return props; @@ -109,7 +107,7 @@ export default { timeout: this.timeout, memory: this.memory, build: this.build, - waitSecs: this.waitSecs ?? 28, // up to 30 seconds for a whole step execution if not on a paid plan + waitSecs: this.waitSecs ?? this.apify.getRequestTimeout(), }, }); diff --git a/components/apify/actions/scrape-single-url/scrape-single-url.mjs b/components/apify/actions/scrape-single-url/scrape-single-url.mjs index 02545e2753e5e..342e121981307 100644 --- a/components/apify/actions/scrape-single-url/scrape-single-url.mjs +++ b/components/apify/actions/scrape-single-url/scrape-single-url.mjs @@ -1,5 +1,5 @@ import apify from "../../apify.app.mjs"; -import { ACTOR_ID } from "../../common/constants.mjs"; +import { WCC_ACTOR_ID } from "../../common/constants.mjs"; import { ACTOR_JOB_STATUSES, ACTOR_JOB_TERMINAL_STATUSES, } from "@apify/consts"; @@ -12,6 +12,12 @@ export default { type: "action", props: { apify, + paidPlan: { + propDefinition: [ + apify, + "paidPlan", + ], + }, url: { type: "string", label: "URL", @@ -35,14 +41,72 @@ export default { label: "Raw HTTP client (Cheerio) - Extremely fast, but cannot handle dynamic content", value: "cheerio", }, + { + label: "The crawler automatically switches between raw HTTP for static pages and Chrome browser (via Playwright) for dynamic pages, to get the maximum performance wherever possible.", + value: "playwright:adaptive", + }, + ], + }, + clean: { + propDefinition: [ + apify, + "clean", + ], + }, + fields: { + propDefinition: [ + apify, + "fields", + ], + }, + omit: { + propDefinition: [ + apify, + "omit", + ], + }, + flatten: { + propDefinition: [ + apify, + "flatten", + ], + }, + limit: { + propDefinition: [ + apify, + "limit", ], }, }, + async additionalProps() { + const props = {}; + if (this.paidPlan) { + props.waitSecs = { + type: "integer", + label: "Waiting time (seconds)", + description: "Specifies how long to wait for the run to complete. If not set, the wait time defaults to the Pipedream’s platform limits (up to 300 seconds for the whole step execution).", + optional: true, + default: this.apify.getRequestTimeout(true), + }; + } + return props; + }, async run({ $ }) { - const startActorResponse = await this.apify.runActorAsynchronously({ - $, - actorId: ACTOR_ID, - data: { + const { + status, + id, + actId, + startedAt, + finishedAt, + options: { build }, + buildId, + defaultKeyValueStoreId, + defaultDatasetId, + defaultRequestQueueId, + consoleUrl, + } = await this.apify.runActor({ + actorId: WCC_ACTOR_ID, + input: { crawlerType: this.crawlerType, maxCrawlDepth: 0, maxCrawlPages: 1, @@ -53,46 +117,44 @@ export default { }, ], }, - }); - - const { - data: { - id: runId, defaultDatasetId, + options: { + waitSecs: this.apify.getRequestTimeout(this.waitSecs), }, - } = startActorResponse; + }); - let actorRunStatus = null; - let retries = 0; - const maxRetries = 30; - const delay = 1000; + const datasetItems = []; - while ((!actorRunStatus || !ACTOR_JOB_TERMINAL_STATUSES.includes(actorRunStatus)) - && retries < maxRetries - ) { - await new Promise((resolve) => setTimeout(resolve, delay)); - const runDetails = await this.apify.getActorRun({ - runId, + if (ACTOR_JOB_TERMINAL_STATUSES.includes(status)) { + if (status !== ACTOR_JOB_STATUSES.SUCCEEDED) { + throw new Error(`Run has finished with status: ${status}. Inspect it here: ${consoleUrl}`); + } + const { items } = await this.apify.listDatasetItems({ + datasetId: defaultDatasetId, + params: { + clean: this.clean, + fields: this.fields && this.fields.join(), + omit: this.omit && this.omit.join(), + flatten: this.flatten && this.flatten.join(), + limit: this.limit, + }, }); - actorRunStatus = runDetails.status; - retries++; - } - - if (actorRunStatus !== ACTOR_JOB_STATUSES.SUCCEEDED) { - throw new Error(`Actor run did not succeed. Final status: ${actorRunStatus}`); + datasetItems.push(...items); + } else { + throw new Error(`The run did not finish in time (${status}): waiting for run to finish timed out or Pipedream platform limitation were reached. To retrieve the items reliably, chain this step with a Get Dataset Items step.`); } - const { items } = await this.apify.listDatasetItems({ - $, - datasetId: defaultDatasetId, - params: { - limit: 1, - offset: 0, - }, - }); - - console.log(items); - - $.export("$summary", `Successfully scraped content from ${this.url}`); - return items[0]; + $.export("$summary", "Run of Web Content Crawler finished successfully."); + return { + runId: id, + actId, + startedAt, + finishedAt, + build, + buildId, + defaultKeyValueStoreId, + defaultDatasetId, + defaultRequestQueueId, + datasetItems, + }; }, }; diff --git a/components/apify/apify.app.mjs b/components/apify/apify.app.mjs index af8bb267037c3..6026c3e5e38d3 100644 --- a/components/apify/apify.app.mjs +++ b/components/apify/apify.app.mjs @@ -128,6 +128,15 @@ export default { default: LIMIT, optional: true, }, + // Start task run options + paidPlan: { + type: "boolean", + label: "Paid plan", + description: "Indicates whether the current user is on a paid plan. Paid plans allow longer execution times for this step. If set to true but the user is actually on a Free plan, the step will fail.", + optional: false, + default: false, + reloadProps: true, + }, }, methods: { _client() { @@ -153,10 +162,10 @@ export default { .delete(); }, runActor({ - actorId, data, params, + actorId, input, options, }) { return this._client().actor(actorId) - .call(data, params); + .call(input, options); }, getActorRun({ runId }) { return this._client().run(runId) @@ -259,5 +268,15 @@ export default { } return `${username}/${name}`; }, + /* + @returns Maximum request duration limit + @description Individual steps execution can take up to 30 seconds, + if the user is not on a paid plan, otherwise timeout it is set to 300 seconds. + */ + getRequestTimeout(isPaidPlan = false) { + return isPaidPlan + ? 298 + : 28; + }, }, }; diff --git a/components/apify/common/constants.mjs b/components/apify/common/constants.mjs index a16d51a32af1a..fdf2a04b3a3c2 100644 --- a/components/apify/common/constants.mjs +++ b/components/apify/common/constants.mjs @@ -1,2 +1,2 @@ -export const ACTOR_ID = "aYG0l9s7dbB7j3gbS"; +export const WCC_ACTOR_ID = "aYG0l9s7dbB7j3gbS"; export const LIMIT = 100; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ea66ec975f858..fc175238cb065 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -935,8 +935,11 @@ importers: specifier: ^2.41.0 version: 2.43.0 '@pipedream/platform': - specifier: ^3.0.3 - version: 3.0.3 + specifier: ^3.1.0 + version: 3.1.0 + apify-client: + specifier: ^2.12.6 + version: 2.15.0 components/apilio: {} @@ -1078,7 +1081,10 @@ importers: version: 1.6.6 components/asters: - specifiers: {} + dependencies: + '@pipedream/platform': + specifier: ^3.1.0 + version: 3.1.0 components/astica_ai: dependencies: @@ -15998,14 +16004,6 @@ importers: specifier: ^6.0.0 version: 6.2.0 - modelcontextprotocol/node_modules2/@modelcontextprotocol/sdk/dist/cjs: {} - - modelcontextprotocol/node_modules2/@modelcontextprotocol/sdk/dist/esm: {} - - modelcontextprotocol/node_modules2/zod-to-json-schema/dist/cjs: {} - - modelcontextprotocol/node_modules2/zod-to-json-schema/dist/esm: {} - packages/ai: dependencies: '@pipedream/sdk': @@ -16348,6 +16346,15 @@ packages: '@apify/consts@2.43.0': resolution: {integrity: sha512-6bhzXeftGa+MrO0XwHLLBJyP9Vc2gZXsbk3d8rcDmiEMJwChA+Qw1WD6BWI9zVazPeXb2OrjzOwfe05f59RD4g==} + '@apify/consts@2.44.1': + resolution: {integrity: sha512-d3/IGuJRLtqDizBA0ZWKrH+U9/gt9k7A9bJE64KKsTi58WfkL7MTsmWf/XBsr1wrju+eAFiZPMwFicoyLlLDug==} + + '@apify/log@2.5.22': + resolution: {integrity: sha512-2b5bYTgHHnl36LE6JjtEc1QoieiwzKzDVajdTyuQTlgJP06SRGL9lDHqMn/unpdYE0ViZgLfWvLMQ3uBen+dog==} + + '@apify/utilities@2.18.2': + resolution: {integrity: sha512-sLB1qEum2fs+odgTJ+x11R/QCYp/zVX/CYLhst9oub7mfo1geDvioOtZjljptS7aQLyqtco3hgn+Co3rtrIC1w==} + '@apimatic/schema@0.6.0': resolution: {integrity: sha512-JgG32LQRLphHRWsn64vIt7wD2m+JH46swM6ZrY7g1rdiGiKV5m+A+TBrJKoUUQRmS14azMgePNZY30NauWqzLg==} engines: {node: '>=10.4.0'} @@ -17578,6 +17585,10 @@ packages: '@corex/deepmerge@4.0.43': resolution: {integrity: sha512-N8uEMrMPL0cu/bdboEWpQYb/0i2K5Qn8eCsxzOmxSggJbbQte7ljMRoXm917AbntqTGOzdTu+vP3KOOzoC70HQ==} + '@crawlee/types@3.14.1': + resolution: {integrity: sha512-IGO1krH5MdlINDwfESwMtYK/fyxeWoN5E9wCpabHvIYabUd6eijcR9mKzercrtMXxNG5RJUt8StSSeue1B0G7g==} + engines: {node: '>=16.0.0'} + '@csstools/css-parser-algorithms@3.0.4': resolution: {integrity: sha512-Up7rBoV77rv29d3uKHUIVubz1BTcgyUK72IvCQAbfbMv584xHcGKCKbWh7i8hPrRJ7qU4Y8IO3IY9m+iTB7P3A==} engines: {node: '>=18'} @@ -17626,12 +17637,12 @@ packages: '@datadog/sketches-js@2.1.1': resolution: {integrity: sha512-d5RjycE+MObE/hU+8OM5Zp4VjTwiPLRa8299fj7muOmR16fb942z8byoMbCErnGh0lBevvgkGrLclQDvINbIyg==} - '@definitelytyped/header-parser@0.2.19': - resolution: {integrity: sha512-zu+RxQpUCgorYUQZoyyrRIn9CljL1CeM4qak3NDeMO1r7tjAkodfpAGnVzx/6JR2OUk0tAgwmZxNMSwd9LVgxw==} + '@definitelytyped/header-parser@0.2.20': + resolution: {integrity: sha512-97YPAlUo8XjWNtZ+6k+My+50/ljE2iX6KEPjOZ1Az1RsZdKwJ6taAX3F5g6SY1SJr50bzdm2RZzyQNdRmHcs4w==} engines: {node: '>=18.18.0'} - '@definitelytyped/typescript-versions@0.1.8': - resolution: {integrity: sha512-iz6q9aTwWW7CzN2g8jFQfZ955D63LA+wdIAKz4+2pCc/7kokmEHie1/jVWSczqLFOlmH+69bWQxIurryBP/sig==} + '@definitelytyped/typescript-versions@0.1.9': + resolution: {integrity: sha512-Qjalw9eNlcTjXhzx0Q6kHKuRCOUt/M5RGGRGKsiYlm/nveGvPX9liZSQlGXZVwyQ5I9qvq/GdaWiPchQ+ZXOrQ==} engines: {node: '>=18.18.0'} '@definitelytyped/utils@0.1.8': @@ -21953,6 +21964,9 @@ packages: resolution: {integrity: sha512-RbqDVdRVBd3Y/M+iAkFj4IqHhBR86FoyfcRkRs77qYQW9nL+tBC+aPkgKtlhirMHjoCmNrxnh0CNhCTqFq4PSg==} engines: {node: ^12 || ^14 || ^16} + apify-client@2.15.0: + resolution: {integrity: sha512-c5gtl6dRhY9r7bMsCkmp1dbE8ZEq4CWB2otRtVFqN4rm01SflXRLFuqXvsLqXQ3JCUlVnpMPsqM3XmHGzcbfSQ==} + aproba@1.2.0: resolution: {integrity: sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw==} @@ -27967,6 +27981,10 @@ packages: resolution: {integrity: sha512-D2FR03Vir7FIu45XBY20mTb+/ZSWB00sjU9jdQXt83gDrI4Ztz5Fs7/yy74g2N5SVQY4xY1qDr4rNddwYRVX0g==} engines: {node: '>=0.10.0'} + ow@0.28.2: + resolution: {integrity: sha512-dD4UpyBh/9m4X2NVjA+73/ZPBRF+uF4zIMFvvQsabMiEK8x41L3rQ8EENOi35kyyoaJwNxEeJcP6Fj1H4U409Q==} + engines: {node: '>=12'} + p-cancelable@2.1.1: resolution: {integrity: sha512-BZOr3nRQHOntUjTrH8+Lh54smKHoHyur8We1V8DSMVrl5A2malOOwuJRnKRDjSnkoeBh4at6BwEnb5I7Jl31wg==} engines: {node: '>=8'} @@ -30831,6 +30849,10 @@ packages: resolution: {integrity: sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA==} engines: {node: '>=10.12.0'} + vali-date@1.0.0: + resolution: {integrity: sha512-sgECfZthyaCKW10N0fm27cg8HYTFK5qMWgypqkXMQ4Wbl/zZKx7xZICgcoxIIE+WFAP/MBL2EFwC/YvLxw3Zeg==} + engines: {node: '>=0.10.0'} + valid-data-url@4.0.1: resolution: {integrity: sha512-t0oA6VCnlQ/MPKP/Ie9ZD3biEpB2JTxK1Hx4KC72RbhubL9HsXznoBn228UQTazL7cPvsY36bhzt3fk424TjyA==} engines: {node: '>=10'} @@ -31539,6 +31561,18 @@ snapshots: '@apify/consts@2.43.0': {} + '@apify/consts@2.44.1': {} + + '@apify/log@2.5.22': + dependencies: + '@apify/consts': 2.44.1 + ansi-colors: 4.1.3 + + '@apify/utilities@2.18.2': + dependencies: + '@apify/consts': 2.44.1 + '@apify/log': 2.5.22 + '@apimatic/schema@0.6.0': dependencies: tslib: 2.8.1 @@ -34805,6 +34839,10 @@ snapshots: '@corex/deepmerge@4.0.43': {} + '@crawlee/types@3.14.1': + dependencies: + tslib: 2.8.1 + '@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3)': dependencies: '@csstools/css-tokenizer': 3.0.3 @@ -34854,13 +34892,13 @@ snapshots: '@datadog/sketches-js@2.1.1': {} - '@definitelytyped/header-parser@0.2.19': + '@definitelytyped/header-parser@0.2.20': dependencies: - '@definitelytyped/typescript-versions': 0.1.8 + '@definitelytyped/typescript-versions': 0.1.9 '@definitelytyped/utils': 0.1.8 semver: 7.7.2 - '@definitelytyped/typescript-versions@0.1.8': {} + '@definitelytyped/typescript-versions@0.1.9': {} '@definitelytyped/utils@0.1.8': dependencies: @@ -40100,6 +40138,22 @@ snapshots: - encoding - openapi-types + apify-client@2.15.0: + dependencies: + '@apify/consts': 2.43.0 + '@apify/log': 2.5.22 + '@apify/utilities': 2.18.2 + '@crawlee/types': 3.14.1 + agentkeepalive: 4.5.0 + async-retry: 1.3.3 + axios: 1.10.0 + content-type: 1.0.5 + ow: 0.28.2 + tslib: 2.8.1 + type-fest: 4.27.0 + transitivePeerDependencies: + - debug + aproba@1.2.0: optional: true @@ -42208,7 +42262,7 @@ snapshots: dts-critic@3.3.11(typescript@5.7.2): dependencies: - '@definitelytyped/header-parser': 0.2.19 + '@definitelytyped/header-parser': 0.2.20 command-exists: 1.2.9 rimraf: 3.0.2 semver: 6.3.1 @@ -42220,8 +42274,8 @@ snapshots: dtslint@4.2.1(typescript@5.7.2): dependencies: - '@definitelytyped/header-parser': 0.2.19 - '@definitelytyped/typescript-versions': 0.1.8 + '@definitelytyped/header-parser': 0.2.20 + '@definitelytyped/typescript-versions': 0.1.9 '@definitelytyped/utils': 0.1.8 dts-critic: 3.3.11(typescript@5.7.2) fs-extra: 6.0.1 @@ -48137,6 +48191,14 @@ snapshots: os-tmpdir@1.0.2: {} + ow@0.28.2: + dependencies: + '@sindresorhus/is': 4.6.0 + callsites: 3.1.0 + dot-prop: 6.0.1 + lodash.isequal: 4.5.0 + vali-date: 1.0.0 + p-cancelable@2.1.1: {} p-cancelable@3.0.0: {} @@ -52045,6 +52107,8 @@ snapshots: '@types/istanbul-lib-coverage': 2.0.6 convert-source-map: 2.0.0 + vali-date@1.0.0: {} + valid-data-url@4.0.1: {} validate-npm-package-license@3.0.4: From c17fe73e515f79d4b0f86d47b5104a154a7b98c0 Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Mon, 25 Aug 2025 18:18:50 +0200 Subject: [PATCH 8/8] fix(apify-set-key-value-store-record): detection of content type, fixed API interaction --- .../set-key-value-store-record.mjs | 96 +++++++++++++++++-- components/apify/apify.app.mjs | 7 +- 2 files changed, 91 insertions(+), 12 deletions(-) diff --git a/components/apify/actions/set-key-value-store-record/set-key-value-store-record.mjs b/components/apify/actions/set-key-value-store-record/set-key-value-store-record.mjs index cea0a30161568..f73670e316cfe 100644 --- a/components/apify/actions/set-key-value-store-record/set-key-value-store-record.mjs +++ b/components/apify/actions/set-key-value-store-record/set-key-value-store-record.mjs @@ -1,11 +1,11 @@ import apify from "../../apify.app.mjs"; -import { parseObject } from "../../common/utils.mjs"; export default { key: "apify-set-key-value-store-record", name: "Set Key-Value Store Record", - description: "Create or update a record in the key-value store of Apify. [See the documentation](https://docs.apify.com/api/v2#/reference/key-value-stores/record-collection/put-record)", - version: "0.0.4", + description: + "Create or update a record in an Apify Key-Value Store. Supports strings, numbers, booleans, null, arrays, and objects. Automatically infers content type (JSON vs. plain text).", + version: "0.2.0", type: "action", props: { apify, @@ -14,25 +14,101 @@ export default { apify, "keyValueStoreId", ], + optional: false, }, key: { type: "string", label: "Key", - description: "The key of the record to create or update in the key-value store.", + description: "The key of the record to create or update.", + optional: false, }, value: { - type: "object", + type: "any", label: "Value", - description: "The value of the record to create or update in the key-value store.", + description: + "String, number, boolean, null, array, or object. Strings that are valid JSON will be stored as JSON; otherwise as plain text.", + optional: false, + }, + }, + methods: { + inferFromValue(input) { + // Returns { data, contentType, mode } + if ( + input === null || + typeof input === "number" || + typeof input === "boolean" || + Array.isArray(input) || + (typeof input === "object") + ) { + return { + data: input, + contentType: "application/json; charset=utf-8", + mode: "json", + }; + } + if (typeof input === "string") { + const trimmed = input.trim(); + // Try to parse as JSON if it looks plausible + if (this.looksLikeJson(trimmed)) { + try { + const parsed = JSON.parse(trimmed); + return { + data: parsed, + contentType: "application/json; charset=utf-8", + mode: "json-from-string", + }; + } catch { + // fall back to text/plain + return { + data: trimmed, + contentType: "text/plain; charset=utf-8", + mode: "plain-text", + }; + } + } + } + // Fallback: coerce to string as text/plain + return { + data: String(input ?? ""), + contentType: "text/plain; charset=utf-8", + mode: "coerced-text", + }; + }, + looksLikeJson(string) { + if (!string) return false; + const firstChar = string[0]; + const lastChar = string[string.length - 1]; + if ((firstChar === "{" && lastChar === "}") || (firstChar === "[" && lastChar === "]")) return true; + if (string === "null" || string === "true" || string === "false") return true; + if (firstChar === "\"" && lastChar === "\"") return true; + return !Number.isNaN(Number(string)); + }, }, async run({ $ }) { + const { + data, contentType, mode, + } = this.inferFromValue(this.value); + const response = await this.apify.setKeyValueStoreRecord({ storeId: this.keyValueStoreId, - recordKey: this.key, - data: parseObject(this.value), + key: this.key, + value: data, + contentType, }); - $.export("$summary", `Successfully set the record with key '${this.key}'`); - return response; + + $.export( + "$summary", + `Set record '${this.key}' as ${mode} (${contentType}) in store '${this.keyValueStoreId}'.`, + ); + + return { + success: true, + storeId: this.keyValueStoreId, + key: this.key, + mode, + usedContentType: contentType, + apifyResponse: response, + }; }, }; diff --git a/components/apify/apify.app.mjs b/components/apify/apify.app.mjs index 6026c3e5e38d3..517cc79ce6808 100644 --- a/components/apify/apify.app.mjs +++ b/components/apify/apify.app.mjs @@ -255,10 +255,13 @@ export default { .call({}, params); }, setKeyValueStoreRecord({ - storeId, recordKey, data, + storeId, key, value, }) { return this._client().keyValueStore(storeId) - .setRecord(recordKey, data); + .setRecord({ + key, + value, + }); }, formatActorOrTaskLabel({ title, username, name,