From 46048bc94d43696277f9575a8be027c1bef60625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Fri, 28 Mar 2025 12:42:25 +0100 Subject: [PATCH] feat(scrapeURL): return js returns from f-e (FIR-1535) (#1385) * feat(scrapeURL): return js returns from f-e * feat(js-sdk): handle new results --- apps/api/src/controllers/v1/types.ts | 4 +++ .../engines/fire-engine/checkStatus.ts | 32 ++++++++++++++++++- .../scrapeURL/engines/fire-engine/index.ts | 1 + .../src/scraper/scrapeURL/engines/index.ts | 4 +++ apps/js-sdk/firecrawl/src/index.ts | 8 +++++ 5 files changed, 48 insertions(+), 1 deletion(-) diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 9d9109fb..459e5e56 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -541,6 +541,10 @@ export type Document = { actions?: { screenshots?: string[]; scrapes?: ScrapeActionContent[]; + javascriptReturns?: { + type: string, + value: unknown + }[]; }; metadata: { title?: string; diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts index ac6fabfd..47322ef0 100644 --- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts +++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts @@ -42,6 +42,36 @@ const successSchema = z.object({ }) .array() .optional(), + actionResults: z.union([ + z.object({ + idx: z.number(), + type: z.literal("screenshot"), + result: z.object({ + path: z.string(), + }), + }), + z.object({ + idx: z.number(), + type: z.literal("scrape"), + result: z.union([ + z.object({ + url: z.string(), + html: z.string(), + }), + z.object({ + url: z.string(), + accessibility: z.string(), + }), + ]), + }), + z.object({ + idx: z.number(), + type: z.literal("executeJavascript"), + result: z.object({ + return: z.string(), + }), + }), + ]).array().optional(), // chrome-cdp only -- file download handler file: z @@ -138,7 +168,7 @@ export async function fireEngineCheckStatus( } else if ( typeof status.error === "string" && // TODO: improve this later - status.error.includes("Element") + (status.error.includes("Element") || status.error.includes("Javascript execution failed")) ) { throw new ActionError(status.error.split("Error: ")[1]); } else { diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts index c21d9f90..93596fce 100644 --- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts @@ -274,6 +274,7 @@ export async function scrapeURLWithFireEngineChromeCDP( actions: { screenshots: response.screenshots ?? [], scrapes: response.actionContent ?? [], + javascriptReturns: (response.actionResults ?? []).filter(x => x.type === "executeJavascript").map(x => JSON.parse((x.result as any as { return: string }).return)), }, } : {}), diff --git a/apps/api/src/scraper/scrapeURL/engines/index.ts b/apps/api/src/scraper/scrapeURL/engines/index.ts index 0a688fc6..ab2fe79b 100644 --- a/apps/api/src/scraper/scrapeURL/engines/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/index.ts @@ -103,6 +103,10 @@ export type EngineScrapeResult = { actions?: { screenshots: string[]; scrapes: ScrapeActionContent[]; + javascriptReturns: { + type: string; + value: unknown + }[]; }; }; diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 5cc9d119..11fb8d74 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -141,6 +141,14 @@ export interface ScrapeParams