diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 633bbdf1..45aa3c9b 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -61,8 +61,14 @@ export type ExtractOptions = z.infer; export const actionsSchema = z.array(z.union([ z.object({ type: z.literal("wait"), - milliseconds: z.number().int().positive().finite(), - }), + milliseconds: z.number().int().positive().finite().optional(), + selector: z.string().optional(), + }).refine( + (data) => (data.milliseconds !== undefined || data.selector !== undefined) && !(data.milliseconds !== undefined && data.selector !== undefined), + { + message: "Either 'milliseconds' or 'selector' must be provided, but not both.", + } + ), z.object({ type: z.literal("click"), selector: z.string(), @@ -83,6 +89,9 @@ export const actionsSchema = z.array(z.union([ type: z.literal("scroll"), direction: z.enum(["up", "down"]), }), + z.object({ + type: z.literal("scrape"), + }), ])); export const scrapeOptions = z.object({ diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index 8aa1d004..81bca571 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -12,7 +12,8 @@ export interface Progress { export type Action = { type: "wait", - milliseconds: number, + milliseconds?: number, + selector?: string, } | { type: "click", selector: string, @@ -28,7 +29,9 @@ export type Action = { } | { type: "scroll", direction: "up" | "down" -}; +} | { + type: "scrape", +} export type PageOptions = { includeMarkdown?: boolean; @@ -163,11 +166,17 @@ export class SearchResult { } } +export interface ScrapeActionContent { + url: string; + html: string; +} + export interface FireEngineResponse { html: string; screenshots?: string[]; pageStatusCode?: number; pageError?: string; + scrapeActionContent?: ScrapeActionContent[]; } diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 3bbd74eb..7332874f 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -200,11 +200,13 @@ export async function scrapWithFireEngine({ logParams.html = data.content ?? ""; logParams.response_code = data.pageStatusCode; logParams.error_message = data.pageError ?? data.error; + return { html: data.content ?? "", screenshots: data.screenshots ?? [data.screenshot] ?? [], pageStatusCode: data.pageStatusCode, pageError: data.pageError ?? data.error, + scrapeActionContent: data?.actionContent ?? [], }; } } catch (error) { diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index c7185b79..611a7b5c 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -21,6 +21,7 @@ import { extractLinks } from "./utils/utils"; import { Logger } from "../../lib/logger"; import { ScrapeEvents } from "../../lib/scrape-events"; import { clientSideError } from "../../strings"; +import { ScrapeActionContent } from "../../lib/entities"; dotenv.config(); @@ -180,7 +181,8 @@ export async function scrapSingleUrl( text: string; screenshot: string; actions?: { - screenshots: string[]; + screenshots?: string[]; + scrapes?: ScrapeActionContent[]; }; metadata: { pageStatusCode?: number; pageError?: string | null }; } = { text: "", screenshot: "", metadata: {} }; @@ -259,6 +261,7 @@ export async function scrapSingleUrl( if (pageOptions.actions) { scraperResponse.actions = { screenshots: response.screenshots ?? [], + scrapes: response.scrapeActionContent ?? [], }; } scraperResponse.metadata.pageStatusCode = response.pageStatusCode; diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index b8738e5e..9b23077a 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "1.7.2", + "version": "1.7.3", "description": "JavaScript SDK for Firecrawl API", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index bbe934fe..7ad5a5f0 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -90,7 +90,8 @@ export interface CrawlScrapeOptions { export type Action = { type: "wait", - milliseconds: number, + milliseconds?: number, + selector?: string, } | { type: "click", selector: string, @@ -106,6 +107,8 @@ export type Action = { } | { type: "scroll", direction: "up" | "down", +} | { + type: "scrape", }; export interface ScrapeParams extends CrawlScrapeOptions {