mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-10 11:48:59 +08:00
feat(actions): add typeText, pressKey, fix playwright screenshot/waitFor
This commit is contained in:
parent
01f42b980d
commit
3dd912ec91
@ -70,6 +70,14 @@ export const actionsSchema = z.array(z.union([
|
||||
type: z.literal("screenshot"),
|
||||
fullPage: z.boolean().default(false),
|
||||
}),
|
||||
z.object({
|
||||
type: z.literal("typeText"),
|
||||
text: z.string(),
|
||||
}),
|
||||
z.object({
|
||||
type: z.literal("pressKey"),
|
||||
key: z.string(),
|
||||
}),
|
||||
]));
|
||||
|
||||
export const scrapeOptions = z.object({
|
||||
|
@ -19,6 +19,12 @@ export type Action = {
|
||||
} | {
|
||||
type: "screenshot",
|
||||
fullPage?: boolean,
|
||||
} | {
|
||||
type: "typeText",
|
||||
text: string,
|
||||
} | {
|
||||
type: "pressKey",
|
||||
key: string,
|
||||
};
|
||||
|
||||
export type PageOptions = {
|
||||
|
@ -21,6 +21,9 @@ import * as Sentry from "@sentry/node";
|
||||
export async function scrapWithFireEngine({
|
||||
url,
|
||||
actions,
|
||||
waitFor = 0,
|
||||
screenshot = false,
|
||||
fullPageScreenshot = false,
|
||||
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false },
|
||||
fireEngineOptions = {},
|
||||
headers,
|
||||
@ -30,6 +33,9 @@ export async function scrapWithFireEngine({
|
||||
}: {
|
||||
url: string;
|
||||
actions?: Action[];
|
||||
waitFor?: number;
|
||||
screenshot?: boolean;
|
||||
fullPageScreenshot?: boolean;
|
||||
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean };
|
||||
fireEngineOptions?: FireEngineOptions;
|
||||
headers?: Record<string, string>;
|
||||
@ -50,7 +56,10 @@ export async function scrapWithFireEngine({
|
||||
|
||||
try {
|
||||
const reqParams = await generateRequestParams(url);
|
||||
let waitParam = reqParams["params"]?.wait ?? waitFor;
|
||||
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp";
|
||||
let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
|
||||
let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
|
||||
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
|
||||
|
||||
|
||||
@ -95,6 +104,9 @@ export async function scrapWithFireEngine({
|
||||
{
|
||||
url: url,
|
||||
headers: headers,
|
||||
wait: waitParam,
|
||||
screenshot: screenshotParam,
|
||||
fullPageScreenshot: fullPageScreenshotParam,
|
||||
disableJsDom: pageOptions?.disableJsDom ?? false,
|
||||
priority,
|
||||
engine,
|
||||
|
@ -204,17 +204,23 @@ export async function scrapSingleUrl(
|
||||
if (process.env.FIRE_ENGINE_BETA_URL) {
|
||||
const response = await scrapWithFireEngine({
|
||||
url,
|
||||
actions: [
|
||||
...(pageOptions.waitFor ? [{
|
||||
type: "wait" as const,
|
||||
milliseconds: pageOptions.waitFor,
|
||||
}] : []),
|
||||
...((pageOptions.screenshot || pageOptions.fullPageScreenshot) ? [{
|
||||
type: "screenshot" as const,
|
||||
fullPage: !!pageOptions.fullPageScreenshot,
|
||||
}] : []),
|
||||
...(pageOptions.actions ?? []),
|
||||
],
|
||||
...(engine === "chrome-cdp" ? ({
|
||||
actions: [
|
||||
...(pageOptions.waitFor ? [{
|
||||
type: "wait" as const,
|
||||
milliseconds: pageOptions.waitFor,
|
||||
}] : []),
|
||||
...((pageOptions.screenshot || pageOptions.fullPageScreenshot) ? [{
|
||||
type: "screenshot" as const,
|
||||
fullPage: !!pageOptions.fullPageScreenshot,
|
||||
}] : []),
|
||||
...(pageOptions.actions ?? []),
|
||||
],
|
||||
}) : ({
|
||||
waitFor: pageOptions.waitFor,
|
||||
screenshot: pageOptions.screenshot,
|
||||
fullPageScreenshot: pageOptions.fullPageScreenshot,
|
||||
})),
|
||||
pageOptions: pageOptions,
|
||||
headers: pageOptions.headers,
|
||||
fireEngineOptions: {
|
||||
|
@ -93,7 +93,13 @@ export type Action = {
|
||||
} | {
|
||||
type: "screenshot",
|
||||
fullPage?: boolean,
|
||||
};
|
||||
} | {
|
||||
type: "typeText",
|
||||
text: string,
|
||||
} | {
|
||||
type: "pressKey",
|
||||
key: string,
|
||||
};;
|
||||
|
||||
export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
|
||||
extract?: {
|
||||
|
Loading…
x
Reference in New Issue
Block a user