mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-11 17:48:59 +08:00
feat(actions): add typeText, pressKey, fix playwright screenshot/waitFor
This commit is contained in:
parent
01f42b980d
commit
3dd912ec91
@ -70,6 +70,14 @@ export const actionsSchema = z.array(z.union([
|
|||||||
type: z.literal("screenshot"),
|
type: z.literal("screenshot"),
|
||||||
fullPage: z.boolean().default(false),
|
fullPage: z.boolean().default(false),
|
||||||
}),
|
}),
|
||||||
|
z.object({
|
||||||
|
type: z.literal("typeText"),
|
||||||
|
text: z.string(),
|
||||||
|
}),
|
||||||
|
z.object({
|
||||||
|
type: z.literal("pressKey"),
|
||||||
|
key: z.string(),
|
||||||
|
}),
|
||||||
]));
|
]));
|
||||||
|
|
||||||
export const scrapeOptions = z.object({
|
export const scrapeOptions = z.object({
|
||||||
|
@ -19,6 +19,12 @@ export type Action = {
|
|||||||
} | {
|
} | {
|
||||||
type: "screenshot",
|
type: "screenshot",
|
||||||
fullPage?: boolean,
|
fullPage?: boolean,
|
||||||
|
} | {
|
||||||
|
type: "typeText",
|
||||||
|
text: string,
|
||||||
|
} | {
|
||||||
|
type: "pressKey",
|
||||||
|
key: string,
|
||||||
};
|
};
|
||||||
|
|
||||||
export type PageOptions = {
|
export type PageOptions = {
|
||||||
|
@ -21,6 +21,9 @@ import * as Sentry from "@sentry/node";
|
|||||||
export async function scrapWithFireEngine({
|
export async function scrapWithFireEngine({
|
||||||
url,
|
url,
|
||||||
actions,
|
actions,
|
||||||
|
waitFor = 0,
|
||||||
|
screenshot = false,
|
||||||
|
fullPageScreenshot = false,
|
||||||
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false },
|
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false },
|
||||||
fireEngineOptions = {},
|
fireEngineOptions = {},
|
||||||
headers,
|
headers,
|
||||||
@ -30,6 +33,9 @@ export async function scrapWithFireEngine({
|
|||||||
}: {
|
}: {
|
||||||
url: string;
|
url: string;
|
||||||
actions?: Action[];
|
actions?: Action[];
|
||||||
|
waitFor?: number;
|
||||||
|
screenshot?: boolean;
|
||||||
|
fullPageScreenshot?: boolean;
|
||||||
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean };
|
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean };
|
||||||
fireEngineOptions?: FireEngineOptions;
|
fireEngineOptions?: FireEngineOptions;
|
||||||
headers?: Record<string, string>;
|
headers?: Record<string, string>;
|
||||||
@ -50,7 +56,10 @@ export async function scrapWithFireEngine({
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const reqParams = await generateRequestParams(url);
|
const reqParams = await generateRequestParams(url);
|
||||||
|
let waitParam = reqParams["params"]?.wait ?? waitFor;
|
||||||
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp";
|
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp";
|
||||||
|
let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
|
||||||
|
let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
|
||||||
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
|
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
|
||||||
|
|
||||||
|
|
||||||
@ -95,6 +104,9 @@ export async function scrapWithFireEngine({
|
|||||||
{
|
{
|
||||||
url: url,
|
url: url,
|
||||||
headers: headers,
|
headers: headers,
|
||||||
|
wait: waitParam,
|
||||||
|
screenshot: screenshotParam,
|
||||||
|
fullPageScreenshot: fullPageScreenshotParam,
|
||||||
disableJsDom: pageOptions?.disableJsDom ?? false,
|
disableJsDom: pageOptions?.disableJsDom ?? false,
|
||||||
priority,
|
priority,
|
||||||
engine,
|
engine,
|
||||||
|
@ -204,6 +204,7 @@ export async function scrapSingleUrl(
|
|||||||
if (process.env.FIRE_ENGINE_BETA_URL) {
|
if (process.env.FIRE_ENGINE_BETA_URL) {
|
||||||
const response = await scrapWithFireEngine({
|
const response = await scrapWithFireEngine({
|
||||||
url,
|
url,
|
||||||
|
...(engine === "chrome-cdp" ? ({
|
||||||
actions: [
|
actions: [
|
||||||
...(pageOptions.waitFor ? [{
|
...(pageOptions.waitFor ? [{
|
||||||
type: "wait" as const,
|
type: "wait" as const,
|
||||||
@ -215,6 +216,11 @@ export async function scrapSingleUrl(
|
|||||||
}] : []),
|
}] : []),
|
||||||
...(pageOptions.actions ?? []),
|
...(pageOptions.actions ?? []),
|
||||||
],
|
],
|
||||||
|
}) : ({
|
||||||
|
waitFor: pageOptions.waitFor,
|
||||||
|
screenshot: pageOptions.screenshot,
|
||||||
|
fullPageScreenshot: pageOptions.fullPageScreenshot,
|
||||||
|
})),
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
headers: pageOptions.headers,
|
headers: pageOptions.headers,
|
||||||
fireEngineOptions: {
|
fireEngineOptions: {
|
||||||
|
@ -93,7 +93,13 @@ export type Action = {
|
|||||||
} | {
|
} | {
|
||||||
type: "screenshot",
|
type: "screenshot",
|
||||||
fullPage?: boolean,
|
fullPage?: boolean,
|
||||||
};
|
} | {
|
||||||
|
type: "typeText",
|
||||||
|
text: string,
|
||||||
|
} | {
|
||||||
|
type: "pressKey",
|
||||||
|
key: string,
|
||||||
|
};;
|
||||||
|
|
||||||
export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
|
export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
|
||||||
extract?: {
|
extract?: {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user