feat(actions): add typeText, pressKey, fix playwright screenshot/waitFor

This commit is contained in:
Gergő Móricz 2024-09-20 21:02:53 +02:00
parent 01f42b980d
commit 3dd912ec91
5 changed files with 50 additions and 12 deletions

View File

@ -70,6 +70,14 @@ export const actionsSchema = z.array(z.union([
type: z.literal("screenshot"), type: z.literal("screenshot"),
fullPage: z.boolean().default(false), fullPage: z.boolean().default(false),
}), }),
z.object({
type: z.literal("typeText"),
text: z.string(),
}),
z.object({
type: z.literal("pressKey"),
key: z.string(),
}),
])); ]));
export const scrapeOptions = z.object({ export const scrapeOptions = z.object({

View File

@ -19,6 +19,12 @@ export type Action = {
} | { } | {
type: "screenshot", type: "screenshot",
fullPage?: boolean, fullPage?: boolean,
} | {
type: "typeText",
text: string,
} | {
type: "pressKey",
key: string,
}; };
export type PageOptions = { export type PageOptions = {

View File

@ -21,6 +21,9 @@ import * as Sentry from "@sentry/node";
export async function scrapWithFireEngine({ export async function scrapWithFireEngine({
url, url,
actions, actions,
waitFor = 0,
screenshot = false,
fullPageScreenshot = false,
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false }, pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false },
fireEngineOptions = {}, fireEngineOptions = {},
headers, headers,
@ -30,6 +33,9 @@ export async function scrapWithFireEngine({
}: { }: {
url: string; url: string;
actions?: Action[]; actions?: Action[];
waitFor?: number;
screenshot?: boolean;
fullPageScreenshot?: boolean;
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean }; pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean };
fireEngineOptions?: FireEngineOptions; fireEngineOptions?: FireEngineOptions;
headers?: Record<string, string>; headers?: Record<string, string>;
@ -50,7 +56,10 @@ export async function scrapWithFireEngine({
try { try {
const reqParams = await generateRequestParams(url); const reqParams = await generateRequestParams(url);
let waitParam = reqParams["params"]?.wait ?? waitFor;
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp"; let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp";
let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions; let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
@ -95,6 +104,9 @@ export async function scrapWithFireEngine({
{ {
url: url, url: url,
headers: headers, headers: headers,
wait: waitParam,
screenshot: screenshotParam,
fullPageScreenshot: fullPageScreenshotParam,
disableJsDom: pageOptions?.disableJsDom ?? false, disableJsDom: pageOptions?.disableJsDom ?? false,
priority, priority,
engine, engine,

View File

@ -204,6 +204,7 @@ export async function scrapSingleUrl(
if (process.env.FIRE_ENGINE_BETA_URL) { if (process.env.FIRE_ENGINE_BETA_URL) {
const response = await scrapWithFireEngine({ const response = await scrapWithFireEngine({
url, url,
...(engine === "chrome-cdp" ? ({
actions: [ actions: [
...(pageOptions.waitFor ? [{ ...(pageOptions.waitFor ? [{
type: "wait" as const, type: "wait" as const,
@ -215,6 +216,11 @@ export async function scrapSingleUrl(
}] : []), }] : []),
...(pageOptions.actions ?? []), ...(pageOptions.actions ?? []),
], ],
}) : ({
waitFor: pageOptions.waitFor,
screenshot: pageOptions.screenshot,
fullPageScreenshot: pageOptions.fullPageScreenshot,
})),
pageOptions: pageOptions, pageOptions: pageOptions,
headers: pageOptions.headers, headers: pageOptions.headers,
fireEngineOptions: { fireEngineOptions: {

View File

@ -93,7 +93,13 @@ export type Action = {
} | { } | {
type: "screenshot", type: "screenshot",
fullPage?: boolean, fullPage?: boolean,
}; } | {
type: "typeText",
text: string,
} | {
type: "pressKey",
key: string,
};;
export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions { export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
extract?: { extract?: {