feat(actions): add typeText, pressKey, fix playwright screenshot/waitFor

This commit is contained in:
Gergő Móricz 2024-09-20 21:02:53 +02:00
parent 01f42b980d
commit 3dd912ec91
5 changed files with 50 additions and 12 deletions

View File

@ -70,6 +70,14 @@ export const actionsSchema = z.array(z.union([
type: z.literal("screenshot"),
fullPage: z.boolean().default(false),
}),
z.object({
type: z.literal("typeText"),
text: z.string(),
}),
z.object({
type: z.literal("pressKey"),
key: z.string(),
}),
]));
export const scrapeOptions = z.object({

View File

@ -19,6 +19,12 @@ export type Action = {
} | {
type: "screenshot",
fullPage?: boolean,
} | {
type: "typeText",
text: string,
} | {
type: "pressKey",
key: string,
};
export type PageOptions = {

View File

@ -21,6 +21,9 @@ import * as Sentry from "@sentry/node";
export async function scrapWithFireEngine({
url,
actions,
waitFor = 0,
screenshot = false,
fullPageScreenshot = false,
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false },
fireEngineOptions = {},
headers,
@ -30,6 +33,9 @@ export async function scrapWithFireEngine({
}: {
url: string;
actions?: Action[];
waitFor?: number;
screenshot?: boolean;
fullPageScreenshot?: boolean;
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean };
fireEngineOptions?: FireEngineOptions;
headers?: Record<string, string>;
@ -50,7 +56,10 @@ export async function scrapWithFireEngine({
try {
const reqParams = await generateRequestParams(url);
let waitParam = reqParams["params"]?.wait ?? waitFor;
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp";
let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
@ -95,6 +104,9 @@ export async function scrapWithFireEngine({
{
url: url,
headers: headers,
wait: waitParam,
screenshot: screenshotParam,
fullPageScreenshot: fullPageScreenshotParam,
disableJsDom: pageOptions?.disableJsDom ?? false,
priority,
engine,

View File

@ -204,17 +204,23 @@ export async function scrapSingleUrl(
if (process.env.FIRE_ENGINE_BETA_URL) {
const response = await scrapWithFireEngine({
url,
actions: [
...(pageOptions.waitFor ? [{
type: "wait" as const,
milliseconds: pageOptions.waitFor,
}] : []),
...((pageOptions.screenshot || pageOptions.fullPageScreenshot) ? [{
type: "screenshot" as const,
fullPage: !!pageOptions.fullPageScreenshot,
}] : []),
...(pageOptions.actions ?? []),
],
...(engine === "chrome-cdp" ? ({
actions: [
...(pageOptions.waitFor ? [{
type: "wait" as const,
milliseconds: pageOptions.waitFor,
}] : []),
...((pageOptions.screenshot || pageOptions.fullPageScreenshot) ? [{
type: "screenshot" as const,
fullPage: !!pageOptions.fullPageScreenshot,
}] : []),
...(pageOptions.actions ?? []),
],
}) : ({
waitFor: pageOptions.waitFor,
screenshot: pageOptions.screenshot,
fullPageScreenshot: pageOptions.fullPageScreenshot,
})),
pageOptions: pageOptions,
headers: pageOptions.headers,
fireEngineOptions: {

View File

@ -93,7 +93,13 @@ export type Action = {
} | {
type: "screenshot",
fullPage?: boolean,
};
} | {
type: "typeText",
text: string,
} | {
type: "pressKey",
key: string,
};
export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
extract?: {