Nick: support for the new actions

This commit is contained in:
Nicolas 2024-10-31 20:01:52 -03:00
parent 3911fe1fba
commit 446acfccde
4 changed files with 21 additions and 3 deletions

View File

@ -62,6 +62,7 @@ export const actionsSchema = z.array(z.union([
z.object({ z.object({
type: z.literal("wait"), type: z.literal("wait"),
milliseconds: z.number().int().positive().finite(), milliseconds: z.number().int().positive().finite(),
selector: z.string().optional(),
}), }),
z.object({ z.object({
type: z.literal("click"), type: z.literal("click"),
@ -83,6 +84,9 @@ export const actionsSchema = z.array(z.union([
type: z.literal("scroll"), type: z.literal("scroll"),
direction: z.enum(["up", "down"]), direction: z.enum(["up", "down"]),
}), }),
z.object({
type: z.literal("scrape"),
}),
])); ]));
export const scrapeOptions = z.object({ export const scrapeOptions = z.object({

View File

@ -12,7 +12,8 @@ export interface Progress {
export type Action = { export type Action = {
type: "wait", type: "wait",
milliseconds: number, milliseconds?: number,
selector?: string,
} | { } | {
type: "click", type: "click",
selector: string, selector: string,
@ -28,7 +29,9 @@ export type Action = {
} | { } | {
type: "scroll", type: "scroll",
direction: "up" | "down" direction: "up" | "down"
}; } | {
type: "scrape",
}
export type PageOptions = { export type PageOptions = {
includeMarkdown?: boolean; includeMarkdown?: boolean;
@ -163,11 +166,17 @@ export class SearchResult {
} }
} }
/**
 * One captured page snapshot, used as the element type of
 * `FireEngineResponse.scrapeActionContent` and of the `scrapes` list on the
 * scraper response's `actions` object.
 *
 * NOTE(review): presumably one entry is produced per `"scrape"` action in the
 * requested action list — confirm against the fire-engine service contract.
 */
export interface ScrapeActionContent {
// NOTE(review): presumably the URL of the page at the moment it was captured — confirm.
url: string;
// NOTE(review): presumably the HTML of the page at the moment it was captured — confirm.
html: string;
}
/**
 * Shape of a fire-engine scrape result. The fields mirror the object literal
 * returned by `scrapWithFireEngine`: `html`, `screenshots`, `pageStatusCode`,
 * `pageError` and `scrapeActionContent`.
 */
export interface FireEngineResponse { export interface FireEngineResponse {
html: string; html: string;
screenshots?: string[]; screenshots?: string[];
pageStatusCode?: number; pageStatusCode?: number;
pageError?: string; pageError?: string;
// Filled from `data?.actionContent ?? []` in `scrapWithFireEngine`, so that
// code path always supplies an array (possibly empty) rather than undefined.
scrapeActionContent?: ScrapeActionContent[];
} }

View File

@ -200,11 +200,13 @@ export async function scrapWithFireEngine({
logParams.html = data.content ?? ""; logParams.html = data.content ?? "";
logParams.response_code = data.pageStatusCode; logParams.response_code = data.pageStatusCode;
logParams.error_message = data.pageError ?? data.error; logParams.error_message = data.pageError ?? data.error;
return { return {
html: data.content ?? "", html: data.content ?? "",
screenshots: data.screenshots ?? [data.screenshot] ?? [], screenshots: data.screenshots ?? [data.screenshot] ?? [],
pageStatusCode: data.pageStatusCode, pageStatusCode: data.pageStatusCode,
pageError: data.pageError ?? data.error, pageError: data.pageError ?? data.error,
scrapeActionContent: data?.actionContent ?? [],
}; };
} }
} catch (error) { } catch (error) {

View File

@ -21,6 +21,7 @@ import { extractLinks } from "./utils/utils";
import { Logger } from "../../lib/logger"; import { Logger } from "../../lib/logger";
import { ScrapeEvents } from "../../lib/scrape-events"; import { ScrapeEvents } from "../../lib/scrape-events";
import { clientSideError } from "../../strings"; import { clientSideError } from "../../strings";
import { ScrapeActionContent } from "../../lib/entities";
dotenv.config(); dotenv.config();
@ -180,7 +181,8 @@ export async function scrapSingleUrl(
text: string; text: string;
screenshot: string; screenshot: string;
actions?: { actions?: {
screenshots: string[]; screenshots?: string[];
scrapes?: ScrapeActionContent[];
}; };
metadata: { pageStatusCode?: number; pageError?: string | null }; metadata: { pageStatusCode?: number; pageError?: string | null };
} = { text: "", screenshot: "", metadata: {} }; } = { text: "", screenshot: "", metadata: {} };
@ -259,6 +261,7 @@ export async function scrapSingleUrl(
if (pageOptions.actions) { if (pageOptions.actions) {
scraperResponse.actions = { scraperResponse.actions = {
screenshots: response.screenshots ?? [], screenshots: response.screenshots ?? [],
scrapes: response.scrapeActionContent ?? [],
}; };
} }
scraperResponse.metadata.pageStatusCode = response.pageStatusCode; scraperResponse.metadata.pageStatusCode = response.pageStatusCode;