rename monitor to compare

This commit is contained in:
Gergő Móricz 2025-04-02 19:36:23 +02:00
parent 1e73fe7176
commit 97a473d3de
4 changed files with 20 additions and 15 deletions

View File

@@ -21,7 +21,7 @@ export type Format =
| "screenshot" | "screenshot"
| "screenshot@fullPage" | "screenshot@fullPage"
| "extract" | "extract"
| "monitor"; | "compare";
export const url = z.preprocess( export const url = z.preprocess(
(x) => { (x) => {
@@ -166,7 +166,7 @@ const baseScrapeOptions = z
"screenshot@fullPage", "screenshot@fullPage",
"extract", "extract",
"json", "json",
"monitor", "compare",
]) ])
.array() .array()
.optional() .optional()
@@ -176,8 +176,8 @@ const baseScrapeOptions = z
"You may only specify either screenshot or screenshot@fullPage", "You may only specify either screenshot or screenshot@fullPage",
) )
.refine( .refine(
(x) => !x.includes("monitor") || x.includes("markdown"), (x) => !x.includes("compare") || x.includes("markdown"),
"The monitor format requires the markdown format to be specified as well", "The compare format requires the markdown format to be specified as well",
), ),
headers: z.record(z.string(), z.string()).optional(), headers: z.record(z.string(), z.string()).optional(),
includeTags: z.string().array().optional(), includeTags: z.string().array().optional(),
@@ -552,7 +552,7 @@ export type Document = {
value: unknown value: unknown
}[]; }[];
}; };
monitor?: { compare?: {
previousScrapeAt: string | null; previousScrapeAt: string | null;
changeStatus: "new" | "same" | "changed" | "removed"; changeStatus: "new" | "same" | "changed" | "removed";
visibility: "visible" | "hidden"; visibility: "visible" | "hidden";

View File

@@ -3,7 +3,7 @@ import { Document } from "../../../controllers/v1/types";
import { Meta } from "../index"; import { Meta } from "../index";
export async function deriveDiff(meta: Meta, document: Document): Promise<Document> { export async function deriveDiff(meta: Meta, document: Document): Promise<Document> {
if (meta.options.formats.includes("monitor")) { if (meta.options.formats.includes("compare")) {
const res = await supabase_service const res = await supabase_service
.rpc("diff_get_last_scrape_1", { .rpc("diff_get_last_scrape_1", {
i_team_id: meta.internalOptions.teamId, i_team_id: meta.internalOptions.teamId,
@@ -21,20 +21,20 @@ export async function deriveDiff(meta: Meta, document: Document): Promise<Docume
const transformer = (x: string) => [...x.replace(/\s+/g, "").replace(/\[iframe\]\(.+?\)/g, "")].sort().join(""); const transformer = (x: string) => [...x.replace(/\s+/g, "").replace(/\[iframe\]\(.+?\)/g, "")].sort().join("");
document.monitor = { document.compare = {
previousScrapeAt: data.o_date_added, previousScrapeAt: data.o_date_added,
changeStatus: document.metadata.statusCode === 404 ? "removed" : transformer(previousMarkdown) === transformer(currentMarkdown) ? "same" : "changed", changeStatus: document.metadata.statusCode === 404 ? "removed" : transformer(previousMarkdown) === transformer(currentMarkdown) ? "same" : "changed",
visibility: meta.internalOptions.urlInvisibleInCurrentCrawl ? "hidden" : "visible", visibility: meta.internalOptions.urlInvisibleInCurrentCrawl ? "hidden" : "visible",
} }
} else if (!res.error) { } else if (!res.error) {
document.monitor = { document.compare = {
previousScrapeAt: null, previousScrapeAt: null,
changeStatus: document.metadata.statusCode === 404 ? "removed" : "new", changeStatus: document.metadata.statusCode === 404 ? "removed" : "new",
visibility: meta.internalOptions.urlInvisibleInCurrentCrawl ? "hidden" : "visible", visibility: meta.internalOptions.urlInvisibleInCurrentCrawl ? "hidden" : "visible",
} }
} else { } else {
meta.logger.error("Error fetching previous scrape", { error: res.error }); meta.logger.error("Error fetching previous scrape", { error: res.error });
document.warning = "Monitoring failed, please try again later." + (document.warning ? ` ${document.warning}` : ""); document.warning = "Comparing failed, please try again later." + (document.warning ? ` ${document.warning}` : "");
} }
} }

View File

@@ -148,14 +148,14 @@ export function coerceFieldsToFormats(
); );
} }
if (!formats.has("monitor") && document.monitor !== undefined) { if (!formats.has("compare") && document.compare !== undefined) {
meta.logger.warn( meta.logger.warn(
"Removed monitor from Document because it wasn't in formats -- this is extremely wasteful and indicates a bug.", "Removed compare from Document because it wasn't in formats -- this is extremely wasteful and indicates a bug.",
); );
delete document.monitor; delete document.compare;
} else if (formats.has("monitor") && document.monitor === undefined) { } else if (formats.has("compare") && document.compare === undefined) {
meta.logger.warn( meta.logger.warn(
"Request had format monitor, but there was no monitor field in the result.", "Request had format compare, but there was no compare field in the result.",
); );
} }

View File

@@ -69,6 +69,11 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
screenshot?: string; screenshot?: string;
metadata?: FirecrawlDocumentMetadata; metadata?: FirecrawlDocumentMetadata;
actions: ActionsSchema; actions: ActionsSchema;
compare?: {
previousScrapeAt: string | null;
changeStatus: "new" | "same" | "changed" | "removed";
visibility: "visible" | "hidden";
};
// v1 search only // v1 search only
title?: string; title?: string;
description?: string; description?: string;
@@ -79,7 +84,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
* Defines the options and configurations available for scraping web content. * Defines the options and configurations available for scraping web content.
*/ */
export interface CrawlScrapeOptions { export interface CrawlScrapeOptions {
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[]; formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
headers?: Record<string, string>; headers?: Record<string, string>;
includeTags?: string[]; includeTags?: string[];
excludeTags?: string[]; excludeTags?: string[];