mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 05:45:56 +08:00
rename monitor to compare
This commit is contained in:
parent
1e73fe7176
commit
97a473d3de
@ -21,7 +21,7 @@ export type Format =
|
||||
| "screenshot"
|
||||
| "screenshot@fullPage"
|
||||
| "extract"
|
||||
| "monitor";
|
||||
| "compare";
|
||||
|
||||
export const url = z.preprocess(
|
||||
(x) => {
|
||||
@ -166,7 +166,7 @@ const baseScrapeOptions = z
|
||||
"screenshot@fullPage",
|
||||
"extract",
|
||||
"json",
|
||||
"monitor",
|
||||
"compare",
|
||||
])
|
||||
.array()
|
||||
.optional()
|
||||
@ -176,8 +176,8 @@ const baseScrapeOptions = z
|
||||
"You may only specify either screenshot or screenshot@fullPage",
|
||||
)
|
||||
.refine(
|
||||
(x) => !x.includes("monitor") || x.includes("markdown"),
|
||||
"The monitor format requires the markdown format to be specified as well",
|
||||
(x) => !x.includes("compare") || x.includes("markdown"),
|
||||
"The compare format requires the markdown format to be specified as well",
|
||||
),
|
||||
headers: z.record(z.string(), z.string()).optional(),
|
||||
includeTags: z.string().array().optional(),
|
||||
@ -552,7 +552,7 @@ export type Document = {
|
||||
value: unknown
|
||||
}[];
|
||||
};
|
||||
monitor?: {
|
||||
compare?: {
|
||||
previousScrapeAt: string | null;
|
||||
changeStatus: "new" | "same" | "changed" | "removed";
|
||||
visibility: "visible" | "hidden";
|
||||
|
@ -3,7 +3,7 @@ import { Document } from "../../../controllers/v1/types";
|
||||
import { Meta } from "../index";
|
||||
|
||||
export async function deriveDiff(meta: Meta, document: Document): Promise<Document> {
|
||||
if (meta.options.formats.includes("monitor")) {
|
||||
if (meta.options.formats.includes("compare")) {
|
||||
const res = await supabase_service
|
||||
.rpc("diff_get_last_scrape_1", {
|
||||
i_team_id: meta.internalOptions.teamId,
|
||||
@ -21,20 +21,20 @@ export async function deriveDiff(meta: Meta, document: Document): Promise<Docume
|
||||
|
||||
const transformer = (x: string) => [...x.replace(/\s+/g, "").replace(/\[iframe\]\(.+?\)/g, "")].sort().join("");
|
||||
|
||||
document.monitor = {
|
||||
document.compare = {
|
||||
previousScrapeAt: data.o_date_added,
|
||||
changeStatus: document.metadata.statusCode === 404 ? "removed" : transformer(previousMarkdown) === transformer(currentMarkdown) ? "same" : "changed",
|
||||
visibility: meta.internalOptions.urlInvisibleInCurrentCrawl ? "hidden" : "visible",
|
||||
}
|
||||
} else if (!res.error) {
|
||||
document.monitor = {
|
||||
document.compare = {
|
||||
previousScrapeAt: null,
|
||||
changeStatus: document.metadata.statusCode === 404 ? "removed" : "new",
|
||||
visibility: meta.internalOptions.urlInvisibleInCurrentCrawl ? "hidden" : "visible",
|
||||
}
|
||||
} else {
|
||||
meta.logger.error("Error fetching previous scrape", { error: res.error });
|
||||
document.warning = "Monitoring failed, please try again later." + (document.warning ? ` ${document.warning}` : "");
|
||||
document.warning = "Comparing failed, please try again later." + (document.warning ? ` ${document.warning}` : "");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -148,14 +148,14 @@ export function coerceFieldsToFormats(
|
||||
);
|
||||
}
|
||||
|
||||
if (!formats.has("monitor") && document.monitor !== undefined) {
|
||||
if (!formats.has("compare") && document.compare !== undefined) {
|
||||
meta.logger.warn(
|
||||
"Removed monitor from Document because it wasn't in formats -- this is extremely wasteful and indicates a bug.",
|
||||
"Removed compare from Document because it wasn't in formats -- this is extremely wasteful and indicates a bug.",
|
||||
);
|
||||
delete document.monitor;
|
||||
} else if (formats.has("monitor") && document.monitor === undefined) {
|
||||
delete document.compare;
|
||||
} else if (formats.has("compare") && document.compare === undefined) {
|
||||
meta.logger.warn(
|
||||
"Request had format monitor, but there was no monitor field in the result.",
|
||||
"Request had format compare, but there was no compare field in the result.",
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -69,6 +69,11 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
||||
screenshot?: string;
|
||||
metadata?: FirecrawlDocumentMetadata;
|
||||
actions: ActionsSchema;
|
||||
compare?: {
|
||||
previousScrapeAt: string | null;
|
||||
changeStatus: "new" | "same" | "changed" | "removed";
|
||||
visibility: "visible" | "hidden";
|
||||
};
|
||||
// v1 search only
|
||||
title?: string;
|
||||
description?: string;
|
||||
@ -79,7 +84,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
||||
* Defines the options and configurations available for scraping web content.
|
||||
*/
|
||||
export interface CrawlScrapeOptions {
|
||||
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
||||
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
|
||||
headers?: Record<string, string>;
|
||||
includeTags?: string[];
|
||||
excludeTags?: string[];
|
||||
|
Loading…
x
Reference in New Issue
Block a user