Rename 'compare' format and property to 'changeTracking' (#1423)

This commit is contained in:
devin-ai-integration[bot] 2025-04-08 21:09:31 +02:00 committed by GitHub
parent 62265c63c8
commit 8c801ed956
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 19 additions and 19 deletions

View File

@@ -85,15 +85,15 @@ describe("Scrape tests", () => {
// }, 30000);
// });
describe("Compare format", () => {
describe("Change Tracking format", () => {
it.concurrent("works", async () => {
const response = await scrape({
url: "https://example.com",
formats: ["markdown", "compare"],
formats: ["markdown", "changeTracking"],
});
expect(response.compare).toBeDefined();
expect(response.compare?.previousScrapeAt).not.toBeNull();
expect(response.changeTracking).toBeDefined();
expect(response.changeTracking?.previousScrapeAt).not.toBeNull();
});
});

View File

@@ -21,7 +21,7 @@ export type Format =
| "screenshot"
| "screenshot@fullPage"
| "extract"
| "compare";
| "changeTracking";
export const url = z.preprocess(
(x) => {
@@ -166,7 +166,7 @@ const baseScrapeOptions = z
"screenshot@fullPage",
"extract",
"json",
"compare",
"changeTracking",
])
.array()
.optional()
@@ -176,8 +176,8 @@ const baseScrapeOptions = z
"You may only specify either screenshot or screenshot@fullPage",
)
.refine(
(x) => !x.includes("compare") || x.includes("markdown"),
"The compare format requires the markdown format to be specified as well",
(x) => !x.includes("changeTracking") || x.includes("markdown"),
"The changeTracking format requires the markdown format to be specified as well",
),
headers: z.record(z.string(), z.string()).optional(),
includeTags: z.string().array().optional(),
@@ -552,7 +552,7 @@ export type Document = {
value: unknown
}[];
};
compare?: {
changeTracking?: {
previousScrapeAt: string | null;
changeStatus: "new" | "same" | "changed" | "removed";
visibility: "visible" | "hidden";

View File

@@ -3,7 +3,7 @@ import { Document } from "../../../controllers/v1/types";
import { Meta } from "../index";
export async function deriveDiff(meta: Meta, document: Document): Promise<Document> {
if (meta.options.formats.includes("compare")) {
if (meta.options.formats.includes("changeTracking")) {
const res = await supabase_service
.rpc("diff_get_last_scrape_1", {
i_team_id: meta.internalOptions.teamId,
@@ -21,13 +21,13 @@ export async function deriveDiff(meta: Meta, document: Document): Promise<Docume
const transformer = (x: string) => [...x.replace(/\s+/g, "").replace(/\[iframe\]\(.+?\)/g, "")].sort().join("");
document.compare = {
document.changeTracking = {
previousScrapeAt: data.o_date_added,
changeStatus: document.metadata.statusCode === 404 ? "removed" : transformer(previousMarkdown) === transformer(currentMarkdown) ? "same" : "changed",
visibility: meta.internalOptions.urlInvisibleInCurrentCrawl ? "hidden" : "visible",
}
} else if (!res.error) {
document.compare = {
document.changeTracking = {
previousScrapeAt: null,
changeStatus: document.metadata.statusCode === 404 ? "removed" : "new",
visibility: meta.internalOptions.urlInvisibleInCurrentCrawl ? "hidden" : "visible",

View File

@@ -148,14 +148,14 @@ export function coerceFieldsToFormats(
);
}
if (!formats.has("compare") && document.compare !== undefined) {
if (!formats.has("changeTracking") && document.changeTracking !== undefined) {
meta.logger.warn(
"Removed compare from Document because it wasn't in formats -- this is extremely wasteful and indicates a bug.",
"Removed changeTracking from Document because it wasn't in formats -- this is extremely wasteful and indicates a bug.",
);
delete document.compare;
} else if (formats.has("compare") && document.compare === undefined) {
delete document.changeTracking;
} else if (formats.has("changeTracking") && document.changeTracking === undefined) {
meta.logger.warn(
"Request had format compare, but there was no compare field in the result.",
"Request had format changeTracking, but there was no changeTracking field in the result.",
);
}

View File

@@ -68,7 +68,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
screenshot?: string;
metadata?: FirecrawlDocumentMetadata;
actions: ActionsSchema;
compare?: {
changeTracking?: {
previousScrapeAt: string | null;
changeStatus: "new" | "same" | "changed" | "removed";
visibility: "visible" | "hidden";
@@ -83,7 +83,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
* Defines the options and configurations available for scraping web content.
*/
export interface CrawlScrapeOptions {
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "changeTracking")[];
headers?: Record<string, string>;
includeTags?: string[];
excludeTags?: string[];