diffing improvements

This commit is contained in:
Gergő Móricz 2025-03-25 17:00:41 +01:00
parent 50db3e9e8a
commit 80f9b44f0b

View File

@@ -1,36 +1,39 @@
import { supabase_rr_service } from "../../../services/supabase"; import { supabase_rr_service, supabase_service } from "../../../services/supabase";
import { Document } from "../../../controllers/v1/types"; import { Document } from "../../../controllers/v1/types";
import { Meta } from "../index"; import { Meta } from "../index";
export async function deriveDiff(meta: Meta, document: Document): Promise<Document> { export async function deriveDiff(meta: Meta, document: Document): Promise<Document> {
if (meta.options.formats.includes("diff")) { if (meta.options.formats.includes("diff")) {
const { data, error } = await supabase_rr_service const res = await supabase_service
.from("firecrawl_jobs") .rpc("diff_get_last_scrape_1", {
.select() i_team_id: meta.internalOptions.teamId,
.eq("team_id", meta.internalOptions.teamId) i_url: document.metadata.sourceURL ?? meta.url,
.eq("url", document.metadata.url ?? document.metadata.sourceURL ?? meta.url) });
.contains("page_options->>'formats'", "markdown")
.order("date_added", { ascending: false })
.limit(1)
.single();
if (data) { const data: {
const previousMarkdown = data.docs[0].markdown; o_docs: Document[],
o_date_added: string,
} | undefined | null = res.data[0] as any;
if (data && data.o_docs.length > 0) {
const previousMarkdown = data.o_docs[0].markdown!;
const currentMarkdown = document.markdown!; const currentMarkdown = document.markdown!;
const transformer = (x: string) => [...x.replace(/\s+/g, "").replace(/\[iframe\]\(.+?\)/g, "")].sort().join("");
document.diff = { document.diff = {
previousScrapeAt: data.date_added, previousScrapeAt: data.o_date_added,
changeStatus: previousMarkdown.replace(/\s+/g, "") === currentMarkdown.replace(/\s+/g, "") ? "same" : "changed", changeStatus: transformer(previousMarkdown) === transformer(currentMarkdown) ? "same" : "changed",
visibility: "visible", visibility: "visible",
} }
} else if (!error) { } else if (!res.error) {
document.diff = { document.diff = {
previousScrapeAt: null, previousScrapeAt: null,
changeStatus: "new", changeStatus: "new",
visibility: "visible", visibility: "visible",
} }
} else { } else {
meta.logger.error("Error fetching previous scrape", { error }); meta.logger.error("Error fetching previous scrape", { error: res.error });
document.warning = "Diffing failed, please try again later." + (document.warning ? ` ${document.warning}` : ""); document.warning = "Diffing failed, please try again later." + (document.warning ? ` ${document.warning}` : "");
} }
} }