diff --git a/apps/api/src/scraper/scrapeURL/transformers/diff.ts b/apps/api/src/scraper/scrapeURL/transformers/diff.ts index edc6efd3..ea98fa6a 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/diff.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/diff.ts @@ -1,22 +1,29 @@ import { supabase_service } from "../../../services/supabase"; import { Document } from "../../../controllers/v1/types"; import { Meta } from "../index"; +import { getJob } from "../../../controllers/v1/crawl-status"; export async function deriveDiff(meta: Meta, document: Document): Promise { if (meta.options.formats.includes("changeTracking")) { const res = await supabase_service - .rpc("diff_get_last_scrape_1", { + .rpc("diff_get_last_scrape_2", { i_team_id: meta.internalOptions.teamId, i_url: document.metadata.sourceURL ?? meta.url, }); const data: { - o_docs: Document[], + o_job_id: string, o_date_added: string, } | undefined | null = (res.data ?? [])[0] as any; - if (data && data.o_docs.length > 0) { - const previousMarkdown = data.o_docs[0].markdown!; + const job: { + returnvalue: Document, + } | null = data?.o_job_id ? await getJob(data.o_job_id) : null; + + console.log(data, job); + + if (data && job && job?.returnvalue) { + const previousMarkdown = job.returnvalue.markdown!; const currentMarkdown = document.markdown!; const transformer = (x: string) => [...x.replace(/\s+/g, "").replace(/\[iframe\]\(.+?\)/g, "")].sort().join(""); diff --git a/apps/api/src/services/logging/log_job.ts b/apps/api/src/services/logging/log_job.ts index 25cdf2d4..ce6a4887 100644 --- a/apps/api/src/services/logging/log_job.ts +++ b/apps/api/src/services/logging/log_job.ts @@ -89,7 +89,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) { success: job.success, message: job.message, num_docs: job.num_docs, - docs: (job.mode === "single_urls" || job.mode === "scrape") ? null : cleanOfNull(job.docs), + docs: ((job.mode === "single_urls" || job.mode === "scrape") && process.env.GCS_BUCKET_NAME) ? null : cleanOfNull(job.docs), time_taken: job.time_taken, team_id: (job.team_id === "preview" || job.team_id?.startsWith("preview_"))? null : job.team_id, mode: job.mode,