crawl incomplete issues

This commit is contained in:
Gergő Móricz 2025-01-07 19:38:17 +01:00
parent ccfada98ca
commit c6a63793bb
2 changed files with 27 additions and 15 deletions

View File

@ -7,6 +7,18 @@ import { logger } from "../../lib/logger";
import { configDotenv } from "dotenv";
configDotenv();
/**
 * Returns a deep copy of `x` with every NUL character (`\u0000`) removed from
 * the strings it contains.
 *
 * - Strings have all NUL characters stripped.
 * - Arrays are mapped element by element.
 * - Objects are rebuilt from their own enumerable entries, cleaning both the
 *   property names and the values.
 * - `Date` instances are returned untouched; rebuilding them from their
 *   (empty) entry list would silently turn them into `{}`.
 * - Everything else (numbers, booleans, `null`, `undefined`, ...) is returned
 *   as is.
 *
 * NOTE(review): presumably needed because the storage layer behind `logJob`
 * rejects NUL characters in text/JSON payloads; confirm against the database
 * in use.
 *
 * @param x - Any JSON-like value. Cyclic structures are not supported.
 * @returns A cleaned value with the same shape as the input.
 */
function cleanOfNull<T>(x: T): T {
  if (typeof x === "string") {
    // Global regex replace is equivalent to replaceAll("\u0000", "").
    return x.replace(/\u0000/g, "") as T;
  }

  if (Array.isArray(x)) {
    return x.map((item) => cleanOfNull(item)) as T;
  }

  if (x instanceof Date) {
    // Dates carry no own enumerable properties, so keep the instance itself.
    return x;
  }

  if (typeof x === "object" && x !== null) {
    // Property names are strings too and may contain NUL characters.
    return Object.fromEntries(
      Object.entries(x).map(([key, value]) => [
        cleanOfNull(key),
        cleanOfNull(value),
      ]),
    ) as T;
  }

  return x;
}
export async function logJob(job: FirecrawlJob, force: boolean = false) {
try {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
@ -33,7 +45,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
success: job.success,
message: job.message,
num_docs: job.num_docs,
docs: job.docs,
docs: cleanOfNull(job.docs),
time_taken: job.time_taken,
team_id: job.team_id === "preview" ? null : job.team_id,
mode: job.mode,

View File

@ -211,20 +211,6 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
const jobIDs = await getCrawlJobs(job.data.crawl_id);
const jobStatus = sc.cancelled ? "failed" : "completed";
// v1 web hooks, call when done with no data, but with event completed
if (job.data.v1 && job.data.webhook) {
callWebhook(
job.data.team_id,
job.data.crawl_id,
[],
job.data.webhook,
job.data.v1,
job.data.crawlerOptions !== null
? "crawl.completed"
: "batch_scrape.completed",
);
}
await logJob(
{
job_id: job.data.crawl_id,
@ -244,6 +230,20 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
},
true,
);
// v1 web hooks, call when done with no data, but with event completed
if (job.data.v1 && job.data.webhook) {
callWebhook(
job.data.team_id,
job.data.crawl_id,
[],
job.data.webhook,
job.data.v1,
job.data.crawlerOptions !== null
? "crawl.completed"
: "batch_scrape.completed",
);
}
}
}
}