Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl, synced 2025-08-14 00:55:57 +08:00
crawl incomplete issues
parent ccfada98ca
commit c6a63793bb
@@ -7,6 +7,18 @@ import { logger } from "../../lib/logger";
 import { configDotenv } from "dotenv";
 configDotenv();
 
+function cleanOfNull<T>(x: T): T {
+  if (Array.isArray(x)) {
+    return x.map(x => cleanOfNull(x)) as T;
+  } else if (typeof x === "object" && x !== null) {
+    return Object.fromEntries(Object.entries(x).map(([k,v]) => [k,cleanOfNull(v)])) as T
+  } else if (typeof x === "string") {
+    return x.replaceAll("\u0000", "") as T;
+  } else {
+    return x;
+  }
+}
+
 export async function logJob(job: FirecrawlJob, force: boolean = false) {
   try {
     const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
@@ -33,7 +45,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
       success: job.success,
       message: job.message,
       num_docs: job.num_docs,
-      docs: job.docs,
+      docs: cleanOfNull(job.docs),
       time_taken: job.time_taken,
       team_id: job.team_id === "preview" ? null : job.team_id,
       mode: job.mode,
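
The hunks above change the module that defines logJob (the job-logging helper). The new cleanOfNull walks arrays, plain objects, and strings recursively and strips the NUL character (\u0000) from every string, and job.docs is now passed through it before the job record is written. PostgreSQL cannot store \u0000 inside text or jsonb values, so a scraped page containing a stray null byte could plausibly make the insert fail and leave the crawl looking incomplete; that reading of the commit is an inference, not something the diff states. Below is a small, self-contained sketch of what the helper does: the cleanOfNull body is taken from the diff above, while the sample doc object and its fields are made up for illustration.

function cleanOfNull<T>(x: T): T {
  if (Array.isArray(x)) {
    return x.map(x => cleanOfNull(x)) as T;
  } else if (typeof x === "object" && x !== null) {
    return Object.fromEntries(Object.entries(x).map(([k,v]) => [k,cleanOfNull(v)])) as T
  } else if (typeof x === "string") {
    return x.replaceAll("\u0000", "") as T;
  } else {
    return x;
  }
}

// Hypothetical scraped document with NUL bytes buried in nested strings.
const doc = {
  url: "https://example.com",
  markdown: "hello\u0000world",
  metadata: { title: "page\u0000title", statusCode: 200 },
  links: ["https://example.com/a\u0000"],
};

// Every string comes back with \u0000 removed; non-string values pass through untouched.
console.log(cleanOfNull(doc));
// { url: 'https://example.com', markdown: 'helloworld',
//   metadata: { title: 'pagetitle', statusCode: 200 },
//   links: [ 'https://example.com/a' ] }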

@@ -211,20 +211,6 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
       const jobIDs = await getCrawlJobs(job.data.crawl_id);
       const jobStatus = sc.cancelled ? "failed" : "completed";
 
-      // v1 web hooks, call when done with no data, but with event completed
-      if (job.data.v1 && job.data.webhook) {
-        callWebhook(
-          job.data.team_id,
-          job.data.crawl_id,
-          [],
-          job.data.webhook,
-          job.data.v1,
-          job.data.crawlerOptions !== null
-            ? "crawl.completed"
-            : "batch_scrape.completed",
-        );
-      }
-
       await logJob(
         {
           job_id: job.data.crawl_id,
@@ -244,6 +230,20 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
         },
         true,
       );
+
+      // v1 web hooks, call when done with no data, but with event completed
+      if (job.data.v1 && job.data.webhook) {
+        callWebhook(
+          job.data.team_id,
+          job.data.crawl_id,
+          [],
+          job.data.webhook,
+          job.data.v1,
+          job.data.crawlerOptions !== null
+            ? "crawl.completed"
+            : "batch_scrape.completed",
+        );
+      }
     }
   }
 }
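
The second pair of hunks edits the worker's finishCrawlIfNeeded (the file name is not visible in this capture): the v1 completion webhook block is removed from before the crawl-level logJob call and re-added after it, so crawl.completed / batch_scrape.completed now fires only once the crawl's job record has been logged. Connecting that to the "crawl incomplete issues" commit message is an inference: a consumer reacting to the webhook could previously race the logging step and see an incomplete crawl. Below is a minimal, self-contained sketch of the new ordering; logJob, callWebhook, CrawlJobData, and finishCrawlSketch are stand-in stubs for illustration, not the real implementations.

// Stand-in types and stubs; only the ordering mirrors the diff above.
type CrawlJobData = {
  crawl_id: string;
  team_id: string;
  v1: boolean;
  webhook?: { url: string };
  crawlerOptions: object | null;
};

async function logJob(record: { job_id: string }, force: boolean): Promise<void> {
  console.log("logged crawl-level job", record.job_id, "force:", force);
}

async function callWebhook(
  teamId: string,
  crawlId: string,
  data: unknown[],
  webhook: { url: string },
  v1: boolean,
  event: "crawl.completed" | "batch_scrape.completed",
): Promise<void> {
  console.log("webhook", event, "->", webhook.url);
}

async function finishCrawlSketch(data: CrawlJobData): Promise<void> {
  // 1. Persist the crawl-level job record first (force = true, as in the diff).
  await logJob({ job_id: data.crawl_id }, true);

  // 2. Only then notify v1 webhook consumers; fired without awaiting, as in the diff.
  if (data.v1 && data.webhook) {
    callWebhook(
      data.team_id,
      data.crawl_id,
      [], // the completion event carries no data payload
      data.webhook,
      data.v1,
      data.crawlerOptions !== null ? "crawl.completed" : "batch_scrape.completed",
    );
  }
}

void finishCrawlSketch({
  crawl_id: "crawl-123",
  team_id: "team-1",
  v1: true,
  webhook: { url: "https://example.com/hook" },
  crawlerOptions: {},
});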