mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 15:35:54 +08:00
crawl incomplete issues
This commit is contained in:
parent
ccfada98ca
commit
c6a63793bb
@ -7,6 +7,18 @@ import { logger } from "../../lib/logger";
|
|||||||
import { configDotenv } from "dotenv";
|
import { configDotenv } from "dotenv";
|
||||||
configDotenv();
|
configDotenv();
|
||||||
|
|
||||||
|
/**
 * Recursively removes NUL (`\u0000`) characters from every string inside `x`.
 *
 * - Strings are returned with all NUL characters stripped.
 * - Arrays are mapped element by element into a new array.
 * - Plain objects (prototype is `Object.prototype` or `null`) are rebuilt
 *   with each value cleaned; keys are left as they are.
 * - Everything else (numbers, booleans, `null`, `undefined`, and non-plain
 *   objects such as `Date`, `Map`, or class instances) is returned unchanged.
 *
 * NOTE(review): presumably used so the payload can be persisted by a store
 * that rejects NUL characters in text — confirm against the caller.
 *
 * @param x - Any value; it is not mutated.
 * @returns A cleaned copy of `x` (or `x` itself when nothing can be cleaned).
 */
function cleanOfNull<T>(x: T): T {
  if (typeof x === "string") {
    return x.replace(/\u0000/g, "") as T;
  }

  if (Array.isArray(x)) {
    return x.map((item) => cleanOfNull(item)) as T;
  }

  if (typeof x === "object" && x !== null) {
    const proto: unknown = Object.getPrototypeOf(x);
    // Rebuilding via Object.entries would turn a Date/Map/class instance into
    // an empty `{}` and lose its data, so only plain objects are traversed.
    if (proto !== Object.prototype && proto !== null) {
      return x;
    }
    return Object.fromEntries(
      Object.entries(x).map(([key, value]) => [key, cleanOfNull(value)]),
    ) as T;
  }

  return x;
}
|
||||||
|
|
||||||
export async function logJob(job: FirecrawlJob, force: boolean = false) {
|
export async function logJob(job: FirecrawlJob, force: boolean = false) {
|
||||||
try {
|
try {
|
||||||
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
|
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
|
||||||
@ -33,7 +45,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
|
|||||||
success: job.success,
|
success: job.success,
|
||||||
message: job.message,
|
message: job.message,
|
||||||
num_docs: job.num_docs,
|
num_docs: job.num_docs,
|
||||||
docs: job.docs,
|
docs: cleanOfNull(job.docs),
|
||||||
time_taken: job.time_taken,
|
time_taken: job.time_taken,
|
||||||
team_id: job.team_id === "preview" ? null : job.team_id,
|
team_id: job.team_id === "preview" ? null : job.team_id,
|
||||||
mode: job.mode,
|
mode: job.mode,
|
||||||
|
@ -211,20 +211,6 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
|
|||||||
const jobIDs = await getCrawlJobs(job.data.crawl_id);
|
const jobIDs = await getCrawlJobs(job.data.crawl_id);
|
||||||
const jobStatus = sc.cancelled ? "failed" : "completed";
|
const jobStatus = sc.cancelled ? "failed" : "completed";
|
||||||
|
|
||||||
// v1 web hooks, call when done with no data, but with event completed
|
|
||||||
if (job.data.v1 && job.data.webhook) {
|
|
||||||
callWebhook(
|
|
||||||
job.data.team_id,
|
|
||||||
job.data.crawl_id,
|
|
||||||
[],
|
|
||||||
job.data.webhook,
|
|
||||||
job.data.v1,
|
|
||||||
job.data.crawlerOptions !== null
|
|
||||||
? "crawl.completed"
|
|
||||||
: "batch_scrape.completed",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
await logJob(
|
await logJob(
|
||||||
{
|
{
|
||||||
job_id: job.data.crawl_id,
|
job_id: job.data.crawl_id,
|
||||||
@ -244,6 +230,20 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
|
|||||||
},
|
},
|
||||||
true,
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// v1 web hooks, call when done with no data, but with event completed
|
||||||
|
if (job.data.v1 && job.data.webhook) {
|
||||||
|
callWebhook(
|
||||||
|
job.data.team_id,
|
||||||
|
job.data.crawl_id,
|
||||||
|
[],
|
||||||
|
job.data.webhook,
|
||||||
|
job.data.v1,
|
||||||
|
job.data.crawlerOptions !== null
|
||||||
|
? "crawl.completed"
|
||||||
|
: "batch_scrape.completed",
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user