mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 12:00:40 +08:00
Nick:
This commit is contained in:
parent
a150aa820c
commit
1a1ac9fd60
@ -50,11 +50,15 @@ export async function crawlStatusController(req: Request, res: Response) {
|
||||
return res.status(403).json({ error: "Forbidden" });
|
||||
}
|
||||
|
||||
const jobIDs = await getCrawlJobs(req.params.jobId);
|
||||
let jobIDs = await getCrawlJobs(req.params.jobId);
|
||||
|
||||
const jobs = (await getJobs(req.params.jobId, jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
|
||||
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
|
||||
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobs.some((x, i) => jobStatuses[i] === "failed" && x.failedReason !== "Concurrency limit hit") ? "failed" : "active";
|
||||
let jobStatuses = await Promise.all(jobs.map(x => x.getState()));
|
||||
// filter out failed jobs
|
||||
jobIDs = jobIDs.filter(id => !jobStatuses.some(status => status[0] === id && status[1] === "failed"));
|
||||
// filter the job statuses
|
||||
jobStatuses = jobStatuses.filter(x => x[1] !== "failed");
|
||||
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : "active";
|
||||
|
||||
const data = jobs.filter(x => x.failedReason !== "Concurreny limit hit").map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);
|
||||
|
||||
|
@ -94,11 +94,15 @@ async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth<CrawlStatusPara
|
||||
|
||||
doneJobIDs = await getDoneJobsOrdered(req.params.jobId);
|
||||
|
||||
const jobIDs = await getCrawlJobs(req.params.jobId);
|
||||
let jobIDs = await getCrawlJobs(req.params.jobId);
|
||||
let jobStatuses = await Promise.all(jobIDs.map(async x => [x, await getScrapeQueue().getJobState(x)] as const));
|
||||
const throttledJobs = new Set(...await getThrottledJobs(req.auth.team_id));
|
||||
jobStatuses = jobStatuses.filter(x => !throttledJobs.has(x[0])); // throttled jobs can have a failed status, but they are not actually failed
|
||||
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x[1] === "completed") ? "completed" : jobStatuses.some(x => x[1] === "failed") ? "failed" : "scraping";
|
||||
// filter out failed jobs
|
||||
jobIDs = jobIDs.filter(id => !jobStatuses.some(status => status[0] === id && status[1] === "failed"));
|
||||
// filter the job statuses
|
||||
jobStatuses = jobStatuses.filter(x => x[1] !== "failed");
|
||||
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
|
||||
const doneJobs = await getJobs(doneJobIDs);
|
||||
const data = doneJobs.map(x => x.returnvalue);
|
||||
|
||||
|
@ -57,11 +57,15 @@ export async function crawlStatusController(req: RequestWithAuth<CrawlStatusPara
|
||||
const start = typeof req.query.skip === "string" ? parseInt(req.query.skip, 10) : 0;
|
||||
const end = typeof req.query.limit === "string" ? (start + parseInt(req.query.limit, 10) - 1) : undefined;
|
||||
|
||||
const jobIDs = await getCrawlJobs(req.params.jobId);
|
||||
let jobIDs = await getCrawlJobs(req.params.jobId);
|
||||
let jobStatuses = await Promise.all(jobIDs.map(async x => [x, await getScrapeQueue().getJobState(x)] as const));
|
||||
const throttledJobs = new Set(...await getThrottledJobs(req.auth.team_id));
|
||||
jobStatuses = jobStatuses.filter(x => !throttledJobs.has(x[0])); // throttled jobs can have a failed status, but they are not actually failed
|
||||
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x[1] === "completed") ? "completed" : jobStatuses.some(x => x[1] === "failed") ? "failed" : "scraping";
|
||||
// filter out failed jobs
|
||||
jobIDs = jobIDs.filter(id => !jobStatuses.some(status => status[0] === id && status[1] === "failed"));
|
||||
// filter the job statuses
|
||||
jobStatuses = jobStatuses.filter(x => x[1] !== "failed");
|
||||
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
|
||||
const doneJobsLength = await getDoneJobsOrderedLength(req.params.jobId);
|
||||
const doneJobsOrder = await getDoneJobsOrdered(req.params.jobId, start, end ?? -1);
|
||||
|
||||
|
@ -59,6 +59,7 @@ export async function startWebScraperPipeline({
|
||||
is_scrape: job.data.is_scrape ?? false,
|
||||
})) as { success: boolean; message: string; docs: Document[] };
|
||||
}
|
||||
|
||||
export async function runWebScraper({
|
||||
url,
|
||||
mode,
|
||||
|
@ -395,6 +395,7 @@ async function processJob(job: Job, token: string) {
|
||||
pageOptions: sc.pageOptions,
|
||||
origin: job.data.origin,
|
||||
crawl_id: job.data.crawl_id,
|
||||
webhook: job.data.webhook,
|
||||
v1: job.data.v1,
|
||||
},
|
||||
{},
|
||||
@ -468,9 +469,8 @@ async function processJob(job: Job, token: string) {
|
||||
}
|
||||
} else {
|
||||
const jobIDs = await getCrawlJobs(job.data.crawl_id);
|
||||
const jobStatuses = await Promise.all(jobIDs.map((x) => getScrapeQueue().getJobState(x)));
|
||||
const jobStatus =
|
||||
sc.cancelled || jobStatuses.some((x) => x === "failed")
|
||||
sc.cancelled
|
||||
? "failed"
|
||||
: "completed";
|
||||
|
||||
@ -554,16 +554,16 @@ async function processJob(job: Job, token: string) {
|
||||
job.data.v1
|
||||
);
|
||||
}
|
||||
if (job.data.v1) {
|
||||
callWebhook(
|
||||
job.data.team_id,
|
||||
job.id as string,
|
||||
[],
|
||||
job.data.webhook,
|
||||
job.data.v1,
|
||||
"crawl.failed"
|
||||
);
|
||||
}
|
||||
// if (job.data.v1) {
|
||||
// callWebhook(
|
||||
// job.data.team_id,
|
||||
// job.id as string,
|
||||
// [],
|
||||
// job.data.webhook,
|
||||
// job.data.v1,
|
||||
// "crawl.failed"
|
||||
// );
|
||||
// }
|
||||
|
||||
if (job.data.crawl_id) {
|
||||
await logJob({
|
||||
|
Loading…
x
Reference in New Issue
Block a user