mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 15:49:02 +08:00
feat(crawl-status): same for v0
This commit is contained in:
parent
95ce3c3b71
commit
a7b56ab87c
@ -9,31 +9,53 @@ import * as Sentry from "@sentry/node";
|
||||
import { configDotenv } from "dotenv";
|
||||
import { Job } from "bullmq";
|
||||
import { toLegacyDocument } from "../v1/types";
|
||||
import type { DBJob, PseudoJob } from "../v1/crawl-status";
|
||||
configDotenv();
|
||||
|
||||
export async function getJobs(crawlId: string, ids: string[]) {
|
||||
const jobs = (
|
||||
await Promise.all(ids.map((x) => getScrapeQueue().getJob(x)))
|
||||
).filter((x) => x) as Job[];
|
||||
|
||||
if (process.env.USE_DB_AUTHENTICATION === "true") {
|
||||
const supabaseData = await supabaseGetJobsByCrawlId(crawlId);
|
||||
|
||||
supabaseData.forEach((x) => {
|
||||
const job = jobs.find((y) => y.id === x.job_id);
|
||||
if (job) {
|
||||
job.returnvalue = x.docs;
|
||||
export async function getJobs(crawlId: string, ids: string[]): Promise<PseudoJob<any>[]> {
|
||||
const [bullJobs, dbJobs] = await Promise.all([
|
||||
Promise.all(ids.map((x) => getScrapeQueue().getJob(x))).then(x => x.filter(x => x)) as Promise<(Job<any, any, string> & { id: string })[]>,
|
||||
process.env.USE_DB_AUTHENTICATION === "true" ? await supabaseGetJobsByCrawlId(crawlId) : [],
|
||||
]);
|
||||
|
||||
const bullJobMap = new Map<string, PseudoJob<any>>();
|
||||
const dbJobMap = new Map<string, DBJob>();
|
||||
|
||||
for (const job of bullJobs) {
|
||||
bullJobMap.set(job.id, job);
|
||||
}
|
||||
|
||||
for (const job of dbJobs) {
|
||||
dbJobMap.set(job.job_id, job);
|
||||
}
|
||||
|
||||
const jobs: PseudoJob<any>[] = [];
|
||||
|
||||
for (const id of ids) {
|
||||
const bullJob = bullJobMap.get(id);
|
||||
const dbJob = dbJobMap.get(id);
|
||||
|
||||
if (!bullJob && !dbJob) continue;
|
||||
|
||||
const data = dbJob?.docs ?? bullJob?.returnvalue;
|
||||
|
||||
const job: PseudoJob<any> = {
|
||||
id,
|
||||
getState: bullJob ? (() => bullJob.getState()) : (() => dbJob!.success ? "completed" : "failed"),
|
||||
returnvalue: Array.isArray(data)
|
||||
? data[0]
|
||||
: data,
|
||||
data: {
|
||||
scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options,
|
||||
},
|
||||
timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(),
|
||||
failedReason: (bullJob ? bullJob.failedReason : dbJob!.message) || undefined,
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
jobs.forEach((job) => {
|
||||
job.returnvalue = Array.isArray(job.returnvalue)
|
||||
? job.returnvalue[0]
|
||||
: job.returnvalue;
|
||||
});
|
||||
|
||||
return jobs;
|
||||
|
||||
jobs.push(job);
|
||||
}
|
||||
|
||||
return jobs;
|
||||
}
|
||||
|
||||
export async function crawlStatusController(req: Request, res: Response) {
|
||||
@ -93,8 +115,9 @@ export async function crawlStatusController(req: Request, res: Response) {
|
||||
if (
|
||||
jobs.length > 0 &&
|
||||
jobs[0].data &&
|
||||
jobs[0].data.pageOptions &&
|
||||
!jobs[0].data.pageOptions.includeRawHtml
|
||||
jobs[0].data.scrapeOptions &&
|
||||
jobs[0].data.scrapeOptions.formats &&
|
||||
!jobs[0].data.scrapeOptions.formats.includes("rawHtml")
|
||||
) {
|
||||
data.forEach((item) => {
|
||||
if (item) {
|
||||
|
@ -25,7 +25,7 @@ import { logger } from "../../lib/logger";
|
||||
import { supabase_service } from "../../services/supabase";
|
||||
configDotenv();
|
||||
|
||||
type PseudoJob<T> = {
|
||||
export type PseudoJob<T> = {
|
||||
id: string,
|
||||
getState(): Promise<JobState | "unknown"> | JobState | "unknown",
|
||||
returnvalue: T | null,
|
||||
@ -33,9 +33,10 @@ type PseudoJob<T> = {
|
||||
data: {
|
||||
scrapeOptions: any,
|
||||
},
|
||||
failedReason?: string,
|
||||
}
|
||||
|
||||
type DBJob = { docs: any, success: boolean, page_options: any, date_added: any }
|
||||
export type DBJob = { docs: any, success: boolean, page_options: any, date_added: any, message: string | null }
|
||||
|
||||
export async function getJob(id: string): Promise<PseudoJob<any> | null> {
|
||||
const [bullJob, dbJob] = await Promise.all([
|
||||
@ -57,6 +58,7 @@ export async function getJob(id: string): Promise<PseudoJob<any> | null> {
|
||||
scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options,
|
||||
},
|
||||
timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(),
|
||||
failedReason: (bullJob ? bullJob.failedReason : dbJob!.message) || undefined,
|
||||
}
|
||||
|
||||
return job;
|
||||
@ -99,6 +101,7 @@ export async function getJobs(ids: string[]): Promise<PseudoJob<any>[]> {
|
||||
scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options,
|
||||
},
|
||||
timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(),
|
||||
failedReason: (bullJob ? bullJob.failedReason : dbJob!.message) || undefined,
|
||||
}
|
||||
|
||||
jobs.push(job);
|
||||
|
Loading…
x
Reference in New Issue
Block a user