mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 22:49:00 +08:00
feat(v0): fix jobs
This commit is contained in:
parent
ebdf182b00
commit
0b50349fed
@ -10,16 +10,19 @@ import { configDotenv } from "dotenv";
|
|||||||
import { Job } from "bullmq";
|
import { Job } from "bullmq";
|
||||||
import { toLegacyDocument } from "../v1/types";
|
import { toLegacyDocument } from "../v1/types";
|
||||||
import type { DBJob, PseudoJob } from "../v1/crawl-status";
|
import type { DBJob, PseudoJob } from "../v1/crawl-status";
|
||||||
|
import { getJobFromGCS } from "../../lib/gcs-jobs";
|
||||||
configDotenv();
|
configDotenv();
|
||||||
|
|
||||||
export async function getJobs(crawlId: string, ids: string[]): Promise<PseudoJob<any>[]> {
|
export async function getJobs(crawlId: string, ids: string[]): Promise<PseudoJob<any>[]> {
|
||||||
const [bullJobs, dbJobs] = await Promise.all([
|
const [bullJobs, dbJobs, gcsJobs] = await Promise.all([
|
||||||
Promise.all(ids.map((x) => getScrapeQueue().getJob(x))).then(x => x.filter(x => x)) as Promise<(Job<any, any, string> & { id: string })[]>,
|
Promise.all(ids.map((x) => getScrapeQueue().getJob(x))).then(x => x.filter(x => x)) as Promise<(Job<any, any, string> & { id: string })[]>,
|
||||||
process.env.USE_DB_AUTHENTICATION === "true" ? await supabaseGetJobsByCrawlId(crawlId) : [],
|
process.env.USE_DB_AUTHENTICATION === "true" ? await supabaseGetJobsByCrawlId(crawlId) : [],
|
||||||
|
process.env.GCS_BUCKET_NAME ? Promise.all(ids.map(async (x) => ({ id: x, job: await getJobFromGCS(x) }))).then(x => x.filter(x => x.job)) as Promise<({ id: string, job: any | null })[]> : [],
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const bullJobMap = new Map<string, PseudoJob<any>>();
|
const bullJobMap = new Map<string, PseudoJob<any>>();
|
||||||
const dbJobMap = new Map<string, DBJob>();
|
const dbJobMap = new Map<string, DBJob>();
|
||||||
|
const gcsJobMap = new Map<string, any>();
|
||||||
|
|
||||||
for (const job of bullJobs) {
|
for (const job of bullJobs) {
|
||||||
bullJobMap.set(job.id, job);
|
bullJobMap.set(job.id, job);
|
||||||
@ -29,15 +32,25 @@ export async function getJobs(crawlId: string, ids: string[]): Promise<PseudoJob
|
|||||||
dbJobMap.set(job.job_id, job);
|
dbJobMap.set(job.job_id, job);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (const job of gcsJobs) {
|
||||||
|
gcsJobMap.set(job.id, job.job);
|
||||||
|
}
|
||||||
|
|
||||||
const jobs: PseudoJob<any>[] = [];
|
const jobs: PseudoJob<any>[] = [];
|
||||||
|
|
||||||
for (const id of ids) {
|
for (const id of ids) {
|
||||||
const bullJob = bullJobMap.get(id);
|
const bullJob = bullJobMap.get(id);
|
||||||
const dbJob = dbJobMap.get(id);
|
const dbJob = dbJobMap.get(id);
|
||||||
|
const gcsJob = gcsJobMap.get(id);
|
||||||
|
|
||||||
if (!bullJob && !dbJob) continue;
|
if (!bullJob && !dbJob) continue;
|
||||||
|
|
||||||
const data = dbJob?.docs ?? bullJob?.returnvalue;
|
const data = gcsJob ?? dbJob?.docs ?? bullJob?.returnvalue;
|
||||||
|
if (gcsJob === null && data) {
|
||||||
|
logger.warn("GCS Job not found", {
|
||||||
|
jobId: id,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
const job: PseudoJob<any> = {
|
const job: PseudoJob<any> = {
|
||||||
id,
|
id,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user