From e6738abf96380ae068310bf4802e4a2bae100bf9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Fri, 16 Aug 2024 23:39:39 +0200
Subject: [PATCH] fix(crawl-status): retrieve from DB in bulk

---
Review notes (this section is not part of the commit message):

* crawl-status.ts: adds getJobs(ids). It looks up every id on the scrape
  queue, discards falsy lookups, and, when USE_DB_AUTHENTICATION is "true",
  overwrites job.returnvalue with the `docs` of the firecrawl_jobs row whose
  job_id matches the job's id. The rows come from a single
  supabaseGetJobsById(ids) call, replacing the previous per-job
  supabaseGetJobById(job.id) call inside the controller.
* getJobs() then reduces an array returnvalue to its first element.
* supabase-jobs.ts: adds supabaseGetJobsById(jobIds), which selects all
  columns from firecrawl_jobs where job_id is in jobIds and returns [] both
  on a query error and when no data comes back.
* NOTE(review): this copy of the patch was rebuilt from a whitespace-collapsed
  original. Line boundaries follow the hunk headers and diffstat; indentation
  (2 spaces assumed) and the position of blank context lines are a
  reconstruction. Confirm against the target tree before applying.
* NOTE(review): the From: header carries no e-mail address in this copy.

 apps/api/src/controllers/crawl-status.ts | 37 +++++++++++++++---------
 apps/api/src/lib/supabase-jobs.ts        | 18 ++++++++++++
 2 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/apps/api/src/controllers/crawl-status.ts b/apps/api/src/controllers/crawl-status.ts
index 93c463c0..b429fe9c 100644
--- a/apps/api/src/controllers/crawl-status.ts
+++ b/apps/api/src/controllers/crawl-status.ts
@@ -4,7 +4,28 @@ import { RateLimiterMode } from "../../src/types";
 import { getScrapeQueue } from "../../src/services/queue-service";
 import { Logger } from "../../src/lib/logger";
 import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis";
-import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
+import { supabaseGetJobsById } from "../../src/lib/supabase-jobs";
+
+async function getJobs(ids: string[]) {
+  const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x);
+
+  if (process.env.USE_DB_AUTHENTICATION === "true") {
+    const supabaseData = await supabaseGetJobsById(ids);
+
+    supabaseData.forEach(x => {
+      const job = jobs.find(y => y.id === x.job_id);
+      if (job) {
+        job.returnvalue = x.docs;
+      }
+    })
+  }
+
+  jobs.forEach(job => {
+    job.returnvalue = Array.isArray(job.returnvalue) ? job.returnvalue[0] : job.returnvalue;
+  });
+
+  return jobs;
+}
 
 export async function crawlStatusController(req: Request, res: Response) {
   try {
@@ -28,19 +49,7 @@ export async function crawlStatusController(req: Request, res: Response) {
 
     const jobIDs = await getCrawlJobs(req.params.jobId);
 
-    const jobs = (await Promise.all(jobIDs.map(async x => {
-      const job = await getScrapeQueue().getJob(x);
-
-      if (process.env.USE_DB_AUTHENTICATION === "true") {
-        const supabaseData = await supabaseGetJobById(job.id);
-
-        if (supabaseData) {
-          job.returnvalue = supabaseData.docs;
-        }
-      }
-
-      return job;
-    }))).sort((a, b) => a.timestamp - b.timestamp);
+    const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
     const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
     const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";
 
diff --git a/apps/api/src/lib/supabase-jobs.ts b/apps/api/src/lib/supabase-jobs.ts
index 1f9531e5..b4247883 100644
--- a/apps/api/src/lib/supabase-jobs.ts
+++ b/apps/api/src/lib/supabase-jobs.ts
@@ -17,3 +17,21 @@ export const supabaseGetJobById = async (jobId: string) => {
 
   return data;
 }
+
+export const supabaseGetJobsById = async (jobIds: string[]) => {
+  const { data, error } = await supabase_service
+    .from('firecrawl_jobs')
+    .select('*')
+    .in('job_id', jobIds);
+
+  if (error) {
+    return [];
+  }
+
+  if (!data) {
+    return [];
+  }
+
+  return data;
+}
+