fix(crawl-status): retrieve from DB in bulk

This commit is contained in:
Gergő Móricz 2024-08-16 23:39:39 +02:00
parent 78ca94251c
commit e6738abf96
2 changed files with 41 additions and 14 deletions

View File

@ -4,7 +4,28 @@ import { RateLimiterMode } from "../../src/types";
import { getScrapeQueue } from "../../src/services/queue-service";
import { Logger } from "../../src/lib/logger";
import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis";
import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
import { supabaseGetJobsById } from "../../src/lib/supabase-jobs";
/**
 * Loads the scrape-queue jobs for the given IDs and normalizes their
 * `returnvalue`.
 *
 * IDs for which the queue returns nothing are dropped from the result. When
 * `USE_DB_AUTHENTICATION` is `"true"`, rows are fetched from the database in
 * a single bulk query and each matching job's `returnvalue` is replaced by
 * that row's `docs`. Finally, an array `returnvalue` is collapsed to its
 * first element.
 *
 * @param ids - Queue job IDs to load.
 * @returns The jobs that were found, in the same relative order as `ids`.
 */
async function getJobs(ids: string[]) {
  // Resolve the queue once instead of once per ID.
  const queue = getScrapeQueue();
  const jobs = (await Promise.all(ids.map(id => queue.getJob(id)))).filter(job => job);

  // job_id -> docs, so merging is one lookup per job rather than a scan of
  // the whole job list for every database row.
  const docsByJobId = new Map<unknown, unknown>();

  // No point hitting the database when there is no job to attach docs to.
  if (process.env.USE_DB_AUTHENTICATION === "true" && jobs.length > 0) {
    const rows = await supabaseGetJobsById(ids);
    for (const row of rows) {
      docsByJobId.set(row.job_id, row.docs);
    }
  }

  for (const job of jobs) {
    // The database copy, when one exists, takes precedence over the queue's.
    if (docsByJobId.has(job.id)) {
      job.returnvalue = docsByJobId.get(job.id);
    }
    // Callers receive a single document per job, not a one-element array.
    if (Array.isArray(job.returnvalue)) {
      job.returnvalue = job.returnvalue[0];
    }
  }

  return jobs;
}
export async function crawlStatusController(req: Request, res: Response) {
try {
@ -28,19 +49,7 @@ export async function crawlStatusController(req: Request, res: Response) {
const jobIDs = await getCrawlJobs(req.params.jobId);
const jobs = (await Promise.all(jobIDs.map(async x => {
const job = await getScrapeQueue().getJob(x);
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobById(job.id);
if (supabaseData) {
job.returnvalue = supabaseData.docs;
}
}
return job;
}))).sort((a, b) => a.timestamp - b.timestamp);
const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";

View File

@ -17,3 +17,21 @@ export const supabaseGetJobById = async (jobId: string) => {
return data;
}
/**
 * Fetches all `firecrawl_jobs` rows whose `job_id` is in `jobIds` with a
 * single query.
 *
 * This is a best-effort lookup: a query error or a missing payload yields an
 * empty array rather than throwing, so callers cannot distinguish "no rows"
 * from "lookup failed".
 *
 * @param jobIds - Job IDs to look up.
 * @returns The matching rows, or `[]` when there are none or the query failed.
 */
export const supabaseGetJobsById = async (jobIds: string[]) => {
  // An empty ID list can never match a row; skip the round trip entirely.
  if (jobIds.length === 0) {
    return [];
  }

  const { data, error } = await supabase_service
    .from('firecrawl_jobs')
    .select('*')
    .in('job_id', jobIds);

  if (error || !data) {
    return [];
  }

  return data;
}