feat(crawl-status): same for v0

This commit is contained in:
Gergő Móricz 2025-01-23 19:39:33 +01:00
parent 95ce3c3b71
commit a7b56ab87c
2 changed files with 52 additions and 26 deletions

View File

@ -9,31 +9,53 @@ import * as Sentry from "@sentry/node";
import { configDotenv } from "dotenv"; import { configDotenv } from "dotenv";
import { Job } from "bullmq"; import { Job } from "bullmq";
import { toLegacyDocument } from "../v1/types"; import { toLegacyDocument } from "../v1/types";
import type { DBJob, PseudoJob } from "../v1/crawl-status";
configDotenv(); configDotenv();
export async function getJobs(crawlId: string, ids: string[]) { export async function getJobs(crawlId: string, ids: string[]): Promise<PseudoJob<any>[]> {
const jobs = ( const [bullJobs, dbJobs] = await Promise.all([
await Promise.all(ids.map((x) => getScrapeQueue().getJob(x))) Promise.all(ids.map((x) => getScrapeQueue().getJob(x))).then(x => x.filter(x => x)) as Promise<(Job<any, any, string> & { id: string })[]>,
).filter((x) => x) as Job[]; process.env.USE_DB_AUTHENTICATION === "true" ? await supabaseGetJobsByCrawlId(crawlId) : [],
]);
if (process.env.USE_DB_AUTHENTICATION === "true") { const bullJobMap = new Map<string, PseudoJob<any>>();
const supabaseData = await supabaseGetJobsByCrawlId(crawlId); const dbJobMap = new Map<string, DBJob>();
supabaseData.forEach((x) => { for (const job of bullJobs) {
const job = jobs.find((y) => y.id === x.job_id); bullJobMap.set(job.id, job);
if (job) { }
job.returnvalue = x.docs;
for (const job of dbJobs) {
dbJobMap.set(job.job_id, job);
}
const jobs: PseudoJob<any>[] = [];
for (const id of ids) {
const bullJob = bullJobMap.get(id);
const dbJob = dbJobMap.get(id);
if (!bullJob && !dbJob) continue;
const data = dbJob?.docs ?? bullJob?.returnvalue;
const job: PseudoJob<any> = {
id,
getState: bullJob ? (() => bullJob.getState()) : (() => dbJob!.success ? "completed" : "failed"),
returnvalue: Array.isArray(data)
? data[0]
: data,
data: {
scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options,
},
timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(),
failedReason: (bullJob ? bullJob.failedReason : dbJob!.message) || undefined,
} }
});
}
jobs.forEach((job) => { jobs.push(job);
job.returnvalue = Array.isArray(job.returnvalue) }
? job.returnvalue[0]
: job.returnvalue;
});
return jobs; return jobs;
} }
export async function crawlStatusController(req: Request, res: Response) { export async function crawlStatusController(req: Request, res: Response) {
@ -93,8 +115,9 @@ export async function crawlStatusController(req: Request, res: Response) {
if ( if (
jobs.length > 0 && jobs.length > 0 &&
jobs[0].data && jobs[0].data &&
jobs[0].data.pageOptions && jobs[0].data.scrapeOptions &&
!jobs[0].data.pageOptions.includeRawHtml jobs[0].data.scrapeOptions.formats &&
!jobs[0].data.scrapeOptions.formats.includes("rawHtml")
) { ) {
data.forEach((item) => { data.forEach((item) => {
if (item) { if (item) {

View File

@ -25,7 +25,7 @@ import { logger } from "../../lib/logger";
import { supabase_service } from "../../services/supabase"; import { supabase_service } from "../../services/supabase";
configDotenv(); configDotenv();
type PseudoJob<T> = { export type PseudoJob<T> = {
id: string, id: string,
getState(): Promise<JobState | "unknown"> | JobState | "unknown", getState(): Promise<JobState | "unknown"> | JobState | "unknown",
returnvalue: T | null, returnvalue: T | null,
@ -33,9 +33,10 @@ type PseudoJob<T> = {
data: { data: {
scrapeOptions: any, scrapeOptions: any,
}, },
failedReason?: string,
} }
type DBJob = { docs: any, success: boolean, page_options: any, date_added: any } export type DBJob = { docs: any, success: boolean, page_options: any, date_added: any, message: string | null }
export async function getJob(id: string): Promise<PseudoJob<any> | null> { export async function getJob(id: string): Promise<PseudoJob<any> | null> {
const [bullJob, dbJob] = await Promise.all([ const [bullJob, dbJob] = await Promise.all([
@ -57,6 +58,7 @@ export async function getJob(id: string): Promise<PseudoJob<any> | null> {
scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options, scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options,
}, },
timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(), timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(),
failedReason: (bullJob ? bullJob.failedReason : dbJob!.message) || undefined,
} }
return job; return job;
@ -99,6 +101,7 @@ export async function getJobs(ids: string[]): Promise<PseudoJob<any>[]> {
scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options, scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options,
}, },
timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(), timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(),
failedReason: (bullJob ? bullJob.failedReason : dbJob!.message) || undefined,
} }
jobs.push(job); jobs.push(job);