diff --git a/apps/api/src/controllers/crawl-status.ts b/apps/api/src/controllers/crawl-status.ts index feda86c0..448c119c 100644 --- a/apps/api/src/controllers/crawl-status.ts +++ b/apps/api/src/controllers/crawl-status.ts @@ -3,6 +3,7 @@ import { authenticateUser } from "./auth"; import { RateLimiterMode } from "../../src/types"; import { addWebScraperJob } from "../../src/services/queue-jobs"; import { getWebScraperQueue } from "../../src/services/queue-service"; +import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; export async function crawlStatusController(req: Request, res: Response) { try { @@ -20,15 +21,27 @@ export async function crawlStatusController(req: Request, res: Response) { } const { current, current_url, total, current_step, partialDocs } = await job.progress(); + + let data = job.returnvalue; + if (process.env.USE_DB_AUTHENTICATION === "true") { + const supabaseData = await supabaseGetJobById(req.params.jobId); + + if (supabaseData) { + data = supabaseData.docs; + } + } + + const jobStatus = await job.getState(); + res.json({ - status: await job.getState(), + status: jobStatus, // progress: job.progress(), - current: current, - current_url: current_url, - current_step: current_step, - total: total, - data: job.returnvalue, - partial_data: partialDocs ?? [], + current, + current_url, + current_step, + total, + data, + partial_data: jobStatus === 'completed' ? 
[] : partialDocs ?? [], }); } catch (error) { console.error(error); diff --git a/apps/api/src/lib/supabase-jobs.ts b/apps/api/src/lib/supabase-jobs.ts new file mode 100644 index 00000000..4dab0aea --- /dev/null +++ b/apps/api/src/lib/supabase-jobs.ts @@ -0,0 +1,20 @@ +import { supabase_service } from "../services/supabase"; + +export const supabaseGetJobById = async (jobId: string) => { + const { data, error } = await supabase_service + .from('firecrawl_jobs') + .select('*') + .eq('job_id', jobId) + .single(); + + if (error) { + console.error('Error while fetching supabase for job:', jobId, 'error:', error); + return null; + } + + if (!data) { + return null; + } + + return data; +} diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index e97e1413..57d6f322 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -4,6 +4,7 @@ import { WebScraperDataProvider } from "../scraper/WebScraper"; import { DocumentUrl, Progress } from "../lib/entities"; import { billTeam } from "../services/billing/credit_billing"; import { Document } from "../lib/entities"; +import { supabase_service } from "../services/supabase"; export async function startWebScraperPipeline({ job, @@ -26,7 +27,7 @@ export async function startWebScraperPipeline({ } }, onSuccess: (result) => { - job.moveToCompleted(result); + saveJob(job, result); }, onError: (error) => { job.moveToFailed(error); @@ -107,3 +108,17 @@ export async function runWebScraper({ return { success: false, message: error.message, docs: [] }; } } + +const saveJob = async (job: Job, result: any) => { + if (process.env.USE_DB_AUTHENTICATION === "true") { + const { data, error } = await supabase_service + .from("firecrawl_jobs") + .update({ docs: result }) + .eq("job_id", job.id); + + if (error) { console.error('Error saving job docs to supabase:', job.id, error); } job.moveToCompleted(null); // returnvalue + } else { + job.moveToCompleted(result); // returnvalue + } +} + diff --git a/apps/api/src/services/logging/log_job.ts b/apps/api/src/services/logging/log_job.ts 
index 6cb6ec3b..d4810770 100644 --- a/apps/api/src/services/logging/log_job.ts +++ b/apps/api/src/services/logging/log_job.ts @@ -7,14 +7,15 @@ import "dotenv/config"; export async function logJob(job: FirecrawlJob) { try { // Only log jobs in production - if (process.env.ENV !== "production") { - return; - } - + // if (process.env.ENV !== "production") { + // return; + // } + const { data, error } = await supabase_service .from("firecrawl_jobs") .insert([ { + job_id: job.job_id ? job.job_id : null, success: job.success, message: job.message, num_docs: job.num_docs, diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index f6328cf3..228d02f5 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -41,6 +41,7 @@ getWebScraperQueue().process( await callWebhook(job.data.team_id, job.id as string, data); await logJob({ + job_id: job.id as string, success: success, message: message, num_docs: docs.length, @@ -80,6 +81,7 @@ getWebScraperQueue().process( }; await callWebhook(job.data.team_id, job.id as string, data); await logJob({ + job_id: job.id as string, success: false, message: typeof error === 'string' ? error : (error.message ?? "Something went wrong... Contact help@mendable.ai"), num_docs: 0, diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index e69353b6..cb742537 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -48,6 +48,7 @@ export interface RunWebScraperResult { } export interface FirecrawlJob { + job_id?: string; success: boolean; message: string; num_docs: number;