Added implementation for saving docs on Supabase

- TODO: restore the commented-out production check in `log_job.ts` before deploying to prod
rafaelsideguide 2024-06-26 18:23:28 -03:00
parent 3b92fb8433
commit c40da77be0
6 changed files with 64 additions and 12 deletions

View File

@@ -3,6 +3,7 @@ import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../src/types";
 import { addWebScraperJob } from "../../src/services/queue-jobs";
 import { getWebScraperQueue } from "../../src/services/queue-service";
+import { supabaseGetJobById } from "../../src/lib/supabase-jobs";

 export async function crawlStatusController(req: Request, res: Response) {
   try {
@@ -20,15 +21,27 @@ export async function crawlStatusController(req: Request, res: Response) {
     }
     const { current, current_url, total, current_step, partialDocs } = await job.progress();
+    let data = job.returnvalue;
+    if (process.env.USE_DB_AUTHENTICATION) {
+      const supabaseData = await supabaseGetJobById(req.params.jobId);
+      if (supabaseData) {
+        data = supabaseData.docs;
+      }
+    }
+
+    const jobStatus = await job.getState();
+
     res.json({
-      status: await job.getState(),
+      status: jobStatus,
       // progress: job.progress(),
-      current: current,
-      current_url: current_url,
-      current_step: current_step,
-      total: total,
-      data: job.returnvalue,
-      partial_data: partialDocs ?? [],
+      current,
+      current_url,
+      current_step,
+      total,
+      data,
+      partial_data: jobStatus == 'completed' ? [] : partialDocs,
     });
   } catch (error) {
     console.error(error);

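Review note (not part of the diff): process.env values are always strings, so running with USE_DB_AUTHENTICATION=false would still enter the Supabase branch, because the string "false" is truthy. A stricter guard, offered only as a suggestion:

    const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
    if (useDbAuthentication) {
      const supabaseData = await supabaseGetJobById(req.params.jobId);
      if (supabaseData) {
        data = supabaseData.docs;
      }
    }
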
View File

@@ -0,0 +1,20 @@
+import { supabase_service } from "../services/supabase";
+
+export const supabaseGetJobById = async (jobId: string) => {
+  const { data, error } = await supabase_service
+    .from('firecrawl_jobs')
+    .select('*')
+    .eq('job_id', jobId)
+    .single();
+
+  if (error) {
+    console.error('Error while fetching supabase for job:', jobId, 'error:', error);
+    return null;
+  }
+
+  if (!data) {
+    return null;
+  }
+
+  return data;
+}

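Review note (not part of the diff): with supabase-js, .single() already reports an error when the query does not return exactly one row, so the separate !data check is mostly a belt-and-braces fallback. Callers get either the row or null, so a minimal usage sketch (the job id value is illustrative) looks like:

    const row = await supabaseGetJobById("some-job-id");
    if (row) {
      console.log("stored docs:", row.docs); // row is the full firecrawl_jobs record
    }
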
View File

@@ -4,6 +4,7 @@ import { WebScraperDataProvider } from "../scraper/WebScraper";
 import { DocumentUrl, Progress } from "../lib/entities";
 import { billTeam } from "../services/billing/credit_billing";
 import { Document } from "../lib/entities";
+import { supabase_service } from "../services/supabase";

 export async function startWebScraperPipeline({
   job,
@@ -26,7 +27,7 @@ export async function startWebScraperPipeline({
       }
     },
     onSuccess: (result) => {
-      job.moveToCompleted(result);
+      saveJob(job, result);
     },
     onError: (error) => {
       job.moveToFailed(error);
@@ -107,3 +108,17 @@ export async function runWebScraper({
     return { success: false, message: error.message, docs: [] };
   }
 }
+
+const saveJob = async (job: Job, result: any) => {
+  if (process.env.USE_DB_AUTHENTICATION) {
+    const { data, error } = await supabase_service
+      .from("firecrawl_jobs")
+      .update({ docs: result })
+      .eq("job_id", job.id);
+
+    job.moveToCompleted(null); // returnvalue
+  } else {
+    job.moveToCompleted(result); // returnvalue
+  }
+}

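Review note (not part of the diff): saveJob destructures error from the update but never checks it, so a failed write silently drops the docs while the job still completes. A sketch of the same function with the error surfaced, as a suggestion only:

    const saveJob = async (job: Job, result: any) => {
      if (process.env.USE_DB_AUTHENTICATION) {
        const { error } = await supabase_service
          .from("firecrawl_jobs")
          .update({ docs: result })
          .eq("job_id", job.id);
        if (error) {
          // Assumption: logging is enough here; a retry/failure policy is a separate decision.
          console.error("Failed to persist docs for job:", job.id, error);
        }
        job.moveToCompleted(null); // returnvalue stays out of Redis; docs live in Supabase
      } else {
        job.moveToCompleted(result); // returnvalue
      }
    };
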
View File

@@ -7,14 +7,15 @@ import "dotenv/config";
 export async function logJob(job: FirecrawlJob) {
   try {
     // Only log jobs in production
-    if (process.env.ENV !== "production") {
-      return;
-    }
+    // if (process.env.ENV !== "production") {
+    //   return;
+    // }
+
     const { data, error } = await supabase_service
       .from("firecrawl_jobs")
       .insert([
         {
+          job_id: job.job_id ? job.job_id : null,
           success: job.success,
           message: job.message,
           num_docs: job.num_docs,

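Review note (not part of the diff): with the guard commented out, logJob now inserts a row in every environment, which appears intentional for local testing. Per the TODO in the commit message, the original check should come back before production:

    // Only log jobs in production
    if (process.env.ENV !== "production") {
      return;
    }
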
View File

@@ -41,6 +41,7 @@ getWebScraperQueue().process(
       await callWebhook(job.data.team_id, job.id as string, data);

       await logJob({
+        job_id: job.id as string,
         success: success,
         message: message,
         num_docs: docs.length,
@@ -80,6 +81,7 @@ getWebScraperQueue().process(
       };
       await callWebhook(job.data.team_id, job.id as string, data);
       await logJob({
+        job_id: job.id as string,
         success: false,
         message: typeof error === 'string' ? error : (error.message ?? "Something went wrong... Contact help@mendable.ai"),
         num_docs: 0,

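Review note (not part of the diff): both the success and failure paths now pass job_id, so the inserted row can later be matched by supabaseGetJobById. It is worth verifying the ordering against saveJob in runWebScraper.ts: a Supabase update that runs before the matching insert matches zero rows and still reports no error. If the ordering is not guaranteed, an upsert keyed on job_id (assuming a unique constraint on that column) would make the two writes order-independent; a sketch, as a suggestion only:

    const { error } = await supabase_service
      .from("firecrawl_jobs")
      .upsert({ job_id: job.id, docs: result }, { onConflict: "job_id" });
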
View File

@@ -48,6 +48,7 @@ export interface RunWebScraperResult {
 }

 export interface FirecrawlJob {
+  job_id?: string;
   success: boolean;
   message: string;
   num_docs: number;