commit c40da77be0 (parent 3b92fb8433)

Added implementation for saving docs on Supabase

- TODO: remove the comments on `log_job.ts` before deploying to prod
```diff
@@ -3,6 +3,7 @@ import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../src/types";
 import { addWebScraperJob } from "../../src/services/queue-jobs";
 import { getWebScraperQueue } from "../../src/services/queue-service";
+import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
 
 export async function crawlStatusController(req: Request, res: Response) {
   try {
```
```diff
@@ -20,15 +21,27 @@ export async function crawlStatusController(req: Request, res: Response) {
     }
 
     const { current, current_url, total, current_step, partialDocs } = await job.progress();
+
+    let data = job.returnvalue;
+    if (process.env.USE_DB_AUTHENTICATION) {
+      const supabaseData = await supabaseGetJobById(req.params.jobId);
+
+      if (supabaseData) {
+        data = supabaseData.docs;
+      }
+    }
+
+    const jobStatus = await job.getState();
+
     res.json({
-      status: await job.getState(),
+      status: jobStatus,
       // progress: job.progress(),
-      current: current,
-      current_url: current_url,
-      current_step: current_step,
-      total: total,
-      data: job.returnvalue,
-      partial_data: partialDocs ?? [],
+      current,
+      current_url,
+      current_step,
+      total,
+      data,
+      partial_data: jobStatus == 'completed' ? [] : partialDocs,
     });
   } catch (error) {
     console.error(error);
```
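With this change the status endpoint prefers the docs persisted in Supabase over Bull's in-memory `returnvalue`, and returns an empty `partial_data` once the job has completed. A minimal client-side polling sketch; the base URL and the `/v0/crawl/status/:jobId` route are assumptions, not shown in this diff:

```ts
// Hypothetical polling client; base URL and route are assumptions.
async function pollCrawlStatus(jobId: string): Promise<unknown[]> {
  while (true) {
    const res = await fetch(`http://localhost:3002/v0/crawl/status/${jobId}`);
    const body = await res.json();
    if (body.status === "completed") {
      // docs now come from Supabase when USE_DB_AUTHENTICATION is set
      return body.data;
    }
    if (body.status === "failed") throw new Error("crawl failed");
    // partial_data is only populated while the job is still running
    console.log(`progress ${body.current}/${body.total},`, body.partial_data?.length ?? 0, "partial docs");
    await new Promise((r) => setTimeout(r, 1000));
  }
}
```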
apps/api/src/lib/supabase-jobs.ts (new file, 20 lines)

```diff
@@ -0,0 +1,20 @@
+import { supabase_service } from "../services/supabase";
+
+export const supabaseGetJobById = async (jobId: string) => {
+  const { data, error } = await supabase_service
+    .from('firecrawl_jobs')
+    .select('*')
+    .eq('job_id', jobId)
+    .single();
+
+  if (error) {
+    console.error('Error while fetching supabase for job:', jobId, 'error:', error);
+    return null;
+  }
+
+  if (!data) {
+    return null;
+  }
+
+  return data;
+}
```
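One caveat with `.single()`: supabase-js treats zero (or multiple) matching rows as an error, so polling a job that has no row yet hits the `console.error` branch rather than the `!data` check. If a missing row is expected, `.maybeSingle()` resolves with `data: null` instead; a sketch of that quieter variant, not part of the commit:

```ts
// Sketch only: maybeSingle() avoids logging an error when no row matches.
export const supabaseGetJobById = async (jobId: string) => {
  const { data, error } = await supabase_service
    .from('firecrawl_jobs')
    .select('*')
    .eq('job_id', jobId)
    .maybeSingle(); // resolves with data: null when zero rows match

  if (error) {
    console.error('Error while fetching supabase for job:', jobId, 'error:', error);
    return null;
  }

  return data; // null when the job has no row yet
};
```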
```diff
@@ -4,6 +4,7 @@ import { WebScraperDataProvider } from "../scraper/WebScraper";
 import { DocumentUrl, Progress } from "../lib/entities";
 import { billTeam } from "../services/billing/credit_billing";
 import { Document } from "../lib/entities";
+import { supabase_service } from "../services/supabase";
 
 export async function startWebScraperPipeline({
   job,
@@ -26,7 +27,7 @@ export async function startWebScraperPipeline({
       }
     },
     onSuccess: (result) => {
-      job.moveToCompleted(result);
+      saveJob(job, result);
     },
     onError: (error) => {
       job.moveToFailed(error);
@@ -107,3 +108,17 @@ export async function runWebScraper({
     return { success: false, message: error.message, docs: [] };
   }
 }
+
+const saveJob = async (job: Job, result: any) => {
+  if (process.env.USE_DB_AUTHENTICATION) {
+    const { data, error } = await supabase_service
+      .from("firecrawl_jobs")
+      .update({ docs: result })
+      .eq("job_id", job.id);
+
+    job.moveToCompleted(null); // returnvalue
+  } else {
+    job.moveToCompleted(result); // returnvalue
+  }
+}
```
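Two caveats in `saveJob` worth flagging. It is `async` but `onSuccess` never awaits it, and the destructured `error` from the update is never checked, so a failed write is silent. Also, `process.env.USE_DB_AUTHENTICATION` is a string, so even the literal value `"false"` is truthy and takes the Supabase branch. A hardened sketch, not the committed code:

```ts
// Sketch: await the update, surface write errors, compare the env flag explicitly.
const saveJob = async (job: Job, result: any) => {
  if (process.env.USE_DB_AUTHENTICATION === "true") { // bare truthiness would accept "false"
    const { error } = await supabase_service
      .from("firecrawl_jobs")
      .update({ docs: result })
      .eq("job_id", job.id);
    if (error) {
      console.error("Failed to save docs for job:", job.id, "error:", error);
    }
    await job.moveToCompleted(null); // keep the large docs payload out of Redis
  } else {
    await job.moveToCompleted(result);
  }
};
```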
```diff
@@ -7,14 +7,15 @@ import "dotenv/config";
 export async function logJob(job: FirecrawlJob) {
   try {
     // Only log jobs in production
-    if (process.env.ENV !== "production") {
-      return;
-    }
+    // if (process.env.ENV !== "production") {
+    //   return;
+    // }
 
     const { data, error } = await supabase_service
       .from("firecrawl_jobs")
       .insert([
         {
+          job_id: job.job_id ? job.job_id : null,
           success: job.success,
           message: job.message,
           num_docs: job.num_docs,
```
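Per the commit message's TODO, the environment guard here is only commented out temporarily so the new `job_id` column gets exercised in development; it should be restored before a production deploy:

```ts
// Guard to restore before deploying to prod (verbatim from the removed lines):
// Only log jobs in production
if (process.env.ENV !== "production") {
  return;
}
```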
```diff
@@ -41,6 +41,7 @@ getWebScraperQueue().process(
     await callWebhook(job.data.team_id, job.id as string, data);
 
     await logJob({
+      job_id: job.id as string,
       success: success,
       message: message,
       num_docs: docs.length,
@@ -80,6 +81,7 @@ getWebScraperQueue().process(
     };
     await callWebhook(job.data.team_id, job.id as string, data);
     await logJob({
+      job_id: job.id as string,
       success: false,
       message: typeof error === 'string' ? error : (error.message ?? "Something went wrong... Contact help@mendable.ai"),
       num_docs: 0,
```
```diff
@@ -48,6 +48,7 @@ export interface RunWebScraperResult {
 }
 
 export interface FirecrawlJob {
+  job_id?: string;
   success: boolean;
   message: string;
   num_docs: number;
```
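The optional `job_id` is the key that ties the pieces together: the worker passes the Bull job id into `logJob`'s insert, `saveJob` updates the same `firecrawl_jobs` row, and the status controller reads it back through `supabaseGetJobById`. A small read-side sketch with a hypothetical id:

```ts
// Hypothetical job id; demonstrates the lookup this commit adds.
const jobId = "00000000-0000-0000-0000-000000000000";
const row = await supabaseGetJobById(jobId);
if (row) {
  console.log(`job ${jobId}: success=${row.success},`, row.docs?.length ?? 0, "docs saved");
}
```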