fix(crawl): call webhooks

This commit is contained in:
Gergo Moricz 2024-08-13 22:10:17 +02:00
parent a33596de3c
commit 2e5e480cc2
2 changed files with 18 additions and 3 deletions

View File

@ -30,6 +30,15 @@ export async function addCrawlJob(id: string, job_id: string) {
await redisConnection.expire("crawl:" + id + ":jobs", 24 * 60 * 60, "NX");
}
/**
 * Marks a job as done for a crawl by adding its id to the crawl's
 * `jobs_done` Redis set.
 *
 * @param id - Identifier of the crawl the job belongs to.
 * @param job_id - Identifier of the job to add to the set.
 */
export async function addCrawlJobDone(id: string, job_id: string) {
  const doneSetKey = `crawl:${id}:jobs_done`;
  const ttlSeconds = 24 * 60 * 60;

  await redisConnection.sadd(doneSetKey, job_id);
  // EXPIRE is issued with the "NX" flag and a 24 hour TTL after every add.
  await redisConnection.expire(doneSetKey, ttlSeconds, "NX");
}
/**
 * Reports whether a crawl's `jobs_done` set and `jobs` set currently have
 * the same number of members.
 *
 * The two cardinalities are read one after the other (done set first), so
 * the result is a point-in-time comparison rather than an atomic snapshot.
 *
 * @param id - Identifier of the crawl to check.
 * @returns `true` when both sets report the same cardinality.
 */
export async function isCrawlFinished(id: string) {
  const doneCount = await redisConnection.scard(`crawl:${id}:jobs_done`);
  const enqueuedCount = await redisConnection.scard(`crawl:${id}:jobs`);
  return doneCount === enqueuedCount;
}
/**
 * Fetches every member of a crawl's `jobs` Redis set.
 *
 * @param id - Identifier of the crawl whose job ids are requested.
 * @returns The members of the set, as returned by `SMEMBERS`.
 */
export async function getCrawlJobs(id: string): Promise<string[]> {
  const jobsSetKey = `crawl:${id}:jobs`;
  const jobIds = await redisConnection.smembers(jobsSetKey);
  return jobIds;
}

View File

@ -18,7 +18,7 @@ import systemMonitor from "./system-monitor";
import { v4 as uuidv4 } from "uuid";
import { WebCrawler } from "../scraper/WebScraper/crawler";
import { getAdjustedMaxDepth } from "../scraper/WebScraper/utils/maxDepthUtils";
import { addCrawlJob, crawlToCrawler, getCrawl, lockURL } from "../lib/crawl-redis";
import { addCrawlJob, addCrawlJobDone, crawlToCrawler, getCrawl, isCrawlFinished, lockURL } from "../lib/crawl-redis";
import { StoredCrawl } from "../lib/crawl-redis";
import { addScrapeJob } from "./queue-jobs";
@ -168,6 +168,8 @@ async function processJob(job: Job, token: string) {
});
if (job.data.crawl_id) {
await addCrawlJobDone(job.data.crawl_id, job.id);
if (!job.data.sitemapped) {
const sc = await getCrawl(job.data.crawl_id) as StoredCrawl;
@ -198,6 +200,10 @@ async function processJob(job: Job, token: string) {
}
}
}
if (await isCrawlFinished(job.data.crawl_id)) {
await callWebhook(job.data.team_id, job.id as string, data);
}
}
Logger.info(`🐂 Job done ${job.id}`);
@ -229,8 +235,8 @@ async function processJob(job: Job, token: string) {
error:
"Something went wrong... Contact help@mendable.ai or try again." /* etc... */,
};
if (job.data.mode === "crawl") {
await callWebhook(job.data.team_id, job.id as string, data);
if (job.data.mode === "crawl" || job.data.crawl_id) {
await callWebhook(job.data.team_id, job.data.crawl_id ?? job.id as string, data);
}
await logJob({
job_id: job.id as string,