feat(crawl): webhook

This commit is contained in:
Gergő Móricz 2024-08-16 23:42:40 +02:00
parent 086ba6280b
commit 803577eeba
5 changed files with 8 additions and 5 deletions

View File

@ -73,6 +73,7 @@ export async function crawlController(req: RequestWithAuth<{}, CrawlResponse, Cr
pageOptions: pageOptions,
origin: "api",
crawl_id: id,
webhook: req.body.webhook,
}, {
priority: 15,
});

View File

@ -78,6 +78,7 @@ export const crawlRequestSchema = z.object({
origin: z.string().optional().default("api"),
crawlerOptions: crawlerOptions.default({}),
scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
webhook: z.string().url().optional(),
});
// export type CrawlRequest = {

View File

@ -152,7 +152,7 @@ async function processJob(job: Job, token: string) {
};
if (job.data.mode === "crawl") {
await callWebhook(job.data.team_id, job.id as string, data);
await callWebhook(job.data.team_id, job.id as string, data, job.data.webhook);
}
if (job.data.crawl_id) {

View File

@ -1,15 +1,15 @@
import { Logger } from "../../src/lib/logger";
import { supabase_service } from "./supabase";
export const callWebhook = async (teamId: string, jobId: string,data: any) => {
export const callWebhook = async (teamId: string, jobId: string, data: any, specified?: string) => {
try {
const selfHostedUrl = process.env.SELF_HOSTED_WEBHOOK_URL?.replace("{{JOB_ID}}", jobId);
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
let webhookUrl = selfHostedUrl;
let webhookUrl = specified ?? selfHostedUrl;
// Only fetch the webhook URL from the database if the self-hosted webhook URL is not set
// Only fetch the webhook URL from the database if the self-hosted webhook URL and specified webhook are not set
// and the USE_DB_AUTHENTICATION environment variable is set to true
if (!selfHostedUrl && useDbAuthentication) {
if (!webhookUrl && useDbAuthentication) {
const { data: webhooksData, error } = await supabase_service
.from("webhooks")
.select("url")

View File

@ -30,6 +30,7 @@ export interface WebScraperOptions {
origin?: string;
crawl_id?: string;
sitemapped?: boolean;
webhook?: string;
}
export interface RunWebScraperParams {