feat(batch/scrape): restructure logs, add webhooks

Gergő Móricz 2024-10-23 21:55:21 +02:00
parent e3cb00990a
commit 1da6360b77
2 changed files with 13 additions and 11 deletions
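
The pattern repeated throughout this diff is that a job with `job.data.crawlerOptions === null` is a batch scrape, while any other value marks a crawl, and the webhook event type is chosen accordingly. A minimal sketch of that selection, using a hypothetical helper name and inlining the WebhookEventType union added in the second file of this commit:

```ts
// Union mirrors the one added to the types file at the end of this commit.
type WebhookEventType =
  | "crawl.page"
  | "batch_scrape.page"
  | "crawl.started"
  | "crawl.completed"
  | "batch_scrape.completed"
  | "crawl.failed";

// Hypothetical helper; the commit inlines this ternary at each call site
// rather than extracting it.
function webhookEventFor(
  crawlerOptions: unknown,
  phase: "page" | "completed",
): WebhookEventType {
  // crawlerOptions === null marks a batch scrape job; anything else is a crawl.
  return crawlerOptions !== null ? `crawl.${phase}` : `batch_scrape.${phase}`;
}

// Example: a batch scrape page event.
console.log(webhookEventFor(null, "page")); // "batch_scrape.page"
```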


@@ -329,7 +329,8 @@ async function processJob(job: Job, token: string) {
         job.id as string,
         data,
         job.data.webhook,
-        job.data.v1
+        job.data.v1,
+        job.data.crawlerOptions !== null ? "crawl.page" : "batch_scrape.page",
       );
     }
     if (job.data.webhook && job.data.mode !== "crawl" && job.data.v1) {
@@ -339,7 +340,7 @@ async function processJob(job: Job, token: string) {
         data,
         job.data.webhook,
         job.data.v1,
-        "crawl.page",
+        job.data.crawlerOptions !== null ? "crawl.page" : "batch_scrape.page",
         true
       );
     }
@@ -353,7 +354,7 @@ async function processJob(job: Job, token: string) {
       docs: docs,
       time_taken: timeTakenInSeconds,
       team_id: job.data.team_id,
-      mode: job.data.crawlerOptions === null ? "batch_scrape" : job.data.mode,
+      mode: job.data.mode,
       url: job.data.url,
       crawlerOptions: job.data.crawlerOptions,
       pageOptions: job.data.pageOptions,
@@ -414,7 +415,7 @@ async function processJob(job: Job, token: string) {
       }
     }
-    if (await finishCrawl(job.data.crawl_id) && job.data.crawlerOptions !== null) {
+    if (await finishCrawl(job.data.crawl_id)) {
       if (!job.data.v1) {
         const jobIDs = await getCrawlJobs(job.data.crawl_id);
@@ -437,7 +438,7 @@ async function processJob(job: Job, token: string) {
           docs: [],
           time_taken: (Date.now() - sc.createdAt) / 1000,
           team_id: job.data.team_id,
-          mode: "crawl",
+          mode: job.data.crawlerOptions !== null ? "crawl" : "batch_scrape",
           url: sc.originUrl,
           crawlerOptions: sc.crawlerOptions,
           pageOptions: sc.pageOptions,
@@ -467,7 +468,7 @@ async function processJob(job: Job, token: string) {
           data,
           job.data.webhook,
           job.data.v1,
-          "crawl.completed"
+          job.data.crawlerOptions !== null ? "crawl.completed" : "batch_scrape.completed"
         );
       }
     } else {
@@ -485,7 +486,7 @@ async function processJob(job: Job, token: string) {
           [],
           job.data.webhook,
           job.data.v1,
-          "crawl.completed"
+          job.data.crawlerOptions !== null ? "crawl.completed" : "batch_scrape.completed"
         );
       }
@@ -497,7 +498,7 @@ async function processJob(job: Job, token: string) {
         docs: [],
         time_taken: (Date.now() - sc.createdAt) / 1000,
         team_id: job.data.team_id,
-        mode: "crawl",
+        mode: job.data.crawlerOptions !== null ? "crawl" : "batch_scrape",
         url: sc.originUrl,
         crawlerOptions: sc.crawlerOptions,
         pageOptions: sc.pageOptions,
@@ -554,7 +555,8 @@ async function processJob(job: Job, token: string) {
         job.data.crawl_id ?? (job.id as string),
         data,
         job.data.webhook,
-        job.data.v1
+        job.data.v1,
+        job.data.crawlerOptions !== null ? "crawl.page" : "batch_scrape.page",
       );
     }
     // if (job.data.v1) {
@@ -603,7 +605,7 @@ async function processJob(job: Job, token: string) {
       docs: [],
       time_taken: 0,
       team_id: job.data.team_id,
-      mode: "crawl",
+      mode: job.data.crawlerOptions !== null ? "crawl" : "batch_scrape",
       url: sc ? sc.originUrl : job.data.url,
       crawlerOptions: sc ? sc.crawlerOptions : job.data.crawlerOptions,
       pageOptions: sc ? sc.pageOptions : job.data.pageOptions,

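Beyond the webhook calls, the hunks above also change what gets logged: the per-page log entry now records `job.data.mode` unchanged, while the crawl-level entries written on completion or failure derive their `mode` from `crawlerOptions`. A small sketch of that crawl-level selection, with a payload type restricted to the fields visible in the hunks and a hypothetical builder name:

```ts
// Only fields that appear in the hunks above; the real log payload has more.
interface CrawlFinishedLog {
  docs: unknown[];
  time_taken: number;
  team_id: string;
  mode: "crawl" | "batch_scrape";
  url: string;
}

// Hypothetical builder mirroring the mode selection this commit introduces.
function buildCrawlFinishedLog(
  crawlerOptions: unknown,
  teamId: string,
  originUrl: string,
  createdAt: number,
): CrawlFinishedLog {
  return {
    docs: [],
    time_taken: (Date.now() - createdAt) / 1000,
    team_id: teamId,
    mode: crawlerOptions !== null ? "crawl" : "batch_scrape",
    url: originUrl,
  };
}
```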

@@ -159,4 +159,4 @@ export type PlanType =
   | "";
-export type WebhookEventType = "crawl.page" | "crawl.started" | "crawl.completed" | "crawl.failed";
+export type WebhookEventType = "crawl.page" | "batch_scrape.page" | "crawl.started" | "crawl.completed" | "batch_scrape.completed" | "crawl.failed";
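
With the widened union, a webhook consumer can tell batch scrape deliveries apart from crawl deliveries by event type alone. A hypothetical receiver sketch; the payload shape (including the `type` field) is an assumption, and only the event-type strings come from this commit:

```ts
import { createServer } from "node:http";

type WebhookEventType =
  | "crawl.page"
  | "batch_scrape.page"
  | "crawl.started"
  | "crawl.completed"
  | "batch_scrape.completed"
  | "crawl.failed";

createServer((req, res) => {
  let body = "";
  req.on("data", (chunk) => (body += chunk));
  req.on("end", () => {
    // Assumed delivery payload: { type, data? }.
    const event = JSON.parse(body) as { type: WebhookEventType; data?: unknown[] };
    switch (event.type) {
      case "crawl.page":
      case "batch_scrape.page":
        console.log("page delivered:", event.type);
        break;
      case "crawl.completed":
      case "batch_scrape.completed":
        console.log("run finished:", event.type);
        break;
      default:
        console.log("other event:", event.type);
    }
    res.writeHead(200).end();
  });
}).listen(3002);
```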