From ec361609d2343f77b3f3efdf03323ffd87a1b356 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 15 Aug 2024 18:37:19 -0400 Subject: [PATCH 1/6] Nick: added growth-2x plan --- apps/api/src/controllers/auth.ts | 2 ++ apps/api/src/services/rate-limiter.ts | 3 +++ 2 files changed, 5 insertions(+) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index e18a8a7c..cb9cacde 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -336,6 +336,8 @@ function getPlanByPriceId(price_id: string) { case process.env.STRIPE_PRICE_ID_GROWTH: case process.env.STRIPE_PRICE_ID_GROWTH_YEARLY: return "growth"; + case process.env.STRIPE_PRICE_ID_GROWTH_DOUBLE_MONTHLY: + return "growthdouble"; default: return "free"; } diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index d90ab4f7..05fb102c 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -14,6 +14,7 @@ const RATE_LIMITS = { standardNew: 10, standardnew: 10, growth: 50, + growthdouble: 50, }, scrape: { default: 20, @@ -26,6 +27,7 @@ const RATE_LIMITS = { standardNew: 50, standardnew: 50, growth: 500, + growthdouble: 500, }, search: { default: 20, @@ -38,6 +40,7 @@ const RATE_LIMITS = { standardNew: 50, standardnew: 50, growth: 500, + growthdouble: 500, }, preview: { free: 5, From 5a6570cba21a4e62b0a0f2b30691410d9884b4a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Fri, 16 Aug 2024 17:42:42 +0200 Subject: [PATCH 2/6] fix(webhooks): call back with parent crawl ID --- apps/api/src/services/queue-worker.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index fdbbbb36..dedb6ac8 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -260,7 +260,7 @@ async function processJob(job: Job, token: string) { docs: fullDocs, }; - await callWebhook(job.data.team_id, job.id as string, data); + await callWebhook(job.data.team_id, job.data.crawl_id, data); } } From 6bd52e63bfe36ea9a43f8f3bddb4af4b5011d882 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Fri, 16 Aug 2024 18:42:24 +0200 Subject: [PATCH 3/6] fix(queue-worker): fix linksOnPage undefined error --- apps/api/src/services/queue-worker.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index dedb6ac8..de53b495 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -174,7 +174,7 @@ async function processJob(job: Job, token: string) { if (!sc.cancelled) { const crawler = crawlToCrawler(job.data.crawl_id, sc); - const links = crawler.filterLinks((data.docs[0].linksOnPage as string[]) + const links = crawler.filterLinks((data.docs[0].linksOnPage ?? []) .map(href => crawler.filterURL(href.trim(), sc.originUrl)) .filter(x => x !== null), Infinity, From d0a8382a5be1b4815aef2050776a53c54de95e6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Fri, 16 Aug 2024 18:48:52 +0200 Subject: [PATCH 4/6] fix(queue-worker): crawl finishing race condition --- apps/api/src/lib/crawl-redis.ts | 10 ++++++++++ apps/api/src/services/queue-worker.ts | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/apps/api/src/lib/crawl-redis.ts b/apps/api/src/lib/crawl-redis.ts index 88d6b716..9e8a0cf6 100644 --- a/apps/api/src/lib/crawl-redis.ts +++ b/apps/api/src/lib/crawl-redis.ts @@ -45,6 +45,16 @@ export async function isCrawlFinished(id: string) { return (await redisConnection.scard("crawl:" + id + ":jobs_done")) === (await redisConnection.scard("crawl:" + id + ":jobs")); } +export async function finishCrawl(id: string) { + if (await isCrawlFinished(id)) { + const set = await redisConnection.setnx("crawl:" + id + ":finish", "yes"); + if (set === 1) { + await redisConnection.expire("crawl:" + id + ":finish", 24 * 60 * 60); + } + return set === 1 + } +} + export async function getCrawlJobs(id: string): Promise { return await redisConnection.smembers("crawl:" + id + ":jobs"); } diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index de53b495..089e0aa7 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -15,7 +15,7 @@ import { Logger } from "../lib/logger"; import { Worker } from "bullmq"; import systemMonitor from "./system-monitor"; import { v4 as uuidv4 } from "uuid"; -import { addCrawlJob, addCrawlJobDone, crawlToCrawler, getCrawl, getCrawlJobs, isCrawlFinished, lockURL } from "../lib/crawl-redis"; +import { addCrawlJob, addCrawlJobDone, crawlToCrawler, finishCrawl, getCrawl, getCrawlJobs, isCrawlFinished, lockURL } from "../lib/crawl-redis"; import { StoredCrawl } from "../lib/crawl-redis"; import { addScrapeJob } from "./queue-jobs"; import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; @@ -199,7 +199,7 @@ async function processJob(job: Job, token: string) { } } - if (await isCrawlFinished(job.data.crawl_id)) { + if (await finishCrawl(job.data.crawl_id)) { const jobIDs = await getCrawlJobs(job.data.crawl_id); const jobs = (await Promise.all(jobIDs.map(async x => { From 6e549422654a11b6b0dea1146e92d53d389bc0ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Fri, 16 Aug 2024 19:11:53 +0200 Subject: [PATCH 5/6] fix(queue-worker): add cancelled to crawl log --- apps/api/src/services/queue-worker.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 089e0aa7..7be12dad 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -233,7 +233,7 @@ async function processJob(job: Job, token: string) { await logJob({ job_id: job.data.crawl_id, success: jobStatus === "completed", - message: message, + message: sc.cancelled ? "Cancelled" : message, num_docs: fullDocs.length, docs: [], time_taken: (Date.now() - sc.createdAt) / 1000, From fd6432e7fd516449363241e04b3a6c07c145cbbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Fri, 16 Aug 2024 19:16:08 +0200 Subject: [PATCH 6/6] fix(queue-worker): correct job success --- apps/api/src/services/queue-worker.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 7be12dad..c15201be 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -15,7 +15,7 @@ import { Logger } from "../lib/logger"; import { Worker } from "bullmq"; import systemMonitor from "./system-monitor"; import { v4 as uuidv4 } from "uuid"; -import { addCrawlJob, addCrawlJobDone, crawlToCrawler, finishCrawl, getCrawl, getCrawlJobs, isCrawlFinished, lockURL } from "../lib/crawl-redis"; +import { addCrawlJob, addCrawlJobDone, crawlToCrawler, finishCrawl, getCrawl, getCrawlJobs, lockURL } from "../lib/crawl-redis"; import { StoredCrawl } from "../lib/crawl-redis"; import { addScrapeJob } from "./queue-jobs"; import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; @@ -226,7 +226,7 @@ async function processJob(job: Job, token: string) { return j; }))).sort((a, b) => a.timestamp - b.timestamp); const jobStatuses = await Promise.all(jobs.map(x => x.getState())); - const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active"; + const jobStatus = sc.cancelled || jobStatuses.some(x => x === "failed") ? "failed" : "completed"; const fullDocs = jobs.map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);