From c44694230601a3a701992b70d3c98132365e9197 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 29 Jul 2024 21:28:29 -0400 Subject: [PATCH 01/65] Nick: --- apps/api/package.json | 2 +- apps/api/pnpm-lock.yaml | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/apps/api/package.json b/apps/api/package.json index 15e97377..a4e10b93 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -53,7 +53,7 @@ "@bull-board/express": "^5.20.5", "@devil7softwares/pos": "^1.0.2", "@dqbd/tiktoken": "^1.0.13", - "@hyperdx/node-opentelemetry": "^0.8.0", + "@hyperdx/node-opentelemetry": "^0.8.1", "@logtail/node": "^0.4.12", "@nangohq/node": "^0.40.8", "@sentry/node": "^8.13.0", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index ec83e18b..56a86be7 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -27,8 +27,8 @@ importers: specifier: ^1.0.13 version: 1.0.15 '@hyperdx/node-opentelemetry': - specifier: ^0.8.0 - version: 0.8.0 + specifier: ^0.8.1 + version: 0.8.1 '@logtail/node': specifier: ^0.4.12 version: 0.4.21 @@ -496,8 +496,8 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@hyperdx/node-opentelemetry@0.8.0': - resolution: {integrity: sha512-2z1jQqg2czctHOgo17WETUJOX2BJJ2jqg50R/z4o4ADRCS7Ynp4n3eVMLtsJHypQeDdrInUDE0VtVoXN5b+6hw==} + '@hyperdx/node-opentelemetry@0.8.1': + resolution: {integrity: sha512-wNw0yQf54j/9KXVWeEOu8G6C5FT5EFlrz4dcmscTkwCvo6fQOLRZa/NbGcqugt0LSFMc0/6/Q5RDWVqDpEn0LQ==} hasBin: true '@ioredis/commands@1.2.0': @@ -4935,7 +4935,7 @@ snapshots: transitivePeerDependencies: - supports-color - '@hyperdx/node-opentelemetry@0.8.0': + '@hyperdx/node-opentelemetry@0.8.1': dependencies: '@hyperdx/instrumentation-exception': 0.1.0(@opentelemetry/api@1.9.0) '@hyperdx/instrumentation-sentry-node': 0.1.0(@opentelemetry/api@1.9.0) @@ -4960,6 +4960,7 @@ snapshots: lodash.isobject: 3.0.2 lodash.isplainobject: 4.0.6 lodash.isstring: 4.0.1 + node-fetch: 2.7.0 open: 8.4.2 ora: 5.4.1 pino-abstract-transport: 1.2.0 From e6738abf96380ae068310bf4802e4a2bae100bf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Fri, 16 Aug 2024 23:39:39 +0200 Subject: [PATCH 02/65] fix(crawl-status): retrieve from DB in bulk --- apps/api/src/controllers/crawl-status.ts | 37 +++++++++++++++--------- apps/api/src/lib/supabase-jobs.ts | 18 ++++++++++++ 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/apps/api/src/controllers/crawl-status.ts b/apps/api/src/controllers/crawl-status.ts index 93c463c0..b429fe9c 100644 --- a/apps/api/src/controllers/crawl-status.ts +++ b/apps/api/src/controllers/crawl-status.ts @@ -4,7 +4,28 @@ import { RateLimiterMode } from "../../src/types"; import { getScrapeQueue } from "../../src/services/queue-service"; import { Logger } from "../../src/lib/logger"; import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis"; -import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; +import { supabaseGetJobsById } from "../../src/lib/supabase-jobs"; + +async function getJobs(ids: string[]) { + const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x); + + if (process.env.USE_DB_AUTHENTICATION === "true") { + const supabaseData = await supabaseGetJobsById(ids); + + supabaseData.forEach(x => { + const job = jobs.find(y => y.id === x.job_id); + if (job) { + job.returnvalue = x.docs; + } + }) + } + + jobs.forEach(job => { + job.returnvalue = Array.isArray(job.returnvalue) ? 
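// normalize the stored return value: docs read back from the DB (or left by
// BullMQ) may be either a bare value or an array; keep only the first element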
job.returnvalue[0] : job.returnvalue; + }); + + return jobs; +} export async function crawlStatusController(req: Request, res: Response) { try { @@ -28,19 +49,7 @@ export async function crawlStatusController(req: Request, res: Response) { const jobIDs = await getCrawlJobs(req.params.jobId); - const jobs = (await Promise.all(jobIDs.map(async x => { - const job = await getScrapeQueue().getJob(x); - - if (process.env.USE_DB_AUTHENTICATION === "true") { - const supabaseData = await supabaseGetJobById(job.id); - - if (supabaseData) { - job.returnvalue = supabaseData.docs; - } - } - - return job; - }))).sort((a, b) => a.timestamp - b.timestamp); + const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp); const jobStatuses = await Promise.all(jobs.map(x => x.getState())); const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active"; diff --git a/apps/api/src/lib/supabase-jobs.ts b/apps/api/src/lib/supabase-jobs.ts index 1f9531e5..b4247883 100644 --- a/apps/api/src/lib/supabase-jobs.ts +++ b/apps/api/src/lib/supabase-jobs.ts @@ -17,3 +17,21 @@ export const supabaseGetJobById = async (jobId: string) => { return data; } + +export const supabaseGetJobsById = async (jobIds: string[]) => { + const { data, error } = await supabase_service + .from('firecrawl_jobs') + .select('*') + .in('job_id', jobIds); + + if (error) { + return []; + } + + if (!data) { + return []; + } + + return data; +} + From c281fe62c015f1f14de77c2f3b59b44c8a56ddac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Fri, 16 Aug 2024 23:43:54 +0200 Subject: [PATCH 03/65] fix(crawl): propagate db fix to preview endpoint --- apps/api/src/controllers/crawl-status.ts | 2 +- apps/api/src/controllers/status.ts | 15 ++------------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/apps/api/src/controllers/crawl-status.ts b/apps/api/src/controllers/crawl-status.ts index b429fe9c..3488ce26 100644 --- a/apps/api/src/controllers/crawl-status.ts +++ b/apps/api/src/controllers/crawl-status.ts @@ -6,7 +6,7 @@ import { Logger } from "../../src/lib/logger"; import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis"; import { supabaseGetJobsById } from "../../src/lib/supabase-jobs"; -async function getJobs(ids: string[]) { +export async function getJobs(ids: string[]) { const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x); if (process.env.USE_DB_AUTHENTICATION === "true") { diff --git a/apps/api/src/controllers/status.ts b/apps/api/src/controllers/status.ts index 21a9cf47..e469060f 100644 --- a/apps/api/src/controllers/status.ts +++ b/apps/api/src/controllers/status.ts @@ -3,6 +3,7 @@ import { Logger } from "../../src/lib/logger"; import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis"; import { getScrapeQueue } from "../../src/services/queue-service"; import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; +import { getJobs } from "./crawl-status"; export async function crawlJobStatusPreviewController(req: Request, res: Response) { try { @@ -22,19 +23,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons // } // } - const jobs = (await Promise.all(jobIDs.map(async x => { - const job = await getScrapeQueue().getJob(x); - - if (process.env.USE_DB_AUTHENTICATION === "true") { - const supabaseData = await supabaseGetJobById(job.id); - - if (supabaseData) { - job.returnvalue = supabaseData.docs; - } - 
} - - return job; - }))).sort((a, b) => a.timestamp - b.timestamp); + const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp); const jobStatuses = await Promise.all(jobs.map(x => x.getState())); const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active"; From 47123be783582dd977ef30edd252da62adc3676b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 16 Aug 2024 22:01:56 -0400 Subject: [PATCH 04/65] Nick: weird activity block --- apps/api/src/scraper/WebScraper/utils/blocklist.ts | 1 + apps/api/src/services/queue-worker.ts | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts index 0bdf9876..fd3c9ad1 100644 --- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts +++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts @@ -15,6 +15,7 @@ const socialMediaBlocklist = [ 'whatsapp.com', 'wechat.com', 'telegram.org', + 'researchhub.com' ]; const allowedKeywords = [ diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index c15201be..890e6e7b 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -173,9 +173,14 @@ async function processJob(job: Job, token: string) { if (!job.data.sitemapped) { if (!sc.cancelled) { const crawler = crawlToCrawler(job.data.crawl_id, sc); - - const links = crawler.filterLinks((data.docs[0].linksOnPage ?? []) - .map(href => crawler.filterURL(href.trim(), sc.originUrl)) + let linksOnPage = []; + try{ + linksOnPage = data.docs[0]?.linksOnPage ?? []; + }catch(e){ + linksOnPage = [] + } + const links = crawler.filterLinks( + linksOnPage.map(href => crawler.filterURL(href.trim(), sc.originUrl)) .filter(x => x !== null), Infinity, sc.crawlerOptions?.maxDepth ?? 
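// depth cap falls back to 10 when the stored crawl options omit maxDepth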
10 From f7973801129e01de3c2d0558605ab712191ade25 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 16 Aug 2024 22:17:38 -0400 Subject: [PATCH 05/65] Nick: --- apps/api/src/services/alerts/index.ts | 2 +- apps/api/src/services/queue-worker.ts | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/apps/api/src/services/alerts/index.ts b/apps/api/src/services/alerts/index.ts index f5e38d14..cb953e2e 100644 --- a/apps/api/src/services/alerts/index.ts +++ b/apps/api/src/services/alerts/index.ts @@ -49,7 +49,7 @@ export async function checkAlerts() { }; const checkAll = async () => { - // await checkActiveJobs(); + await checkActiveJobs(); await checkWaitingQueue(); }; diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 890e6e7b..bcf4a2b3 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -115,6 +115,20 @@ workerFun(scrapeQueueName, processJobInternal); async function processJob(job: Job, token: string) { Logger.info(`🐂 Worker taking job ${job.id}`); + // Check if the job URL is researchhub and block it immediately + // TODO: remove this once solve the root issue + if (job.data.url && job.data.url.includes("researchhub.com")) { + Logger.info(`🐂 Blocking job ${job.id} with URL ${job.data.url}`); + const data = { + success: false, + docs: [], + project_id: job.data.project_id, + error: "URL is blocked: researchhub.com", + }; + await job.moveToCompleted(data.docs, token, false); + return data; + } + try { job.updateProgress({ current: 1, From 3fe82b4f12277acb11e8efcac0cebcae82a01aa6 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 17 Aug 2024 03:09:31 -0400 Subject: [PATCH 06/65] Update queue-worker.ts --- apps/api/src/services/queue-worker.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index bcf4a2b3..f819242e 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -117,13 +117,13 @@ async function processJob(job: Job, token: string) { // Check if the job URL is researchhub and block it immediately // TODO: remove this once solve the root issue - if (job.data.url && job.data.url.includes("researchhub.com")) { + if (job.data.url && (job.data.url.includes("researchhub.com") || job.data.url.includes("ebay.com"))) { Logger.info(`🐂 Blocking job ${job.id} with URL ${job.data.url}`); const data = { success: false, docs: [], project_id: job.data.project_id, - error: "URL is blocked: researchhub.com", + error: "URL is blocked. 
Please contact hello@firecrawl.com if you believe this is an error.", }; await job.moveToCompleted(data.docs, token, false); return data; @@ -137,6 +137,7 @@ async function processJob(job: Job, token: string) { current_url: "", }); const start = Date.now(); + const { success, message, docs } = await startWebScraperPipeline({ job, token, From b8170aaa47c642e6249b4bdaddc40dafab7f5256 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 19 Aug 2024 08:51:48 -0300 Subject: [PATCH 07/65] Update blocklist.ts --- apps/api/src/scraper/WebScraper/utils/blocklist.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts index fd3c9ad1..7f1602e1 100644 --- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts +++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts @@ -15,7 +15,8 @@ const socialMediaBlocklist = [ 'whatsapp.com', 'wechat.com', 'telegram.org', - 'researchhub.com' + 'researchhub.com', + 'youtube.com' ]; const allowedKeywords = [ From 4ffc60596ac028fe9b7d25d9b3ba6589a78b746b Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 19 Aug 2024 09:29:23 -0300 Subject: [PATCH 08/65] Update queue-worker.ts --- apps/api/src/services/queue-worker.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index f819242e..0d24387a 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -117,7 +117,7 @@ async function processJob(job: Job, token: string) { // Check if the job URL is researchhub and block it immediately // TODO: remove this once solve the root issue - if (job.data.url && (job.data.url.includes("researchhub.com") || job.data.url.includes("ebay.com"))) { + if (job.data.url && (job.data.url.includes("researchhub.com") || job.data.url.includes("ebay.com") || job.data.url.includes("youtube.com"))) { Logger.info(`🐂 Blocking job ${job.id} with URL ${job.data.url}`); const data = { success: false, From 36b35dbc67ee82ebe3e2fb00a841c3ae1ed28360 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 19 Aug 2024 11:01:26 -0300 Subject: [PATCH 09/65] Update crawl.ts --- apps/api/src/controllers/crawl.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 54eb1f40..a80ac10f 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -38,8 +38,14 @@ export async function crawlController(req: Request, res: Response) { } } + const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions }; + const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions }; + + + const limitCheck = crawlerOptions?.limit ?? 1; const { success: creditsCheckSuccess, message: creditsCheckMessage } = - await checkTeamCredits(team_id, 1); + await checkTeamCredits(team_id, limitCheck); + if (!creditsCheckSuccess) { return res.status(402).json({ error: "Insufficient credits" }); } @@ -67,8 +73,6 @@ export async function crawlController(req: Request, res: Response) { const mode = req.body.mode ?? 
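// mode defaults to "crawl" when the request body omits it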
"crawl"; - const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions }; - const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions }; // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this? // try { From 8e4ca8646347c7a44e2cf04479bab564db013289 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 19 Aug 2024 11:02:24 -0300 Subject: [PATCH 10/65] Update crawl.ts --- apps/api/src/controllers/crawl.ts | 78 ++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index a80ac10f..bf402d38 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -7,10 +7,22 @@ import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist"; import { logCrawl } from "../../src/services/logging/crawl_log"; import { validateIdempotencyKey } from "../../src/services/idempotency/validate"; import { createIdempotencyKey } from "../../src/services/idempotency/create"; -import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../src/lib/default-values"; +import { + defaultCrawlPageOptions, + defaultCrawlerOptions, + defaultOrigin, +} from "../../src/lib/default-values"; import { v4 as uuidv4 } from "uuid"; import { Logger } from "../../src/lib/logger"; -import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../src/lib/crawl-redis"; +import { + addCrawlJob, + addCrawlJobs, + crawlToCrawler, + lockURL, + lockURLs, + saveCrawl, + StoredCrawl, +} from "../../src/lib/crawl-redis"; import { getScrapeQueue } from "../../src/services/queue-service"; import { checkAndUpdateURL } from "../../src/lib/validateUrl"; @@ -38,10 +50,12 @@ export async function crawlController(req: Request, res: Response) { } } - const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions }; + const crawlerOptions = { + ...defaultCrawlerOptions, + ...req.body.crawlerOptions, + }; const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions }; - const limitCheck = crawlerOptions?.limit ?? 1; const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, limitCheck); @@ -63,17 +77,14 @@ export async function crawlController(req: Request, res: Response) { } if (isUrlBlocked(url)) { - return res - .status(403) - .json({ - error: - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", - }); + return res.status(403).json({ + error: + "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", + }); } const mode = req.body.mode ?? "crawl"; - // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this? // try { // const a = new WebScraperDataProvider(); @@ -123,10 +134,12 @@ export async function crawlController(req: Request, res: Response) { await saveCrawl(id, sc); - const sitemap = sc.crawlerOptions?.ignoreSitemap ? null : await crawler.tryGetSitemap(); + const sitemap = sc.crawlerOptions?.ignoreSitemap + ? 
null + : await crawler.tryGetSitemap(); if (sitemap !== null) { - const jobs = sitemap.map(x => { + const jobs = sitemap.map((x) => { const url = x.url; const uuid = uuidv4(); return { @@ -144,26 +157,35 @@ export async function crawlController(req: Request, res: Response) { opts: { jobId: uuid, priority: 20, - } + }, }; - }) + }); - await lockURLs(id, jobs.map(x => x.data.url)); - await addCrawlJobs(id, jobs.map(x => x.opts.jobId)); + await lockURLs( + id, + jobs.map((x) => x.data.url) + ); + await addCrawlJobs( + id, + jobs.map((x) => x.opts.jobId) + ); await getScrapeQueue().addBulk(jobs); } else { await lockURL(id, sc, url); - const job = await addScrapeJob({ - url, - mode: "single_urls", - crawlerOptions: crawlerOptions, - team_id: team_id, - pageOptions: pageOptions, - origin: req.body.origin ?? defaultOrigin, - crawl_id: id, - }, { - priority: 15, // prioritize request 0 of crawl jobs same as scrape jobs - }); + const job = await addScrapeJob( + { + url, + mode: "single_urls", + crawlerOptions: crawlerOptions, + team_id: team_id, + pageOptions: pageOptions, + origin: req.body.origin ?? defaultOrigin, + crawl_id: id, + }, + { + priority: 15, // prioritize request 0 of crawl jobs same as scrape jobs + } + ); await addCrawlJob(id, job.id); } From ecd472356b1348cc96bad434ecba80de0ce5ab79 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:41:54 -0300 Subject: [PATCH 11/65] added variables to beta customers --- apps/api/src/lib/entities.ts | 5 +++ apps/api/src/scraper/WebScraper/index.ts | 2 ++ .../scraper/WebScraper/scrapers/fireEngine.ts | 32 +++++++++++++++---- apps/api/src/scraper/WebScraper/single_url.ts | 5 ++- 4 files changed, 36 insertions(+), 8 deletions(-) diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index 92dd4c7c..d833bda0 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -24,6 +24,9 @@ export type PageOptions = { parsePDF?: boolean; removeTags?: string | string[]; onlyIncludeTags?: string | string[]; + useFastMode?: boolean; // beta + disableJSDom?: boolean; // beta + atsv?: boolean; // beta }; export type ExtractorOptions = { @@ -66,6 +69,7 @@ export type WebScraperOptions = { concurrentRequests?: number; bullJobId?: string; priority?: number; + teamId?: string; }; export interface DocumentUrl { @@ -142,4 +146,5 @@ export interface FireEngineOptions{ blockMedia?: boolean; blockAds?: boolean; disableJsDom?: boolean; + atsv?: boolean; // beta } diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index 98f07ae5..e0bac57c 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -45,6 +45,7 @@ export class WebScraperDataProvider { private allowBackwardCrawling: boolean = false; private allowExternalContentLinks: boolean = false; private priority?: number; + private teamId?: string; authorize(): void { throw new Error("Method not implemented."); @@ -596,6 +597,7 @@ export class WebScraperDataProvider { this.allowExternalContentLinks = options.crawlerOptions?.allowExternalContentLinks ?? false; this.priority = options.priority; + this.teamId = options.teamId ?? 
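// teamId is threaded through to the scrapers so per-team gates (e.g. the
// beta atsv flag checked against BETA_CUSTOMERS) can be applied downstream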
null; // make sure all urls start with https:// this.urls = this.urls.map((url) => { diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 77697411..e427f582 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -22,21 +22,23 @@ export async function scrapWithFireEngine({ waitFor = 0, screenshot = false, fullPageScreenshot = false, - pageOptions = { parsePDF: true }, + pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false }, fireEngineOptions = {}, headers, options, priority, + teamId, }: { url: string; waitFor?: number; screenshot?: boolean; fullPageScreenshot?: boolean; - pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean }; + pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean }; fireEngineOptions?: FireEngineOptions; headers?: Record; options?: any; priority?: number; + teamId?: string; }): Promise { const logParams = { url, @@ -51,11 +53,11 @@ export async function scrapWithFireEngine({ try { const reqParams = await generateRequestParams(url); - const waitParam = reqParams["params"]?.wait ?? waitFor; - const engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright"; - const screenshotParam = reqParams["params"]?.screenshot ?? screenshot; - const fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot; - const fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions; + let waitParam = reqParams["params"]?.wait ?? waitFor; + let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright"; + let screenshotParam = reqParams["params"]?.screenshot ?? screenshot; + let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot; + let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions; let endpoint = "/scrape"; @@ -70,6 +72,20 @@ export async function scrapWithFireEngine({ `⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, fullPageScreenshot: ${fullPageScreenshot}, method: ${fireEngineOptionsParam?.method ?? "null"} }` ); + if (pageOptions?.useFastMode) { + console.log('using tlsclient') + fireEngineOptionsParam.engine = "tlsclient"; + engine = "tlsclient"; + } + + // atsv is only available for beta customers + const betaCustomersString = process.env.BETA_CUSTOMERS; + const betaCustomers = betaCustomersString ? betaCustomersString.split(",") : []; + if (pageOptions?.atsv && betaCustomers.includes(teamId)) { + fireEngineOptionsParam.atsv = true; + } else { + pageOptions.atsv = false; + } const response = await axios.post( process.env.FIRE_ENGINE_BETA_URL + endpoint, @@ -80,7 +96,9 @@ export async function scrapWithFireEngine({ fullPageScreenshot: fullPageScreenshotParam, headers: headers, pageOptions: pageOptions, + disableJsDom: pageOptions?.disableJsDom ?? 
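// beta page options stay off unless the caller opts in explicitly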
false, priority, + engine, ...fireEngineOptionsParam, }, { diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index df9d04ab..1f2a62de 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -136,6 +136,7 @@ export async function scrapSingleUrl( }, existingHtml: string = "", priority?: number, + teamId?: string ): Promise { urlToScrap = urlToScrap.trim(); @@ -164,7 +165,7 @@ export async function scrapSingleUrl( case "fire-engine;chrome-cdp": let engine: "playwright" | "chrome-cdp" | "tlsclient" = "playwright"; - if(method === "fire-engine;chrome-cdp"){ + if (method === "fire-engine;chrome-cdp") { engine = "chrome-cdp"; } @@ -178,8 +179,10 @@ export async function scrapSingleUrl( headers: pageOptions.headers, fireEngineOptions: { engine: engine, + atsv: pageOptions.atsv, }, priority, + teamId, }); scraperResponse.text = response.html; scraperResponse.screenshot = response.screenshot; From e1c9cbf70906213cbacd3dd5e6665be74017e78f Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 20 Aug 2024 09:11:58 -0300 Subject: [PATCH 12/65] bug fixed. crawl should not stop if sitemap url is invalid --- apps/api/src/scraper/WebScraper/crawler.ts | 8 +++++++- apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts | 1 - 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index 79e4bf18..af3a9d69 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -69,7 +69,13 @@ export class WebCrawler { public filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] { return sitemapLinks .filter((link) => { - const url = new URL(link.trim(), this.baseUrl); + let url: URL; + try { + url = new URL(link.trim(), this.baseUrl); + } catch (error) { + Logger.debug(`Error processing link: ${link} | Error: ${error.message}`); + return false; + } const path = url.pathname; const depth = getURLDepth(url.toString()); diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index e427f582..7c24fab4 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -73,7 +73,6 @@ export async function scrapWithFireEngine({ ); if (pageOptions?.useFastMode) { - console.log('using tlsclient') fireEngineOptionsParam.engine = "tlsclient"; engine = "tlsclient"; } From 5abd26a2677e5ed4eb63323a440fa75405da1eaf Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 20 Aug 2024 14:16:54 -0300 Subject: [PATCH 13/65] Nick: set the crawl limit to the remaining credits --- apps/api/src/controllers/crawl.ts | 9 ++++++--- apps/api/src/lib/default-values.ts | 3 ++- apps/api/src/services/billing/credit_billing.ts | 12 +++++++----- apps/api/src/services/queue-worker.ts | 4 ++-- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index bf402d38..1dfe758f 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -56,14 +56,17 @@ export async function crawlController(req: Request, res: Response) { }; const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions }; - const limitCheck = crawlerOptions?.limit ?? 
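// size the credit check to the requested crawl limit (defaulting to a single
// page) so under-provisioned teams are rejected before the crawl starts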
1; - const { success: creditsCheckSuccess, message: creditsCheckMessage } = + const limitCheck = req.body?.crawlerOptions?.limit ?? 1; + const { success: creditsCheckSuccess, message: creditsCheckMessage, remainingCredits } = await checkTeamCredits(team_id, limitCheck); if (!creditsCheckSuccess) { - return res.status(402).json({ error: "Insufficient credits" }); + return res.status(402).json({ error: "Insufficient credits. You may be requesting with a higher limit than the amount of credits you have left. If not, upgrade your plan at https://firecrawl.dev/pricing or contact us at hello@firecrawl.com" }); } + // TODO: need to do this to v1 + crawlerOptions.limit = Math.min(remainingCredits, crawlerOptions.limit); + let url = req.body.url; if (!url) { return res.status(400).json({ error: "Url is required" }); diff --git a/apps/api/src/lib/default-values.ts b/apps/api/src/lib/default-values.ts index 152f47d7..0b469ee2 100644 --- a/apps/api/src/lib/default-values.ts +++ b/apps/api/src/lib/default-values.ts @@ -12,7 +12,8 @@ export const defaultPageOptions = { }; export const defaultCrawlerOptions = { - allowBackwardCrawling: false + allowBackwardCrawling: false, + limit: 10000 } export const defaultCrawlPageOptions = { diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index d25289b2..2ad07318 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -168,10 +168,11 @@ export async function supaBillTeam(team_id: string, credits: number) { export async function checkTeamCredits(team_id: string, credits: number) { return withAuth(supaCheckTeamCredits)(team_id, credits); } + // if team has enough credits for the operation, return true, else return false export async function supaCheckTeamCredits(team_id: string, credits: number) { if (team_id === "preview") { - return { success: true, message: "Preview team, no credits used" }; + return { success: true, message: "Preview team, no credits used", remainingCredits: Infinity }; } // Retrieve the team's active subscription and check for available coupons concurrently @@ -202,7 +203,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { if (subscriptionError || !subscription) { // If there is no active subscription but there are available coupons if (couponCredits >= credits) { - return { success: true, message: "Sufficient credits available" }; + return { success: true, message: "Sufficient credits available", remainingCredits: couponCredits }; } const { data: creditUsages, error: creditUsageError } = @@ -252,9 +253,10 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { return { success: false, message: "Insufficient credits, please upgrade!", + remainingCredits: FREE_CREDITS - totalCreditsUsed }; } - return { success: true, message: "Sufficient credits available" }; + return { success: true, message: "Sufficient credits available", remainingCredits: FREE_CREDITS - totalCreditsUsed }; } let totalCreditsUsed = 0; @@ -321,7 +323,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { subscription.current_period_start, subscription.current_period_end ); - return { success: false, message: "Insufficient credits, please upgrade!" 
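// the failure result now also carries remainingCredits, which lets the crawl
// controller clamp its limit to what the team can still afford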
};
+    return { success: false, message: "Insufficient credits, please upgrade!", remainingCredits: creditLimit - adjustedCreditsUsed };
   } else if (creditUsagePercentage >= 0.8) {
     // Send email notification for approaching credit limit
     await sendNotification(
@@ -332,7 +334,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
     );
   }
 
-  return { success: true, message: "Sufficient credits available" };
+  return { success: true, message: "Sufficient credits available", remainingCredits: creditLimit - adjustedCreditsUsed };
 }
 
 // Count the total credits used by a team within the current billing period and return the remaining credits.
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index 0d24387a..6e848ce6 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -117,13 +117,13 @@ async function processJob(job: Job, token: string) {
 
   // Check if the job URL is researchhub and block it immediately
   // TODO: remove this once solve the root issue
-  if (job.data.url && (job.data.url.includes("researchhub.com") || job.data.url.includes("ebay.com") || job.data.url.includes("youtube.com"))) {
+  if (job.data.url && (job.data.url.includes("researchhub.com") || job.data.url.includes("ebay.com") || job.data.url.includes("youtube.com") || job.data.url.includes("microsoft.com"))) {
     Logger.info(`🐂 Blocking job ${job.id} with URL ${job.data.url}`);
     const data = {
       success: false,
       docs: [],
       project_id: job.data.project_id,
-      error: "URL is blocked. Please contact hello@firecrawl.com if you believe this is an error.",
+      error: "URL is blocked. Suspicious activity detected. Please contact hello@firecrawl.com if you believe this is an error.",
     };
     await job.moveToCompleted(data.docs, token, false);
     return data;

From e326249a571d30b1fde4df961e465ec61d04ba20 Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Tue, 20 Aug 2024 14:26:42 -0300
Subject: [PATCH 14/65] added check job and cancel to fire-engine requests

---
 .../scraper/WebScraper/scrapers/fireEngine.ts | 26 ++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
index 7c24fab4..9c52b9e1 100644
--- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
+++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
@@ -86,7 +86,12 @@ export async function scrapWithFireEngine({
       pageOptions.atsv = false;
     }
 
-    const response = await axios.post(
+    const axiosInstance = axios.create({
+      headers: { "Content-Type": "application/json" }
+    });
+
+    const startTime = Date.now();
+    const response = await axiosInstance.post(
       process.env.FIRE_ENGINE_BETA_URL + endpoint,
       {
         url: url,
@@ -98,16 +103,31 @@ export async function scrapWithFireEngine({
         fullPageScreenshot: fullPageScreenshotParam,
         headers: headers,
         pageOptions: pageOptions,
+        disableJsDom: pageOptions?.disableJsDom ?? 
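// with instantReturn the POST resolves immediately with a jobId; the result
// is collected by polling GET /scrape/:jobId below (1s interval, capped at
// universalTimeout + waitParam)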
false, priority, engine, + instantReturn: true, ...fireEngineOptionsParam, }, { headers: { "Content-Type": "application/json", - }, - timeout: universalTimeout + waitParam, + } } ); + let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${response.data.jobId}`); + while (checkStatusResponse.data.processing && Date.now() - startTime < universalTimeout + waitParam) { + await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second + checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${response.data.jobId}`); + } + + if (checkStatusResponse.data.processing) { + axiosInstance.delete( + process.env.FIRE_ENGINE_BETA_URL + `/scrape/${response.data.jobId}`, + ); + Logger.debug(`⛏️ Fire-Engine (${engine}): Request timed out for ${url}`); + logParams.error_message = "Request timed out"; + return { html: "", screenshot: "", pageStatusCode: null, pageError: "" }; + } + if (response.status !== 200) { Logger.debug( `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}` From efb91f9ca5e0ea2b01df7e5a4516ebb4068a164e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Tue, 20 Aug 2024 20:29:08 +0200 Subject: [PATCH 15/65] fix(search): delete jobs after done --- apps/api/src/controllers/search.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index 873922c4..45105bb4 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -103,6 +103,8 @@ export async function searchHelper( return { success: true, error: "No search results found", returnCode: 200 }; } + await Promise.all(jobs.map(x => x.remove())); + // make sure doc.content is not empty const filteredDocs = docs.filter( (doc: { content?: string }) => doc.content && doc.content.trim().length > 0 From 1f27182a139cdd6ccef33980ef265a0d56f98fe3 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 20 Aug 2024 15:42:39 -0300 Subject: [PATCH 16/65] added try catch --- .../src/scraper/WebScraper/scrapers/fireEngine.ts | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 9c52b9e1..10be4a1d 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -120,9 +120,17 @@ export async function scrapWithFireEngine({ } if (checkStatusResponse.data.processing) { - axiosInstance.delete( - process.env.FIRE_ENGINE_BETA_URL + `/scrape/${response.data.jobId}`, - ); + Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${response.data.jobId}`); + try { + axiosInstance.delete( + process.env.FIRE_ENGINE_BETA_URL + `/scrape/${response.data.jobId}`, + ); + } catch (error) { + Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${response.data.jobId} | error: ${error}`); + logParams.error_message = "Failed to delete request"; + return { html: "", screenshot: "", pageStatusCode: null, pageError: "" }; + } + Logger.debug(`⛏️ Fire-Engine (${engine}): Request timed out for ${url}`); logParams.error_message = "Request timed out"; return { html: "", screenshot: "", pageStatusCode: null, pageError: "" }; From f494d2b707d40b690ae41611d17f77f683570fc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Tue, 
20 Aug 2024 21:00:17 +0200 Subject: [PATCH 17/65] prioritize search lower --- apps/api/src/controllers/search.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index 45105bb4..759c7805 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -90,7 +90,7 @@ export async function searchHelper( }, opts: { jobId: uuid, - priority: 10, + priority: 20, } }; }) From f98be7d94e55a54a537cc060f63570da177c1b11 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 20 Aug 2024 16:53:01 -0300 Subject: [PATCH 18/65] Update fireEngine.ts --- .../scraper/WebScraper/scrapers/fireEngine.ts | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 10be4a1d..574f1944 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -136,27 +136,29 @@ export async function scrapWithFireEngine({ return { html: "", screenshot: "", pageStatusCode: null, pageError: "" }; } - if (response.status !== 200) { + if (checkStatusResponse.status !== 200 || checkStatusResponse.data.error) { Logger.debug( - `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}` + `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.status}` ); - logParams.error_message = response.data?.pageError; - logParams.response_code = response.data?.pageStatusCode; + logParams.error_message = checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error; + logParams.response_code = checkStatusResponse.data?.pageStatusCode; - if(response.data && response.data?.pageStatusCode !== 200) { + if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) { Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`); } + const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 404 : undefined; + return { html: "", screenshot: "", - pageStatusCode: response.data?.pageStatusCode, - pageError: response.data?.pageError, + pageStatusCode, + pageError: checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error, }; } - const contentType = response.headers["content-type"]; + const contentType = checkStatusResponse.headers["content-type"]; if (contentType && contentType.includes("application/pdf")) { const { content, pageStatusCode, pageError } = await fetchAndProcessPdf( url, @@ -167,18 +169,18 @@ export async function scrapWithFireEngine({ logParams.error_message = pageError; return { html: content, screenshot: "", pageStatusCode, pageError }; } else { - const data = response.data; + const data = checkStatusResponse.data; logParams.success = (data.pageStatusCode >= 200 && data.pageStatusCode < 300) || data.pageStatusCode === 404; logParams.html = data.content ?? ""; logParams.response_code = data.pageStatusCode; - logParams.error_message = data.pageError; + logParams.error_message = data.pageError ?? data.error; return { html: data.content ?? "", screenshot: data.screenshot ?? 
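// empty-string fallbacks keep the response shape stable; the change below
// also coalesces pageError with the request-wide error field, preferring
// the page-level detail when both are present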
"", pageStatusCode: data.pageStatusCode, - pageError: data.pageError, + pageError: data.pageError ?? data.error, }; } } catch (error) { From ffe11a5bf73e3c57657972cd36c3af1d0b9a432c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 20 Aug 2024 18:16:11 -0300 Subject: [PATCH 19/65] Revert "Merge pull request #561 from mendableai/bug/dealing-with-dns-error" This reverts commit 2030ec603109d6ce8786a011d431bc5c83917f1b, reversing changes made to f494d2b707d40b690ae41611d17f77f683570fc2. --- .../scraper/WebScraper/scrapers/fireEngine.ts | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 574f1944..10be4a1d 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -136,29 +136,27 @@ export async function scrapWithFireEngine({ return { html: "", screenshot: "", pageStatusCode: null, pageError: "" }; } - if (checkStatusResponse.status !== 200 || checkStatusResponse.data.error) { + if (response.status !== 200) { Logger.debug( - `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.status}` + `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}` ); - logParams.error_message = checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error; - logParams.response_code = checkStatusResponse.data?.pageStatusCode; + logParams.error_message = response.data?.pageError; + logParams.response_code = response.data?.pageStatusCode; - if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) { + if(response.data && response.data?.pageStatusCode !== 200) { Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`); } - const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 404 : undefined; - return { html: "", screenshot: "", - pageStatusCode, - pageError: checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error, + pageStatusCode: response.data?.pageStatusCode, + pageError: response.data?.pageError, }; } - const contentType = checkStatusResponse.headers["content-type"]; + const contentType = response.headers["content-type"]; if (contentType && contentType.includes("application/pdf")) { const { content, pageStatusCode, pageError } = await fetchAndProcessPdf( url, @@ -169,18 +167,18 @@ export async function scrapWithFireEngine({ logParams.error_message = pageError; return { html: content, screenshot: "", pageStatusCode, pageError }; } else { - const data = checkStatusResponse.data; + const data = response.data; logParams.success = (data.pageStatusCode >= 200 && data.pageStatusCode < 300) || data.pageStatusCode === 404; logParams.html = data.content ?? ""; logParams.response_code = data.pageStatusCode; - logParams.error_message = data.pageError ?? data.error; + logParams.error_message = data.pageError; return { html: data.content ?? "", screenshot: data.screenshot ?? "", pageStatusCode: data.pageStatusCode, - pageError: data.pageError ?? 
data.error, + pageError: data.pageError, }; } } catch (error) { From 441628998f8d853944bc264e4c4f5c3d83ba93be Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 20 Aug 2024 19:16:48 -0300 Subject: [PATCH 20/65] Reapply "Merge pull request #561 from mendableai/bug/dealing-with-dns-error" This reverts commit ffe11a5bf73e3c57657972cd36c3af1d0b9a432c. --- .../scraper/WebScraper/scrapers/fireEngine.ts | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 10be4a1d..574f1944 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -136,27 +136,29 @@ export async function scrapWithFireEngine({ return { html: "", screenshot: "", pageStatusCode: null, pageError: "" }; } - if (response.status !== 200) { + if (checkStatusResponse.status !== 200 || checkStatusResponse.data.error) { Logger.debug( - `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}` + `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.status}` ); - logParams.error_message = response.data?.pageError; - logParams.response_code = response.data?.pageStatusCode; + logParams.error_message = checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error; + logParams.response_code = checkStatusResponse.data?.pageStatusCode; - if(response.data && response.data?.pageStatusCode !== 200) { + if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) { Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`); } + const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 404 : undefined; + return { html: "", screenshot: "", - pageStatusCode: response.data?.pageStatusCode, - pageError: response.data?.pageError, + pageStatusCode, + pageError: checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error, }; } - const contentType = response.headers["content-type"]; + const contentType = checkStatusResponse.headers["content-type"]; if (contentType && contentType.includes("application/pdf")) { const { content, pageStatusCode, pageError } = await fetchAndProcessPdf( url, @@ -167,18 +169,18 @@ export async function scrapWithFireEngine({ logParams.error_message = pageError; return { html: content, screenshot: "", pageStatusCode, pageError }; } else { - const data = response.data; + const data = checkStatusResponse.data; logParams.success = (data.pageStatusCode >= 200 && data.pageStatusCode < 300) || data.pageStatusCode === 404; logParams.html = data.content ?? ""; logParams.response_code = data.pageStatusCode; - logParams.error_message = data.pageError; + logParams.error_message = data.pageError ?? data.error; return { html: data.content ?? "", screenshot: data.screenshot ?? "", pageStatusCode: data.pageStatusCode, - pageError: data.pageError, + pageError: data.pageError ?? data.error, }; } } catch (error) { From 1b3ad60a2c2d3e7881209ab175b233f4404f8c3f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 20 Aug 2024 19:22:09 -0300 Subject: [PATCH 21/65] Reapply "Merge pull request #561 from mendableai/bug/dealing-with-dns-error" This reverts commit ffe11a5bf73e3c57657972cd36c3af1d0b9a432c. 
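For reference, this reapply makes the result fields read from the polled
status response (checkStatusResponse) instead of the initial POST response,
recommitting to the submit-then-poll flow introduced in PATCH 14. A minimal
sketch of that flow, assuming an axios instance named `client` and the
timeout values from the surrounding scraper:

    const { jobId } = (await client.post("/scrape", { url, instantReturn: true })).data;
    const deadline = Date.now() + universalTimeout + waitParam;
    let status = (await client.get(`/scrape/${jobId}`)).data;
    while (status.processing && Date.now() < deadline) {
      await new Promise((resolve) => setTimeout(resolve, 1000));
      status = (await client.get(`/scrape/${jobId}`)).data;
    }
    // past the deadline and still processing: DELETE /scrape/:jobId and bail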
--- .../scraper/WebScraper/scrapers/fireEngine.ts | 17 +++++++++-------- .../scraper/WebScraper/scrapers/scrapingBee.ts | 3 +++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 574f1944..17b65a90 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -91,7 +91,7 @@ export async function scrapWithFireEngine({ }); const startTime = Date.now(); - const response = await axiosInstance.post( + const _response = await axiosInstance.post( process.env.FIRE_ENGINE_BETA_URL + endpoint, { url: url, @@ -113,20 +113,20 @@ export async function scrapWithFireEngine({ } ); - let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${response.data.jobId}`); + let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`); while (checkStatusResponse.data.processing && Date.now() - startTime < universalTimeout + waitParam) { await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second - checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${response.data.jobId}`); + checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`); } if (checkStatusResponse.data.processing) { - Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${response.data.jobId}`); + Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${_response.data.jobId}`); try { axiosInstance.delete( - process.env.FIRE_ENGINE_BETA_URL + `/scrape/${response.data.jobId}`, + process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, ); } catch (error) { - Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${response.data.jobId} | error: ${error}`); + Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${_response.data.jobId} | error: ${error}`); logParams.error_message = "Failed to delete request"; return { html: "", screenshot: "", pageStatusCode: null, pageError: "" }; } @@ -145,7 +145,7 @@ export async function scrapWithFireEngine({ logParams.response_code = checkStatusResponse.data?.pageStatusCode; if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) { - Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`); + Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.data?.pageStatusCode}`); } const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 
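// DNS-resolution failures arrive as a top-level error string with no page
// status; map them to 404 and leave anything else undefined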
404 : undefined; @@ -158,7 +158,7 @@ export async function scrapWithFireEngine({ }; } - const contentType = checkStatusResponse.headers["content-type"]; + const contentType = checkStatusResponse.data.responseHeaders["content-type"]; if (contentType && contentType.includes("application/pdf")) { const { content, pageStatusCode, pageError } = await fetchAndProcessPdf( url, @@ -170,6 +170,7 @@ export async function scrapWithFireEngine({ return { html: content, screenshot: "", pageStatusCode, pageError }; } else { const data = checkStatusResponse.data; + logParams.success = (data.pageStatusCode >= 200 && data.pageStatusCode < 300) || data.pageStatusCode === 404; diff --git a/apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts b/apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts index 554bfe22..b72fa8b2 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts @@ -43,6 +43,9 @@ export async function scrapWithScrapingBee( transparent_status_code: "True", }, }); + Logger.info( + `⛏️ ScrapingBee: Scraping ${url}` + ); const contentType = response.headers["content-type"]; if (contentType && contentType.includes("application/pdf")) { logParams.success = true; From 819ad50af3a567febb9f6d4a5a318a9e1f6d6072 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 20 Aug 2024 21:16:33 -0300 Subject: [PATCH 22/65] Update fireEngine.ts --- apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 17b65a90..3dd5f9d6 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -158,7 +158,8 @@ export async function scrapWithFireEngine({ }; } - const contentType = checkStatusResponse.data.responseHeaders["content-type"]; + const contentType = checkStatusResponse.data.responseHeaders?.["content-type"]; + if (contentType && contentType.includes("application/pdf")) { const { content, pageStatusCode, pageError } = await fetchAndProcessPdf( url, From 90b32f16c8e519cd1090b55c69c7f6100ad419de Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 20 Aug 2024 21:38:11 -0300 Subject: [PATCH 23/65] Nick: fixes --- apps/api/src/main/runWebScraper.ts | 1 + apps/api/src/scraper/WebScraper/index.ts | 1 + apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts | 1 + 3 files changed, 3 insertions(+) diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index 77d392f5..2be05bd5 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -87,6 +87,7 @@ export async function runWebScraper({ crawlerOptions: crawlerOptions, pageOptions: pageOptions, priority, + teamId: team_id }); } const docs = (await provider.getDocuments(false, (progress: Progress) => { diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index e0bac57c..65247df1 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -76,6 +76,7 @@ export class WebScraperDataProvider { this.extractorOptions, existingHTML, this.priority, + this.teamId, ); processedUrls++; if (inProgress) { diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 3dd5f9d6..67ee018c 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ 
b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -80,6 +80,7 @@ export async function scrapWithFireEngine({ // atsv is only available for beta customers const betaCustomersString = process.env.BETA_CUSTOMERS; const betaCustomers = betaCustomersString ? betaCustomersString.split(",") : []; + if (pageOptions?.atsv && betaCustomers.includes(teamId)) { fireEngineOptionsParam.atsv = true; } else { From 5e48bec1fd509b9daa17a68b9d84cf54fed85f8c Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 21 Aug 2024 08:10:46 -0300 Subject: [PATCH 24/65] commenting out delete, crashing on fire-engine --- apps/api/requests.http | 114 ++++++++++++++++-- .../scraper/WebScraper/scrapers/fireEngine.ts | 6 +- 2 files changed, 104 insertions(+), 16 deletions(-) diff --git a/apps/api/requests.http b/apps/api/requests.http index 3a1a9902..f43e4de6 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -1,15 +1,96 @@ +### fc-a12ee91b42d243f5990a2c821be0a978 +### fc-e7e08faf6b864bd9b9b70c693a01ccae ### Crawl Website -POST http://localhost:3002/v0/scrape HTTP/1.1 -Authorization: Bearer fc +POST http://localhost:3002/v1/crawl HTTP/1.1 +Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae content-type: application/json { - "url":"firecrawl.dev" + "url": "https://roastmywebsite.ai", + "excludePaths": ["blog/*"], + "includePaths": ["/"], + "maxDepth": 2, + "ignoreSitemap": true, + "limit": 10, + "allowBackwardLinks": true, + "allowExternalLinks": true, + "scrapeOptions": { + "formats": ["markdown", "html", "rawHtml", "screenshot", "links"], + "headers": { "x-key": "test" }, + "includeTags": ["h1"], + "excludeTags": ["h2"], + "onlyMainContent": true, + "waitFor": 1000 + } +} + +### +POST http://localhost:3002/v1/scrape HTTP/1.1 +Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae +content-type: application/json + +{ + "url": "https://roastmywebsite.ai", + "formats": ["markdown", "html", "rawHtml", "screenshot", "links"], + "headers": { "x-key": "test" }, + "includeTags": ["h1"], + "excludeTags": ["h2"], + "onlyMainContent": true +} + +### +POST https://firescraper.fly.dev/scrape HTTP/1.1 +Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae +content-type: application/json + +{ + "url": "https://ajskhdioauhsdoas.com", + "instantReturn": true } +### +POST https://api.firecrawl.dev/v0/crawl HTTP/1.1 +Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae +content-type: application/json + +{ + "url": "https://pytorch.org/docs/stable", + "crawlerOptions": { + "limit": 100, + "returnOnlyUrls": true, + "ignoreSitemap": true, + "allowBackwardCrawling": true + } +} + +### +GET http://localhost:3002/v1/crawl/eff7095b-97b6-4944-b6da-7e7551396a38 HTTP/1.1 +Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae + +### 1644d4b6-7fbd-4ce5-b55d-46b3c5e92526 +### ef3714c7-e095-416d-a4b0-69611af0b00f + +### + + +POST http://localhost:3002/v0/crawl HTTP/1.1 +Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae +content-type: application/json + +{ + "url": "https://docs.usepylon.com/getting-started/introduction", + "crawlerOptions": { + "limit": 100 + } +} + ### Check Job Status -GET http://localhost:3002/v0/jobs/active HTTP/1.1 +GET http://localhost:3002/v0/crawl/status/645ff228-4a83-4504-8b1f-8879ca56fe19 HTTP/1.1 +Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae + +### Check Job Status +GET https://api.firecrawl.dev/v0/jobs/active HTTP/1.1 ### Scrape Website @@ -40,28 +121,35 @@ content-type: application/json ### Scrape 
Website -POST http://localhost:3002/v0/scrape HTTP/1.1 -Authorization: Bearer +POST http://localhost:3002/v0/crawl HTTP/1.1 +# Authorization: Bearer fc-*** content-type: application/json { - "url":"https://mendable.ai" + "url":"example.com" } - +# 5bc71216-e1ac-4bb1-9d9f-59785a92bb02 ### Check Job Status -GET http://localhost:3002/v0/crawl/status/a6053912-d602-4709-841f-3d2cb46fea0a HTTP/1.1 -Authorization: Bearer +GET http://localhost:3002/v0/crawl/status/478fec7c-ae43-4fb7-912d-c006e17e7024 HTTP/1.1 +Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae + +### +DELETE https://api.firecrawl.dev/v0/crawl/cancel/b749f45e-c1c9-4140-b596-87eaa0457b0b HTTP/1.1 +Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae ### Get Job Result -POST https://api.firecrawl.dev/v0/crawl HTTP/1.1 -Authorization: Bearer +POST http://localhost:3002/v0/crawl HTTP/1.1 +Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae content-type: application/json { - "url":"https://mendable.ai" + "url":"https://mendable.ai", + "crawlerOptions": { + "ignoreSitemap": true + } } ### Check Job Status diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 67ee018c..f9b6a330 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -123,9 +123,9 @@ export async function scrapWithFireEngine({ if (checkStatusResponse.data.processing) { Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${_response.data.jobId}`); try { - axiosInstance.delete( - process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, - ); + // axiosInstance.delete( + // process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, + // ); } catch (error) { Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${_response.data.jobId} | error: ${error}`); logParams.error_message = "Failed to delete request"; From 138437d616c342762dc6926a502a91d307338ec0 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 21 Aug 2024 08:11:24 -0300 Subject: [PATCH 25/65] commenting out delete, crashing on fire-engine --- apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index f9b6a330..408cad29 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -121,8 +121,8 @@ export async function scrapWithFireEngine({ } if (checkStatusResponse.data.processing) { - Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${_response.data.jobId}`); try { + Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${_response.data.jobId}`); // axiosInstance.delete( // process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, // ); From b66553867e8a0cafb5fe7ee7a82a22bec0916489 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 21 Aug 2024 09:28:20 -0300 Subject: [PATCH 26/65] reverting delete, fixed express bug on checkCredits --- apps/api/src/controllers/auth.ts | 2 +- apps/api/src/controllers/scrape.ts | 24 ++++++++----------- .../scraper/WebScraper/scrapers/fireEngine.ts | 6 ++--- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git 
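// Why the delete call in the two patches above was crashing and got commented
// out: the axios call is fired without await, so any rejection escapes the
// surrounding try/catch and becomes an unhandled promise rejection, which
// brings the process down on recent Node versions. A minimal repro sketch
// (the URL is hypothetical):
import axios from "axios";

async function fireAndForgetBroken(jobId: string) {
  try {
    axios.delete(`http://fire-engine.invalid/scrape/${jobId}`); // not awaited
  } catch (error) {
    // Never reached for network failures: the rejection happens after this
    // frame has already returned, so the catch block cannot see it.
  }
}
// The later "fixing delete" patch attaches .catch() to the promise itself
// instead of relying on try/catch around a floating call.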
a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index cb9cacde..9d46d005 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -152,7 +152,7 @@ export async function supaAuthenticateUser( ); } } catch (error) { - Logger.error(`Error with auth function: ${error.message}`); + Logger.error(`Error with auth function: ${error}`); // const { // success, // teamId: tId, diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 3d568790..273b4c56 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -108,22 +108,18 @@ export async function scrapeController(req: Request, res: Response) { timeout = req.body.timeout ?? 90000; } - const checkCredits = async () => { - try { - const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1); - if (!creditsCheckSuccess) { - earlyReturn = true; - return res.status(402).json({ error: "Insufficient credits" }); - } - } catch (error) { - Logger.error(error); + // checkCredits + try { + const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1); + if (!creditsCheckSuccess) { earlyReturn = true; - return res.status(500).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." }); + return res.status(402).json({ error: "Insufficient credits" }); } - }; - - - await checkCredits(); + } catch (error) { + Logger.error(error); + earlyReturn = true; + return res.status(500).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." }); + } const jobId = uuidv4(); diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 408cad29..6f97d96f 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -123,9 +123,9 @@ export async function scrapWithFireEngine({ if (checkStatusResponse.data.processing) { try { Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${_response.data.jobId}`); - // axiosInstance.delete( - // process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, - // ); + axiosInstance.delete( + process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, + ); } catch (error) { Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${_response.data.jobId} | error: ${error}`); logParams.error_message = "Failed to delete request"; From db8c84ff0f59bd1f50bf337f6688cb33f396a2d6 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 21 Aug 2024 10:19:37 -0300 Subject: [PATCH 27/65] Update requests.http --- apps/api/requests.http | 114 +++++------------------------------------ 1 file changed, 13 insertions(+), 101 deletions(-) diff --git a/apps/api/requests.http b/apps/api/requests.http index f43e4de6..3a1a9902 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -1,96 +1,15 @@ -### fc-a12ee91b42d243f5990a2c821be0a978 -### fc-e7e08faf6b864bd9b9b70c693a01ccae ### Crawl Website -POST http://localhost:3002/v1/crawl HTTP/1.1 -Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae +POST http://localhost:3002/v0/scrape HTTP/1.1 +Authorization: Bearer fc content-type: application/json { - "url": "https://roastmywebsite.ai", - "excludePaths": ["blog/*"], - "includePaths": ["/"], - "maxDepth": 2, - "ignoreSitemap": true, - "limit": 10, - "allowBackwardLinks": true, - 
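// The inlined credit check in scrape.ts above, reduced to its essentials: a
// `return res.status(...)` inside a nested helper only exits the helper, so the
// controller kept executing after responding; inlining makes the early return
// actually terminate the handler. Hedged sketch: the checkTeamCredits stand-in
// below replaces the repo's real billing helper, and team_id is assumed to be
// set by auth middleware.
import { Request, Response } from "express";

async function checkTeamCredits(teamId: string, credits: number) {
  return { success: true }; // stand-in for the repo's real billing check
}

export async function scrapeControllerSketch(req: Request, res: Response) {
  const teamId = String(res.locals.team_id ?? ""); // assumed auth middleware output
  try {
    const { success } = await checkTeamCredits(teamId, 1);
    if (!success) {
      return res.status(402).json({ error: "Insufficient credits" });
    }
  } catch (error) {
    console.error(error);
    return res.status(500).json({ error: "Error checking team credits." });
  }
  // ...only reached when the team has credits; safe to enqueue the job here.
}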
"allowExternalLinks": true, - "scrapeOptions": { - "formats": ["markdown", "html", "rawHtml", "screenshot", "links"], - "headers": { "x-key": "test" }, - "includeTags": ["h1"], - "excludeTags": ["h2"], - "onlyMainContent": true, - "waitFor": 1000 - } + "url":"firecrawl.dev" } -### -POST http://localhost:3002/v1/scrape HTTP/1.1 -Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae -content-type: application/json - -{ - "url": "https://roastmywebsite.ai", - "formats": ["markdown", "html", "rawHtml", "screenshot", "links"], - "headers": { "x-key": "test" }, - "includeTags": ["h1"], - "excludeTags": ["h2"], - "onlyMainContent": true -} - -### -POST https://firescraper.fly.dev/scrape HTTP/1.1 -Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae -content-type: application/json - -{ - "url": "https://ajskhdioauhsdoas.com", - "instantReturn": true -} - - -### -POST https://api.firecrawl.dev/v0/crawl HTTP/1.1 -Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae -content-type: application/json - -{ - "url": "https://pytorch.org/docs/stable", - "crawlerOptions": { - "limit": 100, - "returnOnlyUrls": true, - "ignoreSitemap": true, - "allowBackwardCrawling": true - } -} - -### -GET http://localhost:3002/v1/crawl/eff7095b-97b6-4944-b6da-7e7551396a38 HTTP/1.1 -Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae - -### 1644d4b6-7fbd-4ce5-b55d-46b3c5e92526 -### ef3714c7-e095-416d-a4b0-69611af0b00f - -### - - -POST http://localhost:3002/v0/crawl HTTP/1.1 -Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae -content-type: application/json - -{ - "url": "https://docs.usepylon.com/getting-started/introduction", - "crawlerOptions": { - "limit": 100 - } -} ### Check Job Status -GET http://localhost:3002/v0/crawl/status/645ff228-4a83-4504-8b1f-8879ca56fe19 HTTP/1.1 -Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae - -### Check Job Status -GET https://api.firecrawl.dev/v0/jobs/active HTTP/1.1 +GET http://localhost:3002/v0/jobs/active HTTP/1.1 ### Scrape Website @@ -121,35 +40,28 @@ content-type: application/json ### Scrape Website -POST http://localhost:3002/v0/crawl HTTP/1.1 -# Authorization: Bearer fc-*** +POST http://localhost:3002/v0/scrape HTTP/1.1 +Authorization: Bearer content-type: application/json { - "url":"example.com" + "url":"https://mendable.ai" } -# 5bc71216-e1ac-4bb1-9d9f-59785a92bb02 -### Check Job Status -GET http://localhost:3002/v0/crawl/status/478fec7c-ae43-4fb7-912d-c006e17e7024 HTTP/1.1 -Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae -### -DELETE https://api.firecrawl.dev/v0/crawl/cancel/b749f45e-c1c9-4140-b596-87eaa0457b0b HTTP/1.1 -Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae +### Check Job Status +GET http://localhost:3002/v0/crawl/status/a6053912-d602-4709-841f-3d2cb46fea0a HTTP/1.1 +Authorization: Bearer ### Get Job Result -POST http://localhost:3002/v0/crawl HTTP/1.1 -Authorization: Bearer fc-e7e08faf6b864bd9b9b70c693a01ccae +POST https://api.firecrawl.dev/v0/crawl HTTP/1.1 +Authorization: Bearer content-type: application/json { - "url":"https://mendable.ai", - "crawlerOptions": { - "ignoreSitemap": true - } + "url":"https://mendable.ai" } ### Check Job Status From 52abec41c205458cd3532cfb16d52993eb277bea Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:35:50 -0300 Subject: [PATCH 28/65] fixing delete --- .../scraper/WebScraper/scrapers/fireEngine.ts | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git 
a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 6f97d96f..b520bfe2 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -121,16 +121,14 @@ export async function scrapWithFireEngine({ } if (checkStatusResponse.data.processing) { - try { - Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${_response.data.jobId}`); - axiosInstance.delete( - process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, - ); - } catch (error) { - Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${_response.data.jobId} | error: ${error}`); - logParams.error_message = "Failed to delete request"; - return { html: "", screenshot: "", pageStatusCode: null, pageError: "" }; - } + Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${_response.data.jobId}`); + axiosInstance.delete( + process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, { + validateStatus: (status) => true + } + ).catch((error) => { + Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${_response.data.jobId} | error: ${error}`); + }); Logger.debug(`⛏️ Fire-Engine (${engine}): Request timed out for ${url}`); logParams.error_message = "Request timed out"; From 35decb1af2fb17e6fee1266ee4ebec22ec0549fa Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 21 Aug 2024 12:35:03 -0300 Subject: [PATCH 29/65] Nick: --- apps/api/.gitignore | 3 + apps/api/package.json | 11 +- apps/api/pnpm-lock.yaml | 513 +++++++++++++++++++++++++- apps/api/src/index.ts | 9 +- apps/api/src/lib/default-values.ts | 2 +- apps/api/src/services/queue-worker.ts | 2 +- apps/api/src/services/sentry.ts | 15 + apps/api/tsconfig.json | 15 +- 8 files changed, 559 insertions(+), 11 deletions(-) create mode 100644 apps/api/src/services/sentry.ts diff --git a/apps/api/.gitignore b/apps/api/.gitignore index edc2faf4..34221e2a 100644 --- a/apps/api/.gitignore +++ b/apps/api/.gitignore @@ -6,3 +6,6 @@ dump.rdb /mongo-data /.next/ + +# Sentry Config File +.sentryclirc diff --git a/apps/api/package.json b/apps/api/package.json index 56e30792..a51f51aa 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -5,11 +5,11 @@ "main": "index.js", "scripts": { "start": "nodemon --exec ts-node src/index.ts", - "start:production": "tsc && node dist/src/index.js", + "start:production": "tsc && pnpm sentry:sourcemaps && node dist/src/index.js", "format": "prettier --write \"src/**/*.(js|ts)\"", "flyio": "node dist/src/index.js", "start:dev": "nodemon --exec ts-node src/index.ts", - "build": "tsc", + "build": "tsc && pnpm sentry:sourcemaps", "test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'", "test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'", "test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'", @@ -20,7 +20,8 @@ "mongo-docker-console": "docker exec -it mongodb mongosh", "run-example": "npx ts-node src/example.ts", "deploy:fly": "flyctl deploy", - "deploy:fly:staging": "fly deploy -c fly.staging.toml" + "deploy:fly:staging": "fly deploy -c fly.staging.toml", + "sentry:sourcemaps": "sentry-cli sourcemaps inject 
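// The repaired fire-and-forget delete from the hunk above, as a reusable
// sketch: `validateStatus: () => true` stops axios from rejecting on non-2xx
// responses, and the trailing .catch() absorbs transport errors, so a failed
// cleanup call can log but never crash the scrape path.
import axios from "axios";

function deleteScrapeJobBestEffort(baseUrl: string, jobId: string): void {
  axios
    .delete(`${baseUrl}/scrape/${jobId}`, {
      validateStatus: () => true, // every HTTP status counts as handled
    })
    .catch((error) => {
      console.debug(`failed to delete scrape job ${jobId}: ${error}`);
    });
}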
--org caleb-peffer --project firecrawl-scraper-js ./dist && sentry-cli sourcemaps upload --org caleb-peffer --project firecrawl-scraper-js ./dist" }, "author": "", "license": "ISC", @@ -55,7 +56,9 @@ "@hyperdx/node-opentelemetry": "^0.8.1", "@logtail/node": "^0.4.12", "@nangohq/node": "^0.40.8", + "@sentry/cli": "^2.33.1", "@sentry/node": "^8.13.0", + "@sentry/profiling-node": "^8.26.0", "@supabase/supabase-js": "^2.44.2", "ajv": "^8.16.0", "async": "^3.2.5", @@ -116,4 +119,4 @@ "temp" ] } -} +} \ No newline at end of file diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 419c6323..7065d1cf 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -35,9 +35,15 @@ importers: '@nangohq/node': specifier: ^0.40.8 version: 0.40.8 + '@sentry/cli': + specifier: ^2.33.1 + version: 2.33.1 '@sentry/node': specifier: ^8.13.0 version: 8.13.0 + '@sentry/profiling-node': + specifier: ^8.26.0 + version: 8.26.0 '@supabase/supabase-js': specifier: ^2.44.2 version: 2.44.2 @@ -809,6 +815,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-connect@0.38.0': + resolution: {integrity: sha512-2/nRnx3pjYEmdPIaBwtgtSviTKHWnDZN3R+TkRUnhIVrvBKVcq+I5B2rtd6mr6Fe9cHlZ9Ojcuh7pkNh/xdWWg==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-cucumber@0.6.0': resolution: {integrity: sha512-90eAF2JPSbPAsOuGfYyctYaoYXqy4Clbxt0j/uUgg6dto4oqwUw3AvTyHQEztLGxeXwEzC1EQigDtVPg5ZexYA==} engines: {node: '>=14'} @@ -839,6 +851,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-express@0.41.1': + resolution: {integrity: sha512-uRx0V3LPGzjn2bxAnV8eUsDT82vT7NTwI0ezEuPMBOTOsnPpGhWdhcdNdhH80sM4TrWrOfXm9HGEdfWE3TRIww==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-fastify@0.36.1': resolution: {integrity: sha512-3Nfm43PI0I+3EX+1YbSy6xbDu276R1Dh1tqAk68yd4yirnIh52Kd5B+nJ8CgHA7o3UKakpBjj6vSzi5vNCzJIA==} engines: {node: '>=14'} @@ -851,12 +869,24 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-fastify@0.38.0': + resolution: {integrity: sha512-HBVLpTSYpkQZ87/Df3N0gAw7VzYZV3n28THIBrJWfuqw3Or7UqdhnjeuMIPQ04BKk3aZc0cWn2naSQObbh5vXw==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-fs@0.12.0': resolution: {integrity: sha512-Waf+2hekJRxIwq1PmivxOWLdMOtYbY22hKr34gEtfbv2CArSv8FBJH4BmQxB9o5ZcwkdKu589qs009dbuSfNmQ==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-fs@0.14.0': + resolution: {integrity: sha512-pVc8P5AgliC1DphyyBUgsxXlm2XaPH4BpYvt7rAZDMIqUpRk8gs19SioABtKqqxvFzg5jPtgJfJsdxq0Y+maLw==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-generic-pool@0.36.0': resolution: {integrity: sha512-CExAEqJvK8jYxrhN8cl6EaGg57EGJi+qsSKouLC5lndXi68gZLOKbZIMZg4pF0kNfp/D4BFaGmA6Ap7d5WoPTw==} engines: {node: '>=14'} @@ -875,6 +905,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-graphql@0.42.0': + resolution: {integrity: sha512-N8SOwoKL9KQSX7z3gOaw5UaTeVQcfDO1c21csVHnmnmGUoqsXbArK2B8VuwPWcv6/BC/i3io+xTo7QGRZ/z28Q==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-grpc@0.51.1': resolution: {integrity: 
sha512-coRTugFL7De/VNH/1NqPlxnfik87jS+jBXsny+Y/lMhXIA3x8t71IyL9ihuewkD+lNtIxIz6Y7Sq6kPuOqz5dQ==} engines: {node: '>=14'} @@ -893,6 +929,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-hapi@0.40.0': + resolution: {integrity: sha512-8U/w7Ifumtd2bSN1OLaSwAAFhb9FyqWUki3lMMB0ds+1+HdSxYBe9aspEJEgvxAqOkrQnVniAPTEGf1pGM7SOw==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-http@0.51.1': resolution: {integrity: sha512-6b3nZnFFEz/3xZ6w8bVxctPUWIPWiXuPQ725530JgxnN1cvYFd8CJ75PrHZNjynmzSSnqBkN3ef4R9N+RpMh8Q==} engines: {node: '>=14'} @@ -917,6 +959,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-ioredis@0.42.0': + resolution: {integrity: sha512-P11H168EKvBB9TUSasNDOGJCSkpT44XgoM6d3gRIWAa9ghLpYhl0uRkS8//MqPzcJVHr3h3RmfXIpiYLjyIZTw==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-knex@0.36.1': resolution: {integrity: sha512-6bEuiI+yMf3D0+ZWZE2AKmXhIhBvZ0brdO/0A8lUqeqeS+sS4fTcjA1F2CclsCNxYWEgcs8o3QyQqPceBeVRlg==} engines: {node: '>=14'} @@ -935,6 +983,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-koa@0.42.0': + resolution: {integrity: sha512-H1BEmnMhho8o8HuNRq5zEI4+SIHDIglNB7BPKohZyWG4fWNuR7yM4GTlR01Syq21vODAS7z5omblScJD/eZdKw==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-lru-memoizer@0.37.0': resolution: {integrity: sha512-dHLrn55qVWsHJQYdForPWPUWDk2HZ2jjzkT+WoQSqpYT1j4HxfoiLfBTF+I3EbEYFAJnDRmRAUfA6nU5GPdCLQ==} engines: {node: '>=14'} @@ -959,6 +1013,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-mongodb@0.46.0': + resolution: {integrity: sha512-VF/MicZ5UOBiXrqBslzwxhN7TVqzu1/LN/QDpkskqM0Zm0aZ4CVRbUygL8d7lrjLn15x5kGIe8VsSphMfPJzlA==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-mongoose@0.38.1': resolution: {integrity: sha512-zaeiasdnRjXe6VhYCBMdkmAVh1S5MmXC/0spet+yqoaViGnYst/DOxPvhwg3yT4Yag5crZNWsVXnA538UjP6Ow==} engines: {node: '>=14'} @@ -971,6 +1031,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-mongoose@0.40.0': + resolution: {integrity: sha512-niRi5ZUnkgzRhIGMOozTyoZIvJKNJyhijQI4nF4iFSb+FUx2v5fngfR+8XLmdQAO7xmsD8E5vEGdDVYVtKbZew==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-mysql2@0.38.1': resolution: {integrity: sha512-qkpHMgWSDTYVB1vlZ9sspf7l2wdS5DDq/rbIepDwX5BA0N0068JTQqh0CgAh34tdFqSCnWXIhcyOXC2TtRb0sg==} engines: {node: '>=14'} @@ -983,6 +1049,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-mysql2@0.40.0': + resolution: {integrity: sha512-0xfS1xcqUmY7WE1uWjlmI67Xg3QsSUlNT+AcXHeA4BDUPwZtWqF4ezIwLgpVZfHOnkAEheqGfNSWd1PIu3Wnfg==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-mysql@0.38.1': resolution: {integrity: sha512-+iBAawUaTfX/HAlvySwozx0C2B6LBfNPXX1W8Z2On1Uva33AGkw2UjL9XgIg1Pj4eLZ9R4EoJ/aFz+Xj4E/7Fw==} engines: {node: '>=14'} @@ -995,6 +1067,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-mysql@0.40.0': + resolution: {integrity: sha512-d7ja8yizsOCNMYIJt5PH/fKZXjb/mS48zLROO4BzZTtDfhNCl2UM/9VIomP2qkGIFVouSJrGr/T00EzY7bPtKA==} + engines: {node: '>=14'} + 
peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-nestjs-core@0.37.1': resolution: {integrity: sha512-ebYQjHZEmGHWEALwwDGhSQVLBaurFnuLIkZD5igPXrt7ohfF4lc5/4al1LO+vKc0NHk8SJWStuRueT86ISA8Vg==} engines: {node: '>=14'} @@ -1007,6 +1085,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-nestjs-core@0.39.0': + resolution: {integrity: sha512-mewVhEXdikyvIZoMIUry8eb8l3HUjuQjSjVbmLVTt4NQi35tkpnHQrG9bTRBrl3403LoWZ2njMPJyg4l6HfKvA==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-net@0.36.0': resolution: {integrity: sha512-rZlbSgwAJys8lpug+xIeAdO98ypYMAPVqrHqc4AHuUl5S4MULHEcjGLMZLoE/guEGO4xAQ5XUezpRFGM1SAnsg==} engines: {node: '>=14'} @@ -1025,6 +1109,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-pg@0.43.0': + resolution: {integrity: sha512-og23KLyoxdnAeFs1UWqzSonuCkePUzCX30keSYigIzJe/6WSYA8rnEI5lobcxPEzg+GcU06J7jzokuEHbjVJNw==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-pino@0.39.0': resolution: {integrity: sha512-uA17F2iP77o3NculB63QD2zv3jkJ093Gfb0GxHLEqTIqpYs1ToJ53ybWwjJwqFByxk7GrliaxaxVtWC23PKzBg==} engines: {node: '>=14'} @@ -1043,6 +1133,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-redis-4@0.41.0': + resolution: {integrity: sha512-H7IfGTqW2reLXqput4yzAe8YpDC0fmVNal95GHMLOrS89W+qWUKIqxolSh63hJyfmwPSFwXASzj7wpSk8Az+Dg==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation-redis@0.39.1': resolution: {integrity: sha512-HUjTerD84jRJnSyDrRPqn6xQ7K91o9qLflRPZqzRvq0GRj5PMfc6TJ/z3q/ayWy/2Kzffhrp7HCIVp0u0TkgUg==} engines: {node: '>=14'} @@ -1097,6 +1193,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation@0.46.0': + resolution: {integrity: sha512-a9TijXZZbk0vI5TGLZl+0kxyFfrXHhX6Svtz7Pp2/VBlCSKrazuULEyoJQrOknJyFWNMEmbbJgOciHCCpQcisw==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/instrumentation@0.51.1': resolution: {integrity: sha512-JIrvhpgqY6437QIqToyozrUG1h5UhwHkaGK/WAX+fkrpyPtc+RO5FkRtUd9BH0MibabHHvqsnBGKfKVijbmp8w==} engines: {node: '>=14'} @@ -1273,6 +1375,9 @@ packages: '@prisma/instrumentation@5.16.0': resolution: {integrity: sha512-MVzNRW2ikWvVNnMIEgQMcwWxpFD+XF2U2h0Qz7MjutRqJxrhWexWV2aSi2OXRaU8UL5wzWw7pnjdKUzYhWauLg==} + '@prisma/instrumentation@5.17.0': + resolution: {integrity: sha512-c1Sle4ji8aasMcYfBBHFM56We4ljfenVtRmS8aY06BllS7SoU6SmJBwG7vil+GHiR0Yrh+t9iBwt4AY0Jr4KNQ==} + '@protobufjs/aspromise@1.1.2': resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==} @@ -1344,14 +1449,68 @@ packages: '@selderee/plugin-htmlparser2@0.11.0': resolution: {integrity: sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ==} + '@sentry/cli-darwin@2.33.1': + resolution: {integrity: sha512-+4/VIx/E1L2hChj5nGf5MHyEPHUNHJ/HoG5RY+B+vyEutGily1c1+DM2bum7RbD0xs6wKLIyup5F02guzSzG8A==} + engines: {node: '>=10'} + os: [darwin] + + '@sentry/cli-linux-arm64@2.33.1': + resolution: {integrity: sha512-DbGV56PRKOLsAZJX27Jt2uZ11QfQEMmWB4cIvxkKcFVE+LJP4MVA+MGGRUL6p+Bs1R9ZUuGbpKGtj0JiG6CoXw==} + engines: {node: '>=10'} + cpu: [arm64] + os: [linux, freebsd] + + '@sentry/cli-linux-arm@2.33.1': + resolution: {integrity: 
sha512-zbxEvQju+tgNvzTOt635le4kS/Fbm2XC2RtYbCTs034Vb8xjrAxLnK0z1bQnStUV8BkeBHtsNVrG+NSQDym2wg==} + engines: {node: '>=10'} + cpu: [arm] + os: [linux, freebsd] + + '@sentry/cli-linux-i686@2.33.1': + resolution: {integrity: sha512-g2LS4oPXkPWOfKWukKzYp4FnXVRRSwBxhuQ9eSw2peeb58ZIObr4YKGOA/8HJRGkooBJIKGaAR2mH2Pk1TKaiA==} + engines: {node: '>=10'} + cpu: [x86, ia32] + os: [linux, freebsd] + + '@sentry/cli-linux-x64@2.33.1': + resolution: {integrity: sha512-IV3dcYV/ZcvO+VGu9U6kuxSdbsV2kzxaBwWUQxtzxJ+cOa7J8Hn1t0koKGtU53JVZNBa06qJWIcqgl4/pCuKIg==} + engines: {node: '>=10'} + cpu: [x64] + os: [linux, freebsd] + + '@sentry/cli-win32-i686@2.33.1': + resolution: {integrity: sha512-F7cJySvkpzIu7fnLKNHYwBzZYYwlhoDbAUnaFX0UZCN+5DNp/5LwTp37a5TWOsmCaHMZT4i9IO4SIsnNw16/zQ==} + engines: {node: '>=10'} + cpu: [x86, ia32] + os: [win32] + + '@sentry/cli-win32-x64@2.33.1': + resolution: {integrity: sha512-8VyRoJqtb2uQ8/bFRKNuACYZt7r+Xx0k2wXRGTyH05lCjAiVIXn7DiS2BxHFty7M1QEWUCMNsb/UC/x/Cu2wuA==} + engines: {node: '>=10'} + cpu: [x64] + os: [win32] + + '@sentry/cli@2.33.1': + resolution: {integrity: sha512-dUlZ4EFh98VFRPJ+f6OW3JEYQ7VvqGNMa0AMcmvk07ePNeK/GicAWmSQE4ZfJTTl80ul6HZw1kY01fGQOQlVRA==} + engines: {node: '>= 10'} + hasBin: true + '@sentry/core@8.13.0': resolution: {integrity: sha512-N9Qg4ZGxZWp8eb2eUUHVVKgjBLtFIjS805nG92s6yJmkvOpKm6mLtcUaT/iDf3Hta6nG+xRkhbE3r+Z4cbXG8w==} engines: {node: '>=14.18'} + '@sentry/core@8.26.0': + resolution: {integrity: sha512-g/tVmTZD4GNbLFf++hKJfBpcCAtduFEMLnbfa9iT/QEZjlmP+EzY+GsH9bafM5VsNe8DiOUp+kJKWtShzlVdBA==} + engines: {node: '>=14.18'} + '@sentry/node@8.13.0': resolution: {integrity: sha512-OeZ7K90RhyxfwfreerIi4cszzHrPRRH36STJno2+p3sIGbG5VScOccqXzYEOAqHpByxnti4KQN34BLAT2BFOEA==} engines: {node: '>=14.18'} + '@sentry/node@8.26.0': + resolution: {integrity: sha512-N9mNLzicnfGgsq6P10ckPdTzEFusjTC7gpqPopwq5eEMF7g798hH8CcE5o6FZ4iAAR3vWliAR/jgccdoMmJMpQ==} + engines: {node: '>=14.18'} + '@sentry/opentelemetry@8.13.0': resolution: {integrity: sha512-NYn/HNE/SxFXe8pfnxJknhrrRzYRMHNssCoi5M1CeR5G7F2BGxxVmaGsd8j0WyTCpUS4i97G4vhYtDGxHvWN6w==} engines: {node: '>=14.18'} @@ -1362,14 +1521,37 @@ packages: '@opentelemetry/sdk-trace-base': ^1.25.1 '@opentelemetry/semantic-conventions': ^1.25.1 + '@sentry/opentelemetry@8.26.0': + resolution: {integrity: sha512-HBDheM/+ysfIz8R1OH4bBIxdgD7ZbQkKLJAUXkdAbBcfbpK/CTtwcplbauF5wY7Q+GYvwL/ShuDwvXRfW+gFyQ==} + engines: {node: '>=14.18'} + peerDependencies: + '@opentelemetry/api': ^1.9.0 + '@opentelemetry/core': ^1.25.1 + '@opentelemetry/instrumentation': ^0.52.1 + '@opentelemetry/sdk-trace-base': ^1.25.1 + '@opentelemetry/semantic-conventions': ^1.25.1 + + '@sentry/profiling-node@8.26.0': + resolution: {integrity: sha512-yGHFoqSKe5j9fDK9n5ntJxDyZnedwjCm6fAXwIlsLJOUBqn5g7l8V1XgBPlCJLZzOG0fbvGvSo4WyBfDoSD8vQ==} + engines: {node: '>=14.18'} + hasBin: true + '@sentry/types@8.13.0': resolution: {integrity: sha512-r63s/H5gvQnQM9tTGBXz2xErUbxZALh4e2Lg/1aHj4zIvGLBjA2z5qWsh6TEZYbpmgAyGShLDr6+rWeUVf9yBQ==} engines: {node: '>=14.18'} + '@sentry/types@8.26.0': + resolution: {integrity: sha512-zKmh6SWsJh630rpt7a9vP4Cm4m1C2gDTUqUiH565CajCL/4cePpNWYrNwalSqsOSL7B9OrczA1+n6a6XvND+ng==} + engines: {node: '>=14.18'} + '@sentry/utils@8.13.0': resolution: {integrity: sha512-PxV0v9VbGWH9zP37P5w2msLUFDr287nYjoY2XVF+RSolyiTs1CQNI5ZMUO3o4MsSac/dpXxjyrZXQd72t/jRYA==} engines: {node: '>=14.18'} + '@sentry/utils@8.26.0': + resolution: {integrity: sha512-xvlPU9Hd2BlyT+FhWHGNwnxWqdVRk2AHnDtVcW4Ma0Ri5EwS+uy4Jeik5UkSv8C5RVb9VlxFmS8LN3I1MPJsLw==} + engines: {node: '>=14.18'} + 
'@sinclair/typebox@0.27.8': resolution: {integrity: sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==} @@ -1719,6 +1901,10 @@ packages: afinn-165@1.0.4: resolution: {integrity: sha512-7+Wlx3BImrK0HiG6y3lU4xX7SpBPSSu8T9iguPMlaueRFxjbYwAQrp9lqZUuFikqKbd/en8lVREILvP2J80uJA==} + agent-base@6.0.2: + resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} + engines: {node: '>= 6.0.0'} + agent-base@7.1.1: resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==} engines: {node: '>= 14'} @@ -2663,6 +2849,10 @@ packages: engines: {node: '>=12'} hasBin: true + https-proxy-agent@5.0.1: + resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} + engines: {node: '>= 6'} + https-proxy-agent@7.0.4: resolution: {integrity: sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==} engines: {node: '>= 14'} @@ -2699,9 +2889,15 @@ packages: resolution: {integrity: sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==} engines: {node: '>=6'} + import-in-the-middle@1.11.0: + resolution: {integrity: sha512-5DimNQGoe0pLUHbR9qK84iWaWjjbsxiqXnw6Qz64+azRgleqv9k2kTt5fw7QsOpmaGYtuxxursnPPsnTKEx10Q==} + import-in-the-middle@1.4.2: resolution: {integrity: sha512-9WOz1Yh/cvO/p69sxRmhyQwrIGGSp7EIdcb+fFNVi7CzQGQB8U1/1XrKVSbEd/GNOAeM0peJtmi7+qphe7NvAw==} + import-in-the-middle@1.7.1: + resolution: {integrity: sha512-1LrZPDtW+atAxH42S6288qyDFNQ2YCty+2mxEPRtfazH6Z5QwkaBSTS2ods7hnVJioF6rkRfNoA6A/MstpFXLg==} + import-in-the-middle@1.7.4: resolution: {integrity: sha512-Lk+qzWmiQuRPPulGQeK5qq0v32k2bHnWrRPFgqyvhw7Kkov5L6MOLOIU3pcWeujc9W4q54Cp3Q2WV16eQkc7Bg==} @@ -3527,6 +3723,10 @@ packages: resolution: {integrity: sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==} engines: {node: '>= 0.4.0'} + node-abi@3.67.0: + resolution: {integrity: sha512-bLn/fU/ALVBE9wj+p4Y21ZJWYFjUXLXPi/IewyLZkx3ApxKDNBWCKdReeKOtD8dWpOdDCeMyLh6ZewzcLsG2Nw==} + engines: {node: '>=10'} + node-abort-controller@3.1.1: resolution: {integrity: sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ==} @@ -3629,6 +3829,12 @@ packages: resolution: {integrity: sha512-aiSt/4ubOTyb1N5C2ZbGrBvaJOXIZhZvpRPYuUVxQJe27wJZqf/o65iPrqgLcgfeOLaQ8cS2Q+762jrYvniTrA==} engines: {node: '>18.0.0'} + opentelemetry-instrumentation-fetch-node@1.2.3: + resolution: {integrity: sha512-Qb11T7KvoCevMaSeuamcLsAD+pZnavkhDnlVL0kRozfhl42dKG5Q3anUklAFKJZjY3twLR+BnRa6DlwwkIE/+A==} + engines: {node: '>18.0.0'} + peerDependencies: + '@opentelemetry/api': ^1.6.0 + option@0.2.4: resolution: {integrity: sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==} @@ -5496,6 +5702,16 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-connect@0.38.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + '@types/connect': 3.4.36 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-cucumber@0.6.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5538,6 +5754,15 @@ snapshots: 
transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-express@0.41.1(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-fastify@0.36.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5556,6 +5781,15 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-fastify@0.38.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-fs@0.12.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5564,6 +5798,14 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-fs@0.14.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-generic-pool@0.36.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5586,6 +5828,13 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-graphql@0.42.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-grpc@0.51.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5612,6 +5861,15 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-hapi@0.40.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-http@0.51.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5650,6 +5908,15 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-ioredis@0.42.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/redis-common': 0.36.2 + '@opentelemetry/semantic-conventions': 1.25.1 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-knex@0.36.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5680,6 +5947,15 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-koa@0.42.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-lru-memoizer@0.37.0(@opentelemetry/api@1.9.0)': 
dependencies: '@opentelemetry/api': 1.9.0 @@ -5714,6 +5990,15 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-mongodb@0.46.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-metrics': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-mongoose@0.38.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5732,6 +6017,15 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-mongoose@0.40.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-mysql2@0.38.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5750,6 +6044,15 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-mysql2@0.40.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + '@opentelemetry/sql-common': 0.40.1(@opentelemetry/api@1.9.0) + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-mysql@0.38.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5768,6 +6071,15 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-mysql@0.40.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + '@types/mysql': 2.15.22 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-nestjs-core@0.37.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5784,6 +6096,14 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-nestjs-core@0.39.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-net@0.36.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5814,6 +6134,17 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-pg@0.43.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + '@opentelemetry/sql-common': 0.40.1(@opentelemetry/api@1.9.0) + '@types/pg': 8.6.1 + '@types/pg-pool': 2.0.4 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-pino@0.39.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5839,6 +6170,15 @@ snapshots: transitivePeerDependencies: - supports-color + '@opentelemetry/instrumentation-redis-4@0.41.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + 
'@opentelemetry/redis-common': 0.36.2 + '@opentelemetry/semantic-conventions': 1.25.1 + transitivePeerDependencies: + - supports-color + '@opentelemetry/instrumentation-redis@0.39.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5917,6 +6257,18 @@ snapshots: - supports-color optional: true + '@opentelemetry/instrumentation@0.46.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@types/shimmer': 1.0.5 + import-in-the-middle: 1.7.1 + require-in-the-middle: 7.3.0 + semver: 7.6.2 + shimmer: 1.2.1 + transitivePeerDependencies: + - supports-color + optional: true + '@opentelemetry/instrumentation@0.51.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6132,6 +6484,14 @@ snapshots: transitivePeerDependencies: - supports-color + '@prisma/instrumentation@5.17.0': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0) + transitivePeerDependencies: + - supports-color + '@protobufjs/aspromise@1.1.2': {} '@protobufjs/base64@1.1.2': {} @@ -6207,11 +6567,56 @@ snapshots: domhandler: 5.0.3 selderee: 0.11.0 + '@sentry/cli-darwin@2.33.1': + optional: true + + '@sentry/cli-linux-arm64@2.33.1': + optional: true + + '@sentry/cli-linux-arm@2.33.1': + optional: true + + '@sentry/cli-linux-i686@2.33.1': + optional: true + + '@sentry/cli-linux-x64@2.33.1': + optional: true + + '@sentry/cli-win32-i686@2.33.1': + optional: true + + '@sentry/cli-win32-x64@2.33.1': + optional: true + + '@sentry/cli@2.33.1': + dependencies: + https-proxy-agent: 5.0.1 + node-fetch: 2.7.0 + progress: 2.0.3 + proxy-from-env: 1.1.0 + which: 2.0.2 + optionalDependencies: + '@sentry/cli-darwin': 2.33.1 + '@sentry/cli-linux-arm': 2.33.1 + '@sentry/cli-linux-arm64': 2.33.1 + '@sentry/cli-linux-i686': 2.33.1 + '@sentry/cli-linux-x64': 2.33.1 + '@sentry/cli-win32-i686': 2.33.1 + '@sentry/cli-win32-x64': 2.33.1 + transitivePeerDependencies: + - encoding + - supports-color + '@sentry/core@8.13.0': dependencies: '@sentry/types': 8.13.0 '@sentry/utils': 8.13.0 + '@sentry/core@8.26.0': + dependencies: + '@sentry/types': 8.26.0 + '@sentry/utils': 8.26.0 + '@sentry/node@8.13.0': dependencies: '@opentelemetry/api': 1.9.0 @@ -6246,6 +6651,42 @@ snapshots: transitivePeerDependencies: - supports-color + '@sentry/node@8.26.0': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/context-async-hooks': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-connect': 0.38.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-express': 0.41.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-fastify': 0.38.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-fs': 0.14.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-graphql': 0.42.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-hapi': 0.40.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-http': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-ioredis': 0.42.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-koa': 0.42.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-mongodb': 0.46.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-mongoose': 0.40.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-mysql': 
0.40.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-mysql2': 0.40.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-nestjs-core': 0.39.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-pg': 0.43.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation-redis-4': 0.41.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + '@prisma/instrumentation': 5.17.0 + '@sentry/core': 8.26.0 + '@sentry/opentelemetry': 8.26.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1) + '@sentry/types': 8.26.0 + '@sentry/utils': 8.26.0 + import-in-the-middle: 1.11.0 + optionalDependencies: + opentelemetry-instrumentation-fetch-node: 1.2.3(@opentelemetry/api@1.9.0) + transitivePeerDependencies: + - supports-color + '@sentry/opentelemetry@8.13.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6257,12 +6698,40 @@ snapshots: '@sentry/types': 8.13.0 '@sentry/utils': 8.13.0 + '@sentry/opentelemetry@8.26.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + '@sentry/core': 8.26.0 + '@sentry/types': 8.26.0 + '@sentry/utils': 8.26.0 + + '@sentry/profiling-node@8.26.0': + dependencies: + '@sentry/core': 8.26.0 + '@sentry/node': 8.26.0 + '@sentry/types': 8.26.0 + '@sentry/utils': 8.26.0 + detect-libc: 2.0.3 + node-abi: 3.67.0 + transitivePeerDependencies: + - supports-color + '@sentry/types@8.13.0': {} + '@sentry/types@8.26.0': {} + '@sentry/utils@8.13.0': dependencies: '@sentry/types': 8.13.0 + '@sentry/utils@8.26.0': + dependencies: + '@sentry/types': 8.26.0 + '@sinclair/typebox@0.27.8': {} '@sinonjs/commons@3.0.1': @@ -6643,6 +7112,12 @@ snapshots: afinn-165@1.0.4: {} + agent-base@6.0.2: + dependencies: + debug: 4.3.5 + transitivePeerDependencies: + - supports-color + agent-base@7.1.1: dependencies: debug: 4.3.5 @@ -7194,8 +7669,7 @@ snapshots: destroy@1.2.0: {} - detect-libc@2.0.3: - optional: true + detect-libc@2.0.3: {} detect-newline@3.1.0: {} @@ -7662,6 +8136,13 @@ snapshots: - debug - supports-color + https-proxy-agent@5.0.1: + dependencies: + agent-base: 6.0.2 + debug: 4.3.5 + transitivePeerDependencies: + - supports-color + https-proxy-agent@7.0.4: dependencies: agent-base: 7.1.1 @@ -7701,6 +8182,13 @@ snapshots: parent-module: 1.0.1 resolve-from: 4.0.0 + import-in-the-middle@1.11.0: + dependencies: + acorn: 8.12.0 + acorn-import-attributes: 1.9.5(acorn@8.12.0) + cjs-module-lexer: 1.3.1 + module-details-from-path: 1.0.3 + import-in-the-middle@1.4.2: dependencies: acorn: 8.12.0 @@ -7709,6 
+8197,14 @@ snapshots: module-details-from-path: 1.0.3 optional: true + import-in-the-middle@1.7.1: + dependencies: + acorn: 8.12.0 + acorn-import-assertions: 1.9.0(acorn@8.12.0) + cjs-module-lexer: 1.3.1 + module-details-from-path: 1.0.3 + optional: true + import-in-the-middle@1.7.4: dependencies: acorn: 8.12.0 @@ -8601,6 +9097,10 @@ snapshots: netmask@2.0.2: {} + node-abi@3.67.0: + dependencies: + semver: 7.6.2 + node-abort-controller@3.1.1: {} node-domexception@1.0.0: {} @@ -8712,6 +9212,15 @@ snapshots: - supports-color optional: true + opentelemetry-instrumentation-fetch-node@1.2.3(@opentelemetry/api@1.9.0): + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/instrumentation': 0.46.0(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.25.1 + transitivePeerDependencies: + - supports-color + optional: true + option@0.2.4: {} ora@5.4.1: diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index f9fab1cb..78379bee 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -1,7 +1,10 @@ +// Sentry +import "dotenv/config"; +import "./services/sentry" + import express from "express"; import bodyParser from "body-parser"; import cors from "cors"; -import "dotenv/config"; import { getScrapeQueue } from "./services/queue-service"; import { v0Router } from "./routes/v0"; import { initSDK } from "@hyperdx/node-opentelemetry"; @@ -14,6 +17,8 @@ import http from 'node:http'; import https from 'node:https'; import CacheableLookup from 'cacheable-lookup'; + + const { createBullBoard } = require("@bull-board/api"); const { BullAdapter } = require("@bull-board/api/bullAdapter"); const { ExpressAdapter } = require("@bull-board/express"); @@ -190,3 +195,5 @@ if (cluster.isMaster) { // sq.on("resumed", j => ScrapeEvents.logJobEvent(j, "resumed")); // sq.on("removed", j => ScrapeEvents.logJobEvent(j, "removed")); + + diff --git a/apps/api/src/lib/default-values.ts b/apps/api/src/lib/default-values.ts index 0b469ee2..f70f17c0 100644 --- a/apps/api/src/lib/default-values.ts +++ b/apps/api/src/lib/default-values.ts @@ -1,6 +1,6 @@ export const defaultOrigin = "api"; -export const defaultTimeout = 45000; // 45 seconds +export const defaultTimeout = 60000; // 60 seconds export const defaultPageOptions = { onlyMainContent: false, diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 6e848ce6..aadfade9 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -117,7 +117,7 @@ async function processJob(job: Job, token: string) { // Check if the job URL is researchhub and block it immediately // TODO: remove this once solve the root issue - if (job.data.url && (job.data.url.includes("researchhub.com") || job.data.url.includes("ebay.com") || job.data.url.includes("youtube.com") || job.data.url.includes("microsoft.com"))) { + if (job.data.url && (job.data.url.includes("researchhub.com") || job.data.url.includes("ebay.com") || job.data.url.includes("youtube.com") || job.data.url.includes("microsoft.com") )) { Logger.info(`🐂 Blocking job ${job.id} with URL ${job.data.url}`); const data = { success: false, diff --git a/apps/api/src/services/sentry.ts b/apps/api/src/services/sentry.ts new file mode 100644 index 00000000..5deb6195 --- /dev/null +++ b/apps/api/src/services/sentry.ts @@ -0,0 +1,15 @@ +// Import with `import * as Sentry from "@sentry/node"` if you are using ESM +const Sentry = require("@sentry/node"); +const { nodeProfilingIntegration } = require("@sentry/profiling-node"); + 
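// Note on the import order introduced in index.ts above: both imports run
// purely for side effects, and "dotenv/config" has to come first so that
// process.env.SENTRY_DSN is already populated when ./services/sentry calls
// Sentry.init at module load. Swapping the two lines would initialise Sentry
// with an undefined DSN and silently disable error reporting.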
+Sentry.init({ + dsn: process.env.SENTRY_DSN, + integrations: [ + nodeProfilingIntegration(), + ], + // Tracing + tracesSampleRate: 1.0, // Capture 100% of the transactions + + // Set sampling rate for profiling - this is relative to tracesSampleRate + profilesSampleRate: 1.0, +}); \ No newline at end of file diff --git a/apps/api/tsconfig.json b/apps/api/tsconfig.json index 84007570..239d9b3a 100644 --- a/apps/api/tsconfig.json +++ b/apps/api/tsconfig.json @@ -2,16 +2,27 @@ "compilerOptions": { "rootDir": "./src", "lib": ["es6","DOM"], - "target": "ES2020", // or higher + + // or higher + "target": "ES2020", + "module": "commonjs", "esModuleInterop": true, "sourceMap": true, "outDir": "./dist/src", "moduleResolution": "node", "baseUrl": ".", + "paths": { "*": ["node_modules/*", "src/types/*"], - } + }, + + "inlineSources": true, + + // Set `sourceRoot` to "/" to strip the build path prefix + // from generated source code references. + // This improves issue grouping in Sentry. + "sourceRoot": "/" }, "include": ["src/","src/**/*", "services/db/supabase.ts", "utils/utils.ts", "services/db/supabaseEmbeddings.ts", "utils/EventEmmitter.ts", "src/services/queue-service.ts"] } From 86942728e348d216172fc0c625ecc2f5d70856aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 21 Aug 2024 17:58:27 +0200 Subject: [PATCH 30/65] Add metadata for queue-worker and Express --- apps/api/package.json | 2 +- apps/api/pnpm-lock.yaml | 340 +------------------------- apps/api/src/index.ts | 5 +- apps/api/src/services/queue-worker.ts | 17 +- apps/api/src/services/sentry.ts | 25 +- 5 files changed, 34 insertions(+), 355 deletions(-) diff --git a/apps/api/package.json b/apps/api/package.json index a51f51aa..55cefe96 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -57,7 +57,7 @@ "@logtail/node": "^0.4.12", "@nangohq/node": "^0.40.8", "@sentry/cli": "^2.33.1", - "@sentry/node": "^8.13.0", + "@sentry/node": "^8.26.0", "@sentry/profiling-node": "^8.26.0", "@supabase/supabase-js": "^2.44.2", "ajv": "^8.16.0", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 7065d1cf..3f59352c 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -39,8 +39,8 @@ importers: specifier: ^2.33.1 version: 2.33.1 '@sentry/node': - specifier: ^8.13.0 - version: 8.13.0 + specifier: ^8.26.0 + version: 8.26.0 '@sentry/profiling-node': specifier: ^8.26.0 version: 8.26.0 @@ -809,12 +809,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-connect@0.37.0': - resolution: {integrity: sha512-SeQktDIH5rNzjiEiazWiJAIXkmnLOnNV7wwHpahrqE0Ph+Z3heqMfxRtoMtbdJSIYLfcNZYO51AjxZ00IXufdw==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-connect@0.38.0': resolution: {integrity: sha512-2/nRnx3pjYEmdPIaBwtgtSviTKHWnDZN3R+TkRUnhIVrvBKVcq+I5B2rtd6mr6Fe9cHlZ9Ojcuh7pkNh/xdWWg==} engines: {node: '>=14'} @@ -845,12 +839,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-express@0.40.1': - resolution: {integrity: sha512-+RKMvVe2zw3kIXRup9c1jFu3T4d0fs5aKy015TpiMyoCKX1UMu3Z0lfgYtuyiSTANvg5hZnDbWmQmqSPj9VTvg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-express@0.41.1': resolution: {integrity: sha512-uRx0V3LPGzjn2bxAnV8eUsDT82vT7NTwI0ezEuPMBOTOsnPpGhWdhcdNdhH80sM4TrWrOfXm9HGEdfWE3TRIww==} engines: {node: '>=14'} @@ -863,12 +851,6 @@ packages: peerDependencies: '@opentelemetry/api': 
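// A hedged variant of the sentry.ts shipped above, for tuning rather than
// replacing it: sampling 100% of transactions and profiles is reasonable while
// validating the setup, but production deployments usually read the rates from
// the environment so they can be lowered without a redeploy.
// SENTRY_TRACES_SAMPLE_RATE and SENTRY_PROFILES_SAMPLE_RATE are assumed names
// for this sketch, not variables defined by this patch series.
import * as Sentry from "@sentry/node";
import { nodeProfilingIntegration } from "@sentry/profiling-node";

Sentry.init({
  dsn: process.env.SENTRY_DSN,
  integrations: [nodeProfilingIntegration()],
  tracesSampleRate: Number(process.env.SENTRY_TRACES_SAMPLE_RATE ?? "1.0"),
  profilesSampleRate: Number(process.env.SENTRY_PROFILES_SAMPLE_RATE ?? "1.0"),
});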
^1.3.0 - '@opentelemetry/instrumentation-fastify@0.37.0': - resolution: {integrity: sha512-WRjwzNZgupSzbEYvo9s+QuHJRqZJjVdNxSEpGBwWK8RKLlHGwGVAu0gcc2gPamJWUJsGqPGvahAPWM18ZkWj6A==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-fastify@0.38.0': resolution: {integrity: sha512-HBVLpTSYpkQZ87/Df3N0gAw7VzYZV3n28THIBrJWfuqw3Or7UqdhnjeuMIPQ04BKk3aZc0cWn2naSQObbh5vXw==} engines: {node: '>=14'} @@ -899,12 +881,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-graphql@0.41.0': - resolution: {integrity: sha512-R/gXeljgIhaRDKquVkKYT5QHPnFouM8ooyePZEP0kqyaVAedtR1V7NfAUJbxfTG5fBQa5wdmLjvu63+tzRXZCA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-graphql@0.42.0': resolution: {integrity: sha512-N8SOwoKL9KQSX7z3gOaw5UaTeVQcfDO1c21csVHnmnmGUoqsXbArK2B8VuwPWcv6/BC/i3io+xTo7QGRZ/z28Q==} engines: {node: '>=14'} @@ -923,12 +899,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-hapi@0.39.0': - resolution: {integrity: sha512-ik2nA9Yj2s2ay+aNY+tJsKCsEx6Tsc2g/MK0iWBW5tibwrWKTy1pdVt5sB3kd5Gkimqj23UV5+FH2JFcQLeKug==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-hapi@0.40.0': resolution: {integrity: sha512-8U/w7Ifumtd2bSN1OLaSwAAFhb9FyqWUki3lMMB0ds+1+HdSxYBe9aspEJEgvxAqOkrQnVniAPTEGf1pGM7SOw==} engines: {node: '>=14'} @@ -953,12 +923,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-ioredis@0.41.0': - resolution: {integrity: sha512-rxiLloU8VyeJGm5j2fZS8ShVdB82n7VNP8wTwfUQqDwRfHCnkzGr+buKoxuhGD91gtwJ91RHkjHA1Eg6RqsUTg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-ioredis@0.42.0': resolution: {integrity: sha512-P11H168EKvBB9TUSasNDOGJCSkpT44XgoM6d3gRIWAa9ghLpYhl0uRkS8//MqPzcJVHr3h3RmfXIpiYLjyIZTw==} engines: {node: '>=14'} @@ -977,12 +941,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-koa@0.41.0': - resolution: {integrity: sha512-mbPnDt7ELvpM2S0vixYUsde7122lgegLOJQxx8iJQbB8YHal/xnTh9v7IfArSVzIDo+E+080hxZyUZD4boOWkw==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-koa@0.42.0': resolution: {integrity: sha512-H1BEmnMhho8o8HuNRq5zEI4+SIHDIglNB7BPKohZyWG4fWNuR7yM4GTlR01Syq21vODAS7z5omblScJD/eZdKw==} engines: {node: '>=14'} @@ -1007,12 +965,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mongodb@0.45.0': - resolution: {integrity: sha512-xnZP9+ayeB1JJyNE9cIiwhOJTzNEsRhXVdLgfzmrs48Chhhk026mQdM5CITfyXSCfN73FGAIB8d91+pflJEfWQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mongodb@0.46.0': resolution: {integrity: sha512-VF/MicZ5UOBiXrqBslzwxhN7TVqzu1/LN/QDpkskqM0Zm0aZ4CVRbUygL8d7lrjLn15x5kGIe8VsSphMfPJzlA==} engines: {node: '>=14'} @@ -1025,12 +977,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mongoose@0.39.0': - resolution: {integrity: sha512-J1r66A7zJklPPhMtrFOO7/Ud2p0Pv5u8+r23Cd1JUH6fYPmftNJVsLp2urAt6PHK4jVqpP/YegN8wzjJ2mZNPQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mongoose@0.40.0': resolution: {integrity: 
sha512-niRi5ZUnkgzRhIGMOozTyoZIvJKNJyhijQI4nF4iFSb+FUx2v5fngfR+8XLmdQAO7xmsD8E5vEGdDVYVtKbZew==} engines: {node: '>=14'} @@ -1043,12 +989,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mysql2@0.39.0': - resolution: {integrity: sha512-Iypuq2z6TCfriAXCIZjRq8GTFCKhQv5SpXbmI+e60rYdXw8NHtMH4NXcGF0eKTuoCsC59IYSTUvDQYDKReaszA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mysql2@0.40.0': resolution: {integrity: sha512-0xfS1xcqUmY7WE1uWjlmI67Xg3QsSUlNT+AcXHeA4BDUPwZtWqF4ezIwLgpVZfHOnkAEheqGfNSWd1PIu3Wnfg==} engines: {node: '>=14'} @@ -1061,12 +1001,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mysql@0.39.0': - resolution: {integrity: sha512-8snHPh83rhrDf31v9Kq0Nf+ts8hdr7NguuszRqZomZBHgE0+UyXZSkXHAAFZoBPPRMGyM68uaFE5hVtFl+wOcA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mysql@0.40.0': resolution: {integrity: sha512-d7ja8yizsOCNMYIJt5PH/fKZXjb/mS48zLROO4BzZTtDfhNCl2UM/9VIomP2qkGIFVouSJrGr/T00EzY7bPtKA==} engines: {node: '>=14'} @@ -1079,12 +1013,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-nestjs-core@0.38.0': - resolution: {integrity: sha512-M381Df1dM8aqihZz2yK+ugvMFK5vlHG/835dc67Sx2hH4pQEQYDA2PpFPTgc9AYYOydQaj7ClFQunESimjXDgg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-nestjs-core@0.39.0': resolution: {integrity: sha512-mewVhEXdikyvIZoMIUry8eb8l3HUjuQjSjVbmLVTt4NQi35tkpnHQrG9bTRBrl3403LoWZ2njMPJyg4l6HfKvA==} engines: {node: '>=14'} @@ -1103,12 +1031,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-pg@0.42.0': - resolution: {integrity: sha512-sjgcM8CswYy8zxHgXv4RAZ09DlYhQ+9TdlourUs63Df/ek5RrB1ZbjznqW7PB6c3TyJJmX6AVtPTjAsROovEjA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-pg@0.43.0': resolution: {integrity: sha512-og23KLyoxdnAeFs1UWqzSonuCkePUzCX30keSYigIzJe/6WSYA8rnEI5lobcxPEzg+GcU06J7jzokuEHbjVJNw==} engines: {node: '>=14'} @@ -1127,12 +1049,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-redis-4@0.40.0': - resolution: {integrity: sha512-0ieQYJb6yl35kXA75LQUPhHtGjtQU9L85KlWa7d4ohBbk/iQKZ3X3CFl5jC5vNMq/GGPB3+w3IxNvALlHtrp7A==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-redis-4@0.41.0': resolution: {integrity: sha512-H7IfGTqW2reLXqput4yzAe8YpDC0fmVNal95GHMLOrS89W+qWUKIqxolSh63hJyfmwPSFwXASzj7wpSk8Az+Dg==} engines: {node: '>=14'} @@ -1187,12 +1103,6 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation@0.43.0': - resolution: {integrity: sha512-S1uHE+sxaepgp+t8lvIDuRgyjJWisAb733198kwQTUc9ZtYQ2V2gmyCtR1x21ePGVLoMiX/NWY7WA290hwkjJQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation@0.46.0': resolution: {integrity: sha512-a9TijXZZbk0vI5TGLZl+0kxyFfrXHhX6Svtz7Pp2/VBlCSKrazuULEyoJQrOknJyFWNMEmbbJgOciHCCpQcisw==} engines: {node: '>=14'} @@ -1372,9 +1282,6 @@ packages: resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==} engines: {node: '>=14'} - '@prisma/instrumentation@5.16.0': - resolution: {integrity: 
sha512-MVzNRW2ikWvVNnMIEgQMcwWxpFD+XF2U2h0Qz7MjutRqJxrhWexWV2aSi2OXRaU8UL5wzWw7pnjdKUzYhWauLg==} - '@prisma/instrumentation@5.17.0': resolution: {integrity: sha512-c1Sle4ji8aasMcYfBBHFM56We4ljfenVtRmS8aY06BllS7SoU6SmJBwG7vil+GHiR0Yrh+t9iBwt4AY0Jr4KNQ==} @@ -1503,24 +1410,10 @@ packages: resolution: {integrity: sha512-g/tVmTZD4GNbLFf++hKJfBpcCAtduFEMLnbfa9iT/QEZjlmP+EzY+GsH9bafM5VsNe8DiOUp+kJKWtShzlVdBA==} engines: {node: '>=14.18'} - '@sentry/node@8.13.0': - resolution: {integrity: sha512-OeZ7K90RhyxfwfreerIi4cszzHrPRRH36STJno2+p3sIGbG5VScOccqXzYEOAqHpByxnti4KQN34BLAT2BFOEA==} - engines: {node: '>=14.18'} - '@sentry/node@8.26.0': resolution: {integrity: sha512-N9mNLzicnfGgsq6P10ckPdTzEFusjTC7gpqPopwq5eEMF7g798hH8CcE5o6FZ4iAAR3vWliAR/jgccdoMmJMpQ==} engines: {node: '>=14.18'} - '@sentry/opentelemetry@8.13.0': - resolution: {integrity: sha512-NYn/HNE/SxFXe8pfnxJknhrrRzYRMHNssCoi5M1CeR5G7F2BGxxVmaGsd8j0WyTCpUS4i97G4vhYtDGxHvWN6w==} - engines: {node: '>=14.18'} - peerDependencies: - '@opentelemetry/api': ^1.9.0 - '@opentelemetry/core': ^1.25.1 - '@opentelemetry/instrumentation': ^0.52.1 - '@opentelemetry/sdk-trace-base': ^1.25.1 - '@opentelemetry/semantic-conventions': ^1.25.1 - '@sentry/opentelemetry@8.26.0': resolution: {integrity: sha512-HBDheM/+ysfIz8R1OH4bBIxdgD7ZbQkKLJAUXkdAbBcfbpK/CTtwcplbauF5wY7Q+GYvwL/ShuDwvXRfW+gFyQ==} engines: {node: '>=14.18'} @@ -2892,18 +2785,12 @@ packages: import-in-the-middle@1.11.0: resolution: {integrity: sha512-5DimNQGoe0pLUHbR9qK84iWaWjjbsxiqXnw6Qz64+azRgleqv9k2kTt5fw7QsOpmaGYtuxxursnPPsnTKEx10Q==} - import-in-the-middle@1.4.2: - resolution: {integrity: sha512-9WOz1Yh/cvO/p69sxRmhyQwrIGGSp7EIdcb+fFNVi7CzQGQB8U1/1XrKVSbEd/GNOAeM0peJtmi7+qphe7NvAw==} - import-in-the-middle@1.7.1: resolution: {integrity: sha512-1LrZPDtW+atAxH42S6288qyDFNQ2YCty+2mxEPRtfazH6Z5QwkaBSTS2ods7hnVJioF6rkRfNoA6A/MstpFXLg==} import-in-the-middle@1.7.4: resolution: {integrity: sha512-Lk+qzWmiQuRPPulGQeK5qq0v32k2bHnWrRPFgqyvhw7Kkov5L6MOLOIU3pcWeujc9W4q54Cp3Q2WV16eQkc7Bg==} - import-in-the-middle@1.8.1: - resolution: {integrity: sha512-yhRwoHtiLGvmSozNOALgjRPFI6uYsds60EoMqqnXyyv+JOIW/BrrLejuTGBt+bq0T5tLzOHrN0T7xYTm4Qt/ng==} - import-local@3.1.0: resolution: {integrity: sha512-ASB07uLtnDs1o6EHjKpX34BKYDSqnFerfTOJL2HvMqF70LnxpjkzDB8J44oT9pu4AMPkQwf8jl6szgvNd2tRIg==} engines: {node: '>=8'} @@ -3825,10 +3712,6 @@ packages: resolution: {integrity: sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A==} hasBin: true - opentelemetry-instrumentation-fetch-node@1.2.0: - resolution: {integrity: sha512-aiSt/4ubOTyb1N5C2ZbGrBvaJOXIZhZvpRPYuUVxQJe27wJZqf/o65iPrqgLcgfeOLaQ8cS2Q+762jrYvniTrA==} - engines: {node: '>18.0.0'} - opentelemetry-instrumentation-fetch-node@1.2.3: resolution: {integrity: sha512-Qb11T7KvoCevMaSeuamcLsAD+pZnavkhDnlVL0kRozfhl42dKG5Q3anUklAFKJZjY3twLR+BnRa6DlwwkIE/+A==} engines: {node: '>18.0.0'} @@ -5692,16 +5575,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-connect@0.37.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@types/connect': 3.4.36 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-connect@0.38.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5745,15 +5618,6 @@ snapshots: transitivePeerDependencies: - supports-color - 
'@opentelemetry/instrumentation-express@0.40.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-express@0.41.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5772,15 +5636,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-fastify@0.37.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-fastify@0.38.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5821,13 +5676,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-graphql@0.41.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-graphql@0.42.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5852,15 +5700,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-hapi@0.39.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-hapi@0.40.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5899,15 +5738,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-ioredis@0.41.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/redis-common': 0.36.2 - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-ioredis@0.42.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5936,17 +5766,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-koa@0.41.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@types/koa': 2.14.0 - '@types/koa__router': 12.0.3 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-koa@0.42.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -5981,15 +5800,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-mongodb@0.45.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-metrics': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - 
'@opentelemetry/instrumentation-mongodb@0.46.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6008,15 +5818,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-mongoose@0.39.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-mongoose@0.40.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6035,15 +5836,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-mysql2@0.39.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@opentelemetry/sql-common': 0.40.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-mysql2@0.40.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6062,15 +5854,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-mysql@0.39.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@types/mysql': 2.15.22 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-mysql@0.40.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6088,14 +5871,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-nestjs-core@0.38.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-nestjs-core@0.39.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6123,17 +5898,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-pg@0.42.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@opentelemetry/sql-common': 0.40.1(@opentelemetry/api@1.9.0) - '@types/pg': 8.6.1 - '@types/pg-pool': 2.0.4 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-pg@0.43.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6161,15 +5925,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-redis-4@0.40.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/redis-common': 0.36.2 - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-redis-4@0.41.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6245,18 +6000,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation@0.43.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@types/shimmer': 1.0.5 - 
import-in-the-middle: 1.4.2 - require-in-the-middle: 7.3.0 - semver: 7.6.2 - shimmer: 1.2.1 - transitivePeerDependencies: - - supports-color - optional: true - '@opentelemetry/instrumentation@0.46.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6286,7 +6029,7 @@ snapshots: '@opentelemetry/api': 1.9.0 '@opentelemetry/api-logs': 0.52.1 '@types/shimmer': 1.0.5 - import-in-the-middle: 1.8.1 + import-in-the-middle: 1.11.0 require-in-the-middle: 7.3.0 semver: 7.6.2 shimmer: 1.2.1 @@ -6476,14 +6219,6 @@ snapshots: '@pkgjs/parseargs@0.11.0': optional: true - '@prisma/instrumentation@5.16.0': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - '@prisma/instrumentation@5.17.0': dependencies: '@opentelemetry/api': 1.9.0 @@ -6617,40 +6352,6 @@ snapshots: '@sentry/types': 8.26.0 '@sentry/utils': 8.26.0 - '@sentry/node@8.13.0': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/context-async-hooks': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-connect': 0.37.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-express': 0.40.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-fastify': 0.37.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-graphql': 0.41.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-hapi': 0.39.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-http': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-ioredis': 0.41.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-koa': 0.41.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-mongodb': 0.45.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-mongoose': 0.39.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-mysql': 0.39.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-mysql2': 0.39.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-nestjs-core': 0.38.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-pg': 0.42.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-redis-4': 0.40.0(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@prisma/instrumentation': 5.16.0 - '@sentry/core': 8.13.0 - '@sentry/opentelemetry': 8.13.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1) - '@sentry/types': 8.13.0 - '@sentry/utils': 8.13.0 - optionalDependencies: - opentelemetry-instrumentation-fetch-node: 1.2.0 - transitivePeerDependencies: - - supports-color - '@sentry/node@8.26.0': dependencies: '@opentelemetry/api': 1.9.0 @@ -6687,17 +6388,6 @@ snapshots: transitivePeerDependencies: - supports-color - 
'@sentry/opentelemetry@8.13.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@sentry/core': 8.13.0 - '@sentry/types': 8.13.0 - '@sentry/utils': 8.13.0 - '@sentry/opentelemetry@8.26.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)': dependencies: '@opentelemetry/api': 1.9.0 @@ -8189,14 +7879,6 @@ snapshots: cjs-module-lexer: 1.3.1 module-details-from-path: 1.0.3 - import-in-the-middle@1.4.2: - dependencies: - acorn: 8.12.0 - acorn-import-assertions: 1.9.0(acorn@8.12.0) - cjs-module-lexer: 1.3.1 - module-details-from-path: 1.0.3 - optional: true - import-in-the-middle@1.7.1: dependencies: acorn: 8.12.0 @@ -8212,13 +7894,6 @@ snapshots: cjs-module-lexer: 1.3.1 module-details-from-path: 1.0.3 - import-in-the-middle@1.8.1: - dependencies: - acorn: 8.12.0 - acorn-import-attributes: 1.9.5(acorn@8.12.0) - cjs-module-lexer: 1.3.1 - module-details-from-path: 1.0.3 - import-local@3.1.0: dependencies: pkg-dir: 4.2.0 @@ -9203,15 +8878,6 @@ snapshots: opener@1.5.2: {} - opentelemetry-instrumentation-fetch-node@1.2.0: - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.43.0(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - optional: true - opentelemetry-instrumentation-fetch-node@1.2.3(@opentelemetry/api@1.9.0): dependencies: '@opentelemetry/api': 1.9.0 diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 78379bee..6a6437b3 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -1,7 +1,6 @@ -// Sentry import "dotenv/config"; import "./services/sentry" - +import * as Sentry from "@sentry/node"; import express from "express"; import bodyParser from "body-parser"; import cors from "cors"; @@ -183,6 +182,8 @@ if (cluster.isMaster) { res.send({ isProduction: global.isProduction }); }); + Sentry.setupExpressErrorHandler(app); + Logger.info(`Worker ${process.pid} started`); } diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index aadfade9..51f995c6 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -1,10 +1,12 @@ +import "dotenv/config"; +import "./services/sentry" +import * as Sentry from "@sentry/node"; import { CustomError } from "../lib/custom-error"; import { getScrapeQueue, redisConnection, scrapeQueueName, } from "./queue-service"; -import "dotenv/config"; import { logtail } from "./logtail"; import { startWebScraperPipeline } from "../main/runWebScraper"; import { callWebhook } from "./webhook"; @@ -102,7 +104,12 @@ const workerFun = async (queueName: string, processJobInternal: (token: string, const job = await worker.getNextJob(token); if (job) { - processJobInternal(token, job); + Sentry.startSpan({ + name: "Job " + job.id, + parentSpan: null, + }, async () => { + await 
processJobInternal(token, job); + }); await sleep(gotJobInterval); } else { await sleep(connectionMonitorInterval); @@ -289,6 +296,12 @@ async function processJob(job: Job, token: string) { } catch (error) { Logger.error(`🐂 Job errored ${job.id} - ${error}`); + Sentry.captureException(error, { + data: { + job: job.id + }, + }) + if (error instanceof CustomError) { // Here we handle the error, then save the failed job Logger.error(error.message); // or any other error handling diff --git a/apps/api/src/services/sentry.ts b/apps/api/src/services/sentry.ts index 5deb6195..04a0b1e4 100644 --- a/apps/api/src/services/sentry.ts +++ b/apps/api/src/services/sentry.ts @@ -1,15 +1,14 @@ // Import with `import * as Sentry from "@sentry/node"` if you are using ESM -const Sentry = require("@sentry/node"); -const { nodeProfilingIntegration } = require("@sentry/profiling-node"); +import * as Sentry from "@sentry/node"; +import { nodeProfilingIntegration } from "@sentry/profiling-node"; -Sentry.init({ - dsn: process.env.SENTRY_DSN, - integrations: [ - nodeProfilingIntegration(), - ], - // Tracing - tracesSampleRate: 1.0, // Capture 100% of the transactions - - // Set sampling rate for profiling - this is relative to tracesSampleRate - profilesSampleRate: 1.0, -}); \ No newline at end of file +if (process.env.SENTRY_DSN) { + Sentry.init({ + dsn: process.env.SENTRY_DSN, + integrations: [ + nodeProfilingIntegration(), + ], + tracesSampleRate: 1.0, + profilesSampleRate: 1.0, + }); +} From 920702cdde72bb1ac81c71fd90143cb8b4c37127 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 21 Aug 2024 19:08:03 +0200 Subject: [PATCH 31/65] Update builder to handle uploading sourcemaps --- apps/api/Dockerfile | 5 ++++- apps/api/package.json | 6 ++++-- apps/api/pnpm-lock.yaml | 20 ++++++++++++++++++++ apps/api/src/services/queue-worker.ts | 9 ++++++--- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/apps/api/Dockerfile b/apps/api/Dockerfile index a7be4fe0..201b2ce9 100644 --- a/apps/api/Dockerfile +++ b/apps/api/Dockerfile @@ -12,8 +12,11 @@ RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-l FROM base AS build RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile +RUN apt-get update -qq && apt-get install -y ca-certificates && update-ca-certificates RUN pnpm install -RUN pnpm run build +RUN --mount=type=secret,id=SENTRY_AUTH_TOKEN \ + SENTRY_AUTH_TOKEN="$(cat /run/secrets/SENTRY_AUTH_TOKEN)" \ + bash -c "if [ -z $SENTRY_AUTH_TOKEN ]; then pnpm run build:nosentry; else pnpm run build; fi" # Install packages needed for deployment diff --git a/apps/api/package.json b/apps/api/package.json index 55cefe96..c9058943 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -5,11 +5,12 @@ "main": "index.js", "scripts": { "start": "nodemon --exec ts-node src/index.ts", - "start:production": "tsc && pnpm sentry:sourcemaps && node dist/src/index.js", + "start:production": "tsc && node dist/src/index.js", "format": "prettier --write \"src/**/*.(js|ts)\"", "flyio": "node dist/src/index.js", "start:dev": "nodemon --exec ts-node src/index.ts", "build": "tsc && pnpm sentry:sourcemaps", + "build:nosentry": "tsc", "test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'", "test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'", 
"test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'", @@ -19,7 +20,7 @@ "mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest", "mongo-docker-console": "docker exec -it mongodb mongosh", "run-example": "npx ts-node src/example.ts", - "deploy:fly": "flyctl deploy", + "deploy:fly": "flyctl deploy --build-secret SENTRY_AUTH_TOKEN=$(dotenv -p SENTRY_AUTH_TOKEN)", "deploy:fly:staging": "fly deploy -c fly.staging.toml", "sentry:sourcemaps": "sentry-cli sourcemaps inject --org caleb-peffer --project firecrawl-scraper-js ./dist && sentry-cli sourcemaps upload --org caleb-peffer --project firecrawl-scraper-js ./dist" }, @@ -73,6 +74,7 @@ "cron-parser": "^4.9.0", "date-fns": "^3.6.0", "dotenv": "^16.3.1", + "dotenv-cli": "^7.4.2", "express-rate-limit": "^7.3.1", "form-data": "^4.0.0", "glob": "^10.4.2", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 3f59352c..efbe9d80 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -86,6 +86,9 @@ importers: dotenv: specifier: ^16.3.1 version: 16.4.5 + dotenv-cli: + specifier: ^7.4.2 + version: 7.4.2 express-rate-limit: specifier: ^7.3.1 version: 7.3.1(express@4.19.2) @@ -2367,6 +2370,14 @@ packages: domutils@3.1.0: resolution: {integrity: sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==} + dotenv-cli@7.4.2: + resolution: {integrity: sha512-SbUj8l61zIbzyhIbg0FwPJq6+wjbzdn9oEtozQpZ6kW2ihCcapKVZj49oCT3oPM+mgQm+itgvUQcG5szxVrZTA==} + hasBin: true + + dotenv-expand@10.0.0: + resolution: {integrity: sha512-GopVGCpVS1UKH75VKHGuQFqS1Gusej0z4FyQkPdwjil2gNIv+LNsqBlboOzpJFZKVT95GkCyWJbBSdFEFUWI2A==} + engines: {node: '>=12'} + dotenv@16.4.5: resolution: {integrity: sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==} engines: {node: '>=12'} @@ -7396,6 +7407,15 @@ snapshots: domelementtype: 2.3.0 domhandler: 5.0.3 + dotenv-cli@7.4.2: + dependencies: + cross-spawn: 7.0.3 + dotenv: 16.4.5 + dotenv-expand: 10.0.0 + minimist: 1.2.8 + + dotenv-expand@10.0.0: {} + dotenv@16.4.5: {} duck@0.1.12: diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 51f995c6..7d4beef1 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -1,5 +1,5 @@ import "dotenv/config"; -import "./services/sentry" +import "./sentry" import * as Sentry from "@sentry/node"; import { CustomError } from "../lib/custom-error"; import { @@ -105,8 +105,11 @@ const workerFun = async (queueName: string, processJobInternal: (token: string, const job = await worker.getNextJob(token); if (job) { Sentry.startSpan({ - name: "Job " + job.id, - parentSpan: null, + name: "Scrape job", + op: "bullmq.job", + attributes: { + job: job.id, + }, }, async () => { await processJobInternal(token, job); }); From 3ad9bf7ac088c7c6a33193916aacaaac4f62c4d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 21 Aug 2024 19:15:25 +0200 Subject: [PATCH 32/65] Update GH Actions deployment --- .github/workflows/fly-direct.yml | 4 +++- .github/workflows/fly.yml | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/fly-direct.yml b/.github/workflows/fly-direct.yml index aea0a48e..d395ff31 100644 --- a/.github/workflows/fly-direct.yml +++ b/.github/workflows/fly-direct.yml @@ -22,6 +22,7 @@ env: 
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} SUPABASE_URL: ${{ secrets.SUPABASE_URL }} TEST_API_KEY: ${{ secrets.TEST_API_KEY }} + SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} jobs: deploy: @@ -30,8 +31,9 @@ jobs: steps: - uses: actions/checkout@v3 - uses: superfly/flyctl-actions/setup-flyctl@master - - run: flyctl deploy --remote-only -a firecrawl-scraper-js + - run: flyctl deploy --remote-only -a firecrawl-scraper-js --build-secret SENTRY_AUTH_TOKEN=$SENTRY_AUTH_TOKEN working-directory: ./apps/api env: FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} + SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index 9e4b85a8..5b1b9f69 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -26,6 +26,7 @@ env: PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }} PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} jobs: pre-deploy-e2e-tests: @@ -211,11 +212,12 @@ jobs: steps: - uses: actions/checkout@v3 - uses: superfly/flyctl-actions/setup-flyctl@master - - run: flyctl deploy --remote-only -a firecrawl-scraper-js + - run: flyctl deploy --remote-only -a firecrawl-scraper-js --build-secret SENTRY_AUTH_TOKEN=$SENTRY_AUTH_TOKEN working-directory: ./apps/api env: FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} + SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} build-and-publish-python-sdk: name: Build and publish Python SDK From 85ff0c311e0f94c238dc4fb1d48f7e8ffd324be9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 21 Aug 2024 19:21:29 +0200 Subject: [PATCH 33/65] Add worker ID to job attribute --- apps/api/src/services/queue-worker.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 7d4beef1..2086d0a6 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -109,6 +109,7 @@ const workerFun = async (queueName: string, processJobInternal: (token: string, op: "bullmq.job", attributes: { job: job.id, + worker: process.env.FLY_MACHINE_ID ?? 
worker.id, }, }, async () => { await processJobInternal(token, job); From 3d5dc9d90aa657f1d532e52ab4ebacedd5933506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 21 Aug 2024 19:39:10 +0200 Subject: [PATCH 34/65] feat(sentry): add log + server name --- apps/api/src/services/sentry.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/api/src/services/sentry.ts b/apps/api/src/services/sentry.ts index 04a0b1e4..11dcab45 100644 --- a/apps/api/src/services/sentry.ts +++ b/apps/api/src/services/sentry.ts @@ -1,8 +1,10 @@ // Import with `import * as Sentry from "@sentry/node"` if you are using ESM import * as Sentry from "@sentry/node"; import { nodeProfilingIntegration } from "@sentry/profiling-node"; +import { Logger } from "../lib/logger"; if (process.env.SENTRY_DSN) { + Logger.info("Setting up Sentry..."); Sentry.init({ dsn: process.env.SENTRY_DSN, integrations: [ @@ -10,5 +12,6 @@ if (process.env.SENTRY_DSN) { ], tracesSampleRate: 1.0, profilesSampleRate: 1.0, + serverName: process.env.FLY_MACHINE_ID, }); } From 6104d7421342182c3297ea317e31f74a75dece7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 21 Aug 2024 20:12:47 +0200 Subject: [PATCH 35/65] fix(sentry): drop profiling sample rate --- apps/api/src/services/sentry.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/sentry.ts b/apps/api/src/services/sentry.ts index 11dcab45..266fe4c7 100644 --- a/apps/api/src/services/sentry.ts +++ b/apps/api/src/services/sentry.ts @@ -11,7 +11,7 @@ if (process.env.SENTRY_DSN) { nodeProfilingIntegration(), ], tracesSampleRate: 1.0, - profilesSampleRate: 1.0, + profilesSampleRate: 0.25, serverName: process.env.FLY_MACHINE_ID, }); } From 9579f03c4be33468ac0760d1f8c0592181ab9fc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 21 Aug 2024 20:16:06 +0200 Subject: [PATCH 36/65] fix: import resolution --- apps/api/tsconfig.json | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/apps/api/tsconfig.json b/apps/api/tsconfig.json index 239d9b3a..dd7f0ed2 100644 --- a/apps/api/tsconfig.json +++ b/apps/api/tsconfig.json @@ -17,12 +17,7 @@ "*": ["node_modules/*", "src/types/*"], }, - "inlineSources": true, - - // Set `sourceRoot` to "/" to strip the build path prefix - // from generated source code references. - // This improves issue grouping in Sentry. 
- "sourceRoot": "/" + "inlineSources": true }, "include": ["src/","src/**/*", "services/db/supabase.ts", "utils/utils.ts", "services/db/supabaseEmbeddings.ts", "utils/EventEmmitter.ts", "src/services/queue-service.ts"] } From ac9783ed2f70d4ea907d838e1c3e792a98d214ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 21 Aug 2024 20:21:16 +0200 Subject: [PATCH 37/65] fix(sentry): adjust profiles sample rate to be even lower --- apps/api/src/services/sentry.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/sentry.ts b/apps/api/src/services/sentry.ts index 266fe4c7..7bd318ad 100644 --- a/apps/api/src/services/sentry.ts +++ b/apps/api/src/services/sentry.ts @@ -11,7 +11,7 @@ if (process.env.SENTRY_DSN) { nodeProfilingIntegration(), ], tracesSampleRate: 1.0, - profilesSampleRate: 0.25, + profilesSampleRate: 0.045, serverName: process.env.FLY_MACHINE_ID, }); } From dae1408e66ff312f8cca6a2fc05dbfa1b7ba5170 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 21 Aug 2024 20:40:42 +0200 Subject: [PATCH 38/65] fix(Dockerfile): retain sentry auth token properly --- apps/api/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/api/Dockerfile b/apps/api/Dockerfile index 201b2ce9..3ffede0d 100644 --- a/apps/api/Dockerfile +++ b/apps/api/Dockerfile @@ -15,8 +15,7 @@ RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile RUN apt-get update -qq && apt-get install -y ca-certificates && update-ca-certificates RUN pnpm install RUN --mount=type=secret,id=SENTRY_AUTH_TOKEN \ - SENTRY_AUTH_TOKEN="$(cat /run/secrets/SENTRY_AUTH_TOKEN)" \ - bash -c "if [ -z $SENTRY_AUTH_TOKEN ]; then pnpm run build:nosentry; else pnpm run build; fi" + bash -c 'export SENTRY_AUTH_TOKEN="$(cat /run/secrets/SENTRY_AUTH_TOKEN)"; if [ -z $SENTRY_AUTH_TOKEN ]; then pnpm run build:nosentry; else pnpm run build; fi' # Install packages needed for deployment From 55009e51f5338c05a913da3c2a9b2e88c1d9f5f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 21 Aug 2024 20:49:25 +0200 Subject: [PATCH 39/65] fix: filter out invalid URLs from crawl links --- apps/api/src/scraper/WebScraper/crawler.ts | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index af3a9d69..02894cfc 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -267,9 +267,18 @@ export class WebCrawler { public filterURL(href: string, url: string): string | null { let fullUrl = href; if (!href.startsWith("http")) { - fullUrl = new URL(href, this.baseUrl).toString(); + try { + fullUrl = new URL(href, this.baseUrl).toString(); + } catch (_) { + return null; + } + } + let urlObj; + try { + urlObj = new URL(fullUrl); + } catch (_) { + return null; } - const urlObj = new URL(fullUrl); const path = urlObj.pathname; if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS From 629da74a5c673f641b613066042c511538c78f28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 21 Aug 2024 20:51:35 +0200 Subject: [PATCH 40/65] fix(sentry): decrease tracesSampleRate --- apps/api/src/services/sentry.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/services/sentry.ts b/apps/api/src/services/sentry.ts index 7bd318ad..1292773a 100644 --- a/apps/api/src/services/sentry.ts +++ 
b/apps/api/src/services/sentry.ts @@ -10,8 +10,8 @@ if (process.env.SENTRY_DSN) { integrations: [ nodeProfilingIntegration(), ], - tracesSampleRate: 1.0, - profilesSampleRate: 0.045, + tracesSampleRate: 0.045, + profilesSampleRate: 1.0, serverName: process.env.FLY_MACHINE_ID, }); } From e78d2af1f09a09aeab83faffe84c2e642efb5f64 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 21 Aug 2024 21:51:54 -0300 Subject: [PATCH 41/65] Nick: --- apps/api/src/controllers/auth.ts | 2 +- apps/dragonfly/fly.toml | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 apps/dragonfly/fly.toml diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 9d46d005..3b862c48 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -268,7 +268,7 @@ export async function supaAuthenticateUser( return { success: false, - error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`, + error: `Rate limit exceeded. Consumed (req/min): ${rateLimiterRes.consumedPoints}, Remaining (req/min): ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`, status: 429, }; } diff --git a/apps/dragonfly/fly.toml b/apps/dragonfly/fly.toml new file mode 100644 index 00000000..14bdbd96 --- /dev/null +++ b/apps/dragonfly/fly.toml @@ -0,0 +1,27 @@ +app = 'firecrawl-dragonfly' +primary_region = 'iad' + +[experimental] + cmd = ['dragonfly','--logtostderr', '--cluster_mode=emulated', '--lock_on_hashtags', "--bind","::"] +[build] + image = 'ghcr.io/dragonflydb/dragonfly' + +[[mounts]] + source = 'firecrawl_dragonfly' + destination = '/data' + +[[services]] + protocol = 'tcp' + internal_port = 6379 + + [[services.tcp_checks]] + interval = '10s' + timeout = '2s' + +[[vm]] + size = 'performance-4x' + memory = '32gb' + +[[metrics]] + port = 9091 + path = '/metrics' From 0cdf41587e30b62589367eb317e573621d6b5e48 Mon Sep 17 00:00:00 2001 From: Gergo Moricz Date: Thu, 22 Aug 2024 03:55:40 +0200 Subject: [PATCH 42/65] feat(sentry): add error handles to try-catch blocks --- apps/api/src/controllers/auth.ts | 8 ++++++++ apps/api/src/controllers/crawl-cancel.ts | 2 ++ apps/api/src/controllers/crawl-status.ts | 2 ++ apps/api/src/controllers/crawl.ts | 2 ++ apps/api/src/controllers/crawlPreview.ts | 2 ++ apps/api/src/controllers/scrape.ts | 2 ++ apps/api/src/controllers/search.ts | 3 +++ apps/api/src/controllers/status.ts | 2 ++ 8 files changed, 23 insertions(+) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 3b862c48..467d09fc 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -15,6 +15,7 @@ import { redlock } from "../../src/services/redlock"; import { getValue } from "../../src/services/redis"; import { setValue } from "../../src/services/redis"; import { validate } from "uuid"; +import * as Sentry from "@sentry/node"; function normalizedApiIsUuid(potentialUuid: string): boolean { // Check if the string is a valid UUID @@ -34,6 +35,7 @@ function setTrace(team_id: string, api_key: string) { api_key, }); } catch (error) { + Sentry.captureException(error); Logger.error(`Error setting trace attributes: ${error.message}`); } } @@ -49,6 +51,7 @@ async function 
getKeyAndPriceId(normalizedApi: string): Promise<{ api_key: normalizedApi, }); if (error) { + Sentry.captureException(error); Logger.error(`RPC ERROR (get_key_and_price_id_2): ${error.message}`); return { success: false, @@ -59,6 +62,7 @@ async function getKeyAndPriceId(normalizedApi: string): Promise<{ } if (!data || data.length === 0) { Logger.warn(`Error fetching api key: ${error.message} or data is empty`); + Sentry.captureException(error); // TODO: change this error code ? return { success: false, @@ -152,6 +156,7 @@ export async function supaAuthenticateUser( ); } } catch (error) { + Sentry.captureException(error); Logger.error(`Error with auth function: ${error}`); // const { // success, @@ -302,6 +307,9 @@ export async function supaAuthenticateUser( .eq("key", normalizedApi); if (error || !data || data.length === 0) { + if (error) { + Sentry.captureException(error); + } Logger.warn(`Error fetching api key: ${error.message} or data is empty`); return { success: false, diff --git a/apps/api/src/controllers/crawl-cancel.ts b/apps/api/src/controllers/crawl-cancel.ts index ed2c4166..1de9af60 100644 --- a/apps/api/src/controllers/crawl-cancel.ts +++ b/apps/api/src/controllers/crawl-cancel.ts @@ -4,6 +4,7 @@ import { RateLimiterMode } from "../../src/types"; import { supabase_service } from "../../src/services/supabase"; import { Logger } from "../../src/lib/logger"; import { getCrawl, saveCrawl } from "../../src/lib/crawl-redis"; +import * as Sentry from "@sentry/node"; export async function crawlCancelController(req: Request, res: Response) { try { @@ -50,6 +51,7 @@ export async function crawlCancelController(req: Request, res: Response) { status: "cancelled" }); } catch (error) { + Sentry.captureException(error); Logger.error(error); return res.status(500).json({ error: error.message }); } diff --git a/apps/api/src/controllers/crawl-status.ts b/apps/api/src/controllers/crawl-status.ts index 3488ce26..76147263 100644 --- a/apps/api/src/controllers/crawl-status.ts +++ b/apps/api/src/controllers/crawl-status.ts @@ -5,6 +5,7 @@ import { getScrapeQueue } from "../../src/services/queue-service"; import { Logger } from "../../src/lib/logger"; import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis"; import { supabaseGetJobsById } from "../../src/lib/supabase-jobs"; +import * as Sentry from "@sentry/node"; export async function getJobs(ids: string[]) { const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x); @@ -63,6 +64,7 @@ export async function crawlStatusController(req: Request, res: Response) { partial_data: jobStatus === "completed" ? 
[] : data.filter(x => x !== null), }); } catch (error) { + Sentry.captureException(error); Logger.error(error); return res.status(500).json({ error: error.message }); } diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 1dfe758f..d40f2a9e 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -25,6 +25,7 @@ import { } from "../../src/lib/crawl-redis"; import { getScrapeQueue } from "../../src/services/queue-service"; import { checkAndUpdateURL } from "../../src/lib/validateUrl"; +import * as Sentry from "@sentry/node"; export async function crawlController(req: Request, res: Response) { try { @@ -194,6 +195,7 @@ export async function crawlController(req: Request, res: Response) { res.json({ jobId: id }); } catch (error) { + Sentry.captureException(error); Logger.error(error); return res.status(500).json({ error: error.message }); } diff --git a/apps/api/src/controllers/crawlPreview.ts b/apps/api/src/controllers/crawlPreview.ts index cc10dc8e..59b54458 100644 --- a/apps/api/src/controllers/crawlPreview.ts +++ b/apps/api/src/controllers/crawlPreview.ts @@ -7,6 +7,7 @@ import { Logger } from "../../src/lib/logger"; import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../src/lib/crawl-redis"; import { addScrapeJob } from "../../src/services/queue-jobs"; import { checkAndUpdateURL } from "../../src/lib/validateUrl"; +import * as Sentry from "@sentry/node"; export async function crawlPreviewController(req: Request, res: Response) { try { @@ -129,6 +130,7 @@ export async function crawlPreviewController(req: Request, res: Response) { res.json({ jobId: id }); } catch (error) { + Sentry.captureException(error); Logger.error(error); return res.status(500).json({ error: error.message }); } diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 273b4c56..b2d1db34 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -12,6 +12,7 @@ import { addScrapeJob } from '../services/queue-jobs'; import { scrapeQueueEvents } from '../services/queue-service'; import { v4 as uuidv4 } from "uuid"; import { Logger } from '../lib/logger'; +import * as Sentry from "@sentry/node"; export async function scrapeHelper( jobId: string, @@ -186,6 +187,7 @@ export async function scrapeController(req: Request, res: Response) { return res.status(result.returnCode).json(result); } catch (error) { + Sentry.captureException(error); Logger.error(error); return res.status(500).json({ error: error.message }); } diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index 759c7805..21a8e390 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -10,6 +10,7 @@ import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; import { v4 as uuidv4 } from "uuid"; import { Logger } from "../lib/logger"; import { getScrapeQueue, scrapeQueueEvents } from "../services/queue-service"; +import * as Sentry from "@sentry/node"; export async function searchHelper( jobId: string, @@ -154,6 +155,7 @@ export async function searchController(req: Request, res: Response) { return res.status(402).json({ error: "Insufficient credits" }); } } catch (error) { + Sentry.captureException(error); Logger.error(error); return res.status(500).json({ error: "Internal server error" }); } @@ -184,6 +186,7 @@ export async function searchController(req: Request, res: Response) { }); return 
res.status(result.returnCode).json(result);
   } catch (error) {
+    Sentry.captureException(error);
     Logger.error(error);
     return res.status(500).json({ error: error.message });
   }
diff --git a/apps/api/src/controllers/status.ts b/apps/api/src/controllers/status.ts
index e469060f..c3ca906f 100644
--- a/apps/api/src/controllers/status.ts
+++ b/apps/api/src/controllers/status.ts
@@ -4,6 +4,7 @@ import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis";
 import { getScrapeQueue } from "../../src/services/queue-service";
 import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
 import { getJobs } from "./crawl-status";
+import * as Sentry from "@sentry/node";
 
 export async function crawlJobStatusPreviewController(req: Request, res: Response) {
   try {
@@ -37,6 +38,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons
       partial_data: jobStatus === "completed" ? [] : data.filter(x => x !== null),
     });
   } catch (error) {
+    Sentry.captureException(error);
     Logger.error(error);
     return res.status(500).json({ error: error.message });
   }

From 14fa75cae6a6da480bb8d8edec8fc7d76e65e7cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Thu, 22 Aug 2024 13:08:54 +0200
Subject: [PATCH 43/65] fix(crawl): send error if url is not a string

Fixes FIRECRAWL-SCRAPER-JS-1E and FIRECRAWL-SCRAPER-JS-Z
---
 apps/api/src/controllers/crawl.ts | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts
index d40f2a9e..2e31c257 100644
--- a/apps/api/src/controllers/crawl.ts
+++ b/apps/api/src/controllers/crawl.ts
@@ -72,6 +72,9 @@ export async function crawlController(req: Request, res: Response) {
     if (!url) {
       return res.status(400).json({ error: "Url is required" });
     }
+    if (typeof url !== "string") {
+      return res.status(400).json({ error: "URL must be a string" });
+    }
     try {
       url = checkAndUpdateURL(url).url;
     } catch (e) {
@@ -87,8 +90,6 @@ export async function crawlController(req: Request, res: Response) {
       });
     }
 
-    const mode = req.body.mode ?? "crawl";
-
     // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
     //   try {
     //     const a = new WebScraperDataProvider();

From 508568f9438166d0fe564f6fac0aec1753968c50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5%91=20M=C3=B3ricz?=
Date: Thu, 22 Aug 2024 13:10:58 +0200
Subject: [PATCH 44/65] fix(search): handle scrape timeouts on search

Fixes FIRECRAWL-SCRAPER-JS-15
---
 apps/api/src/controllers/search.ts | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts
index 21a8e390..63820aba 100644
--- a/apps/api/src/controllers/search.ts
+++ b/apps/api/src/controllers/search.ts
@@ -186,6 +186,10 @@ export async function searchController(req: Request, res: Response) {
     });
     return res.status(result.returnCode).json(result);
   } catch (error) {
+    if (error instanceof Error && error.message.startsWith("Job wait")) {
+      return res.status(408).json({ error: "Request timed out" });
+    }
+
     Sentry.captureException(error);
     Logger.error(error);
     return res.status(500).json({ error: error.message });
   }

From fbbc3878f189b661e4096833fcf0517dd052049a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Thu, 22 Aug 2024 13:18:26 +0200
Subject: [PATCH 45/65] fix(crawler): make sure includes/excludes is an array

---
 apps/api/src/scraper/WebScraper/crawler.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts
index 02894cfc..67f1c22e 100644
--- a/apps/api/src/scraper/WebScraper/crawler.ts
+++ b/apps/api/src/scraper/WebScraper/crawler.ts
@@ -53,8 +53,8 @@ export class WebCrawler {
     this.jobId = jobId;
     this.initialUrl = initialUrl;
     this.baseUrl = new URL(initialUrl).origin;
-    this.includes = includes ?? [];
-    this.excludes = excludes ?? [];
+    this.includes = Array.isArray(includes) ? includes : [];
+    this.excludes = Array.isArray(excludes) ? excludes : [];
     this.limit = limit;
     this.robotsTxtUrl = `${this.baseUrl}/robots.txt`;
     this.robots = robotsParser(this.robotsTxtUrl, "");

From 1f580deefc073faecf95b43fe5e77820ff421ad1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Thu, 22 Aug 2024 13:29:11 +0200
Subject: [PATCH 46/65] fix(crawl): validate includes/excludes regexes

---
 apps/api/src/controllers/crawl.ts | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts
index 2e31c257..4335334a 100644
--- a/apps/api/src/controllers/crawl.ts
+++ b/apps/api/src/controllers/crawl.ts
@@ -57,6 +57,26 @@ export async function crawlController(req: Request, res: Response) {
     };
     const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
 
+    if (Array.isArray(crawlerOptions.includes)) {
+      for (const x of crawlerOptions.includes) {
+        try {
+          new RegExp(x);
+        } catch (e) {
+          return res.status(400).json({ error: e.message });
+        }
+      }
+    }
+
+    if (Array.isArray(crawlerOptions.excludes)) {
+      for (const x of crawlerOptions.excludes) {
+        try {
+          new RegExp(x);
+        } catch (e) {
+          return res.status(400).json({ error: e.message });
+        }
+      }
+    }
+
     const limitCheck = req.body?.crawlerOptions?.limit ?? 
1; const { success: creditsCheckSuccess, message: creditsCheckMessage, remainingCredits } = await checkTeamCredits(team_id, limitCheck); From 7d9f5bf8b1a616fc2dbc51b7e54b4c3b79770bec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 13:40:55 +0200 Subject: [PATCH 47/65] fix(crawl): don't use sitemap if it's empty Fixes FIRECRAWL-SCRAPER-JS-11 --- apps/api/src/controllers/crawl.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 4335334a..c299dc01 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -163,7 +163,7 @@ export async function crawlController(req: Request, res: Response) { ? null : await crawler.tryGetSitemap(); - if (sitemap !== null) { + if (sitemap !== null && sitemap.length > 0) { const jobs = sitemap.map((x) => { const url = x.url; const uuid = uuidv4(); From 670d253a8cfed25042d3e075b944fbef4a7e7bce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 14:08:09 +0200 Subject: [PATCH 48/65] fix(auth): fix error reporting --- apps/api/src/controllers/auth.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 467d09fc..ac60dc53 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -61,8 +61,10 @@ async function getKeyAndPriceId(normalizedApi: string): Promise<{ }; } if (!data || data.length === 0) { - Logger.warn(`Error fetching api key: ${error.message} or data is empty`); - Sentry.captureException(error); + if (error) { + Logger.warn(`Error fetching api key: ${error.message} or data is empty`); + Sentry.captureException(error); + } // TODO: change this error code ? return { success: false, @@ -309,8 +311,8 @@ export async function supaAuthenticateUser( if (error || !data || data.length === 0) { if (error) { Sentry.captureException(error); + Logger.warn(`Error fetching api key: ${error.message} or data is empty`); } - Logger.warn(`Error fetching api key: ${error.message} or data is empty`); return { success: false, error: "Unauthorized: Invalid token", From e4adbaa88eaa6e56985df8b1e6087ded95c4fc8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 14:12:52 +0200 Subject: [PATCH 49/65] fix(llm-extract): handle llm-extract if scrape failed --- apps/api/src/lib/LLM-extraction/models.ts | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/apps/api/src/lib/LLM-extraction/models.ts b/apps/api/src/lib/LLM-extraction/models.ts index e696a8cd..8ca6bbd4 100644 --- a/apps/api/src/lib/LLM-extraction/models.ts +++ b/apps/api/src/lib/LLM-extraction/models.ts @@ -15,7 +15,7 @@ const defaultPrompt = function prepareOpenAIDoc( document: Document, mode: "markdown" | "raw-html" -): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] { +): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null { let markdown = document.markdown; @@ -27,9 +27,10 @@ function prepareOpenAIDoc( // Check if the markdown content exists in the document if (!extractionTarget) { - throw new Error( - `${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai` - ); + return null; + // throw new Error( + // `${mode} content is missing in the document. 
This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai`
+    // );
   }
 
@@ -64,7 +65,16 @@ export async function generateOpenAICompletions({
   mode: "markdown" | "raw-html";
 }): Promise<Document> {
   const openai = client as OpenAI;
-  const [content, numTokens] = prepareOpenAIDoc(document, mode);
+  const preparedDoc = prepareOpenAIDoc(document, mode);
+
+  if (preparedDoc === null) {
+    return {
+      ...document,
+      warning: "LLM extraction was not performed since the document's content is empty or missing.",
+    };
+  }
+
+  const [content, numTokens] = preparedDoc;
 
   const completion = await openai.chat.completions.create({
     model,

From 4bd2ff26d308d096f703b5b308d9660d6bbaf0fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Thu, 22 Aug 2024 14:37:09 +0200
Subject: [PATCH 50/65] fix(llm-extract): pass stacktrace properly

---
 apps/api/src/lib/LLM-extraction/index.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/api/src/lib/LLM-extraction/index.ts b/apps/api/src/lib/LLM-extraction/index.ts
index 85a7e995..af8b0bb1 100644
--- a/apps/api/src/lib/LLM-extraction/index.ts
+++ b/apps/api/src/lib/LLM-extraction/index.ts
@@ -46,7 +46,7 @@ export async function generateCompletions(
         return completionResult;
       } catch (error) {
         Logger.error(`Error generating completions: ${error}`);
-        throw new Error(`Error generating completions: ${error.message}`);
+        throw error;
       }
     default:
       throw new Error("Invalid client");

From 0e8fd6ce7089c7dda62540b6c2ddda2071593246 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Thu, 22 Aug 2024 14:50:51 +0200
Subject: [PATCH 51/65] fix(scrape): ensure extractionSchema is an object if llm-extraction is specified

---
 apps/api/src/controllers/scrape.ts | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts
index b2d1db34..959cc546 100644
--- a/apps/api/src/controllers/scrape.ts
+++ b/apps/api/src/controllers/scrape.ts
@@ -105,6 +105,10 @@ export async function scrapeController(req: Request, res: Response) {
     let timeout = req.body.timeout ?? defaultTimeout;
 
     if (extractorOptions.mode.includes("llm-extraction")) {
+      if (typeof extractorOptions.extractionSchema !== "object" || extractorOptions.extractionSchema === null) {
+        return res.status(400).json({ error: "extractorOptions.extractionSchema must be an object if llm-extraction mode is specified" });
+      }
+
       pageOptions.onlyMainContent = true;
       timeout = req.body.timeout ?? 
90000; } From 5ca36fe9fcf6e18acfb42150a2110f2f1b3c722d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 15:49:16 +0200 Subject: [PATCH 52/65] feat(api): add more captureExceptions --- apps/api/src/index.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 6a6437b3..0674a46f 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -119,6 +119,7 @@ if (cluster.isMaster) { waitingJobs, }); } catch (error) { + Sentry.captureException(error); Logger.error(error); return res.status(500).json({ error: error.message }); } @@ -170,6 +171,7 @@ if (cluster.isMaster) { }, timeout); } } catch (error) { + Sentry.captureException(error); Logger.debug(error); } }; From 6d92b8524d19a900de2a3145ac6a8c0aa19a77f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 16:00:13 +0200 Subject: [PATCH 53/65] feat(scrape): record job result in span --- apps/api/src/controllers/scrape.ts | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 959cc546..b0004276 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -49,18 +49,28 @@ export async function scrapeHelper( }, {}, jobId); let doc; - try { - doc = (await job.waitUntilFinished(scrapeQueueEvents, timeout))[0]; //60 seconds timeout - } catch (e) { - if (e instanceof Error && e.message.startsWith("Job wait")) { - return { - success: false, - error: "Request timed out", - returnCode: 408, + + const err = await Sentry.startSpanManual({ name: "Wait for job to finish", op: "bullmq.wait", attributes: { job: jobId } }, async (span) => { + try { + doc = (await job.waitUntilFinished(scrapeQueueEvents, timeout))[0] + } catch (e) { + if (e instanceof Error && e.message.startsWith("Job wait")) { + span.setAttribute("timedOut", true).end(); + return { + success: false, + error: "Request timed out", + returnCode: 408, + } + } else { + throw e; } - } else { - throw e; } + span.setAttribute("result", JSON.stringify(doc)).end(); + return null; + }); + + if (err !== null) { + return err; } await job.remove(); From 6d48dbcd38a5a8173b6917a38338bf296dfc23e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 16:47:38 +0200 Subject: [PATCH 54/65] feat(sentry): add trace continuity for queue --- apps/api/src/controllers/crawl.ts | 9 +++- apps/api/src/controllers/scrape.ts | 6 +-- apps/api/src/controllers/status.ts | 2 - apps/api/src/main/runWebScraper.ts | 1 - apps/api/src/scraper/WebScraper/index.ts | 1 - apps/api/src/services/queue-jobs.ts | 39 ++++++++++++++-- apps/api/src/services/queue-worker.ts | 59 +++++++++++++++++++----- apps/api/src/services/sentry.ts | 3 +- 8 files changed, 95 insertions(+), 25 deletions(-) diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index c299dc01..c5f440e2 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -194,7 +194,14 @@ export async function crawlController(req: Request, res: Response) { id, jobs.map((x) => x.opts.jobId) ); - await getScrapeQueue().addBulk(jobs); + if (Sentry.isInitialized()) { + for (const job of jobs) { + // add with sentry instrumentation + await addScrapeJob(job.data as any, {}, job.opts.jobId); + } + } else { + await getScrapeQueue().addBulk(jobs); + } } else { await lockURL(id, sc, url); const job = await 
addScrapeJob(
diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts
index b0004276..3666fc1a 100644
--- a/apps/api/src/controllers/scrape.ts
+++ b/apps/api/src/controllers/scrape.ts
@@ -50,12 +50,12 @@ export async function scrapeHelper(
 
   let doc;
 
-  const err = await Sentry.startSpanManual({ name: "Wait for job to finish", op: "bullmq.wait", attributes: { job: jobId } }, async (span) => {
+  const err = await Sentry.startSpan({ name: "Wait for job to finish", op: "bullmq.wait", attributes: { job: jobId } }, async (span) => {
     try {
       doc = (await job.waitUntilFinished(scrapeQueueEvents, timeout))[0]
     } catch (e) {
       if (e instanceof Error && e.message.startsWith("Job wait")) {
-        span.setAttribute("timedOut", true).end();
+        span.setAttribute("timedOut", true);
         return {
           success: false,
           error: "Request timed out",
@@ -65,7 +65,7 @@ export async function scrapeHelper(
         throw e;
       }
     }
-    span.setAttribute("result", JSON.stringify(doc)).end();
+    span.setAttribute("result", JSON.stringify(doc));
     return null;
   });
 
diff --git a/apps/api/src/controllers/status.ts b/apps/api/src/controllers/status.ts
index c3ca906f..362f1f24 100644
--- a/apps/api/src/controllers/status.ts
+++ b/apps/api/src/controllers/status.ts
@@ -1,8 +1,6 @@
 import { Request, Response } from "express";
 import { Logger } from "../../src/lib/logger";
 import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis";
-import { getScrapeQueue } from "../../src/services/queue-service";
-import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
 import { getJobs } from "./crawl-status";
 import * as Sentry from "@sentry/node";
 
diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts
index 2be05bd5..aea7876e 100644
--- a/apps/api/src/main/runWebScraper.ts
+++ b/apps/api/src/main/runWebScraper.ts
@@ -12,7 +12,6 @@ import { Document } from "../lib/entities";
 import { supabase_service } from "../services/supabase";
 import { Logger } from "../lib/logger";
 import { ScrapeEvents } from "../lib/scrape-events";
-import { getScrapeQueue } from "../services/queue-service";
 
 export async function startWebScraperPipeline({
   job,
diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts
index 65247df1..38d0cc32 100644
--- a/apps/api/src/scraper/WebScraper/index.ts
+++ b/apps/api/src/scraper/WebScraper/index.ts
@@ -16,7 +16,6 @@ import {
   replacePathsWithAbsolutePaths,
 } from "./utils/replacePaths";
 import { generateCompletions } from "../../lib/LLM-extraction";
-import { getScrapeQueue } from "../../../src/services/queue-service";
 import { fetchAndProcessDocx } from "./utils/docxProcessor";
 import { getAdjustedMaxDepth, getURLDepth } from "./utils/maxDepthUtils";
 import { Logger } from "../../lib/logger";
diff --git a/apps/api/src/services/queue-jobs.ts b/apps/api/src/services/queue-jobs.ts
index 3099da68..33997890 100644
--- a/apps/api/src/services/queue-jobs.ts
+++ b/apps/api/src/services/queue-jobs.ts
@@ -2,11 +2,12 @@ import { Job, Queue } from "bullmq";
 import { getScrapeQueue } from "./queue-service";
 import { v4 as uuidv4 } from "uuid";
 import { WebScraperOptions } from "../types";
+import * as Sentry from "@sentry/node";
 
-export async function addScrapeJob(
-  webScraperOptions: WebScraperOptions,
-  options: any = {},
-  jobId: string = uuidv4(),
+async function addScrapeJobRaw(
+  webScraperOptions: any,
+  options: any,
+  jobId: string,
 ): Promise<Job> {
   return await getScrapeQueue().add(jobId, webScraperOptions, {
     priority: webScraperOptions.crawl_id ? 
20 : 10,
     ...options,
     jobId,
   });
 }
 
+export async function addScrapeJob(
+  webScraperOptions: WebScraperOptions,
+  options: any = {},
+  jobId: string = uuidv4(),
+): Promise<Job> {
+  if (Sentry.isInitialized()) {
+    const size = JSON.stringify(webScraperOptions).length;
+    return await Sentry.startSpan({
+      name: "Add scrape job",
+      op: "queue.publish",
+      attributes: {
+        "messaging.message.id": jobId,
+        "messaging.destination.name": getScrapeQueue().name,
+        "messaging.message.body.size": size,
+      },
+    }, async (span) => {
+      return await addScrapeJobRaw({
+        ...webScraperOptions,
+        sentry: {
+          trace: Sentry.spanToTraceHeader(span),
+          baggage: Sentry.spanToBaggageHeader(span),
+          size,
+        },
+      }, options, jobId);
+    });
+  } else {
+    return await addScrapeJobRaw(webScraperOptions, options, jobId);
+  }
+}
+
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index 2086d0a6..a7d20383 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -50,6 +50,7 @@ const processJobInternal = async (token: string, job: Job) => {
     await job.extendLock(token, jobLockExtensionTime);
   }, jobLockExtendInterval);
 
+  let err = null;
   try {
     const result = await processJob(job, token);
     try{
@@ -62,11 +63,14 @@ const processJobInternal = async (token: string, job: Job) => {
     }
   } catch (error) {
     console.log("Job failed, error:", error);
-
+    Sentry.captureException(error);
+    err = error;
     await job.moveToFailed(error, token, false);
   } finally {
     clearInterval(extendLockInterval);
   }
+
+  return err;
 };
 
 let isShuttingDown = false;
@@ -76,7 +80,7 @@ process.on("SIGINT", () => {
   isShuttingDown = true;
 });
 
-const workerFun = async (queueName: string, processJobInternal: (token: string, job: Job) => Promise<void>) => {
+const workerFun = async (queueName: string, processJobInternal: (token: string, job: Job) => Promise<Error | null>) => {
   const worker = new Worker(queueName, null, {
     connection: redisConnection,
     lockDuration: 1 * 60 * 1000, // 1 minute
@@ -104,16 +108,47 @@ const workerFun = async (queueName: string, processJobInternal: (token: string,
     const job = await worker.getNextJob(token);
 
     if (job) {
-      Sentry.startSpan({
-        name: "Scrape job",
-        op: "bullmq.job",
-        attributes: {
-          job: job.id,
-          worker: process.env.FLY_MACHINE_ID ?? worker.id,
-        },
-      }, async () => {
-        await processJobInternal(token, job);
-      });
+      if (job.data && job.data.sentry && Sentry.isInitialized()) {
+        Sentry.continueTrace({ sentryTrace: job.data.sentry.trace, baggage: job.data.sentry.baggage }, () => {
+          Sentry.startSpan({
+            name: "Scrape job",
+            attributes: {
+              job: job.id,
+              worker: process.env.FLY_MACHINE_ID ?? worker.id,
+            },
+          }, async (span) => {
+            await Sentry.startSpan({
+              name: "Process scrape job",
+              op: "queue.process",
+              attributes: {
+                "messaging.message.id": job.id,
+                "messaging.destination.name": getScrapeQueue().name,
+                "messaging.message.body.size": job.data.sentry.size,
+                "messaging.message.receive.latency": Date.now() - (job.processedOn ?? job.timestamp),
+                "messaging.message.retry.count": job.attemptsMade,
+              }
+            }, async () => {
+              const res = await processJobInternal(token, job);
+              if (res !== null) {
+                span.setStatus({ code: 2 }); // ERROR
+              } else {
+                span.setStatus({ code: 1 }); // OK
+              }
+            });
+          });
+        });
+      } else {
+        Sentry.startSpan({
+          name: "Scrape job",
+          attributes: {
+            job: job.id,
+            worker: process.env.FLY_MACHINE_ID ?? 
worker.id, + }, + }, () => { + processJobInternal(token, job); + }); + } + await sleep(gotJobInterval); } else { await sleep(connectionMonitorInterval); diff --git a/apps/api/src/services/sentry.ts b/apps/api/src/services/sentry.ts index 1292773a..176d3d4b 100644 --- a/apps/api/src/services/sentry.ts +++ b/apps/api/src/services/sentry.ts @@ -10,8 +10,9 @@ if (process.env.SENTRY_DSN) { integrations: [ nodeProfilingIntegration(), ], - tracesSampleRate: 0.045, + tracesSampleRate: process.env.SENTRY_ENVIRONMENT === "dev" ? 1.0 : 0.045, profilesSampleRate: 1.0, serverName: process.env.FLY_MACHINE_ID, + environment: process.env.SENTRY_ENVIRONMENT ?? "production", }); } From d036738da05b5d85dbcafdad3508ff4fc1aa44b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 18:04:09 +0200 Subject: [PATCH 55/65] fix(bullmq): duplicate redis connection for QueueEvents --- apps/api/src/services/queue-service.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/queue-service.ts b/apps/api/src/services/queue-service.ts index b13489a6..2e6d7562 100644 --- a/apps/api/src/services/queue-service.ts +++ b/apps/api/src/services/queue-service.ts @@ -37,4 +37,4 @@ export function getScrapeQueue() { import { QueueEvents } from 'bullmq'; -export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection }); \ No newline at end of file +export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection.duplicate() }); \ No newline at end of file From 7265ab7c67457013bae626493957061f95c0e761 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 18:46:56 +0200 Subject: [PATCH 56/65] fix(search): filter docs properly --- apps/api/src/controllers/search.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index 63820aba..8a04a978 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -108,7 +108,7 @@ export async function searchHelper( // make sure doc.content is not empty const filteredDocs = docs.filter( - (doc: { content?: string }) => doc.content && doc.content.trim().length > 0 + (doc: { content?: string }) => doc && doc.content && doc.content.trim().length > 0 ); if (filteredDocs.length === 0) { From dd737f1235fad97d602d1a1ac37c7c453cdfa4a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 19:17:51 +0200 Subject: [PATCH 57/65] feat(sentry): add queue instrumentation to --- apps/api/src/controllers/search.ts | 14 ++++++++++++-- apps/api/src/services/queue-jobs.ts | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index 8a04a978..304176a3 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -11,6 +11,7 @@ import { v4 as uuidv4 } from "uuid"; import { Logger } from "../lib/logger"; import { getScrapeQueue, scrapeQueueEvents } from "../services/queue-service"; import * as Sentry from "@sentry/node"; +import { addScrapeJob } from "../services/queue-jobs"; export async function searchHelper( jobId: string, @@ -95,8 +96,17 @@ export async function searchHelper( } }; }) - - const jobs = await getScrapeQueue().addBulk(jobDatas); + + let jobs = []; + if (Sentry.isInitialized()) { + for (const job of jobDatas) { + // add with sentry instrumentation + jobs.push(await 
addScrapeJob(job.data as any, {}, job.opts.jobId));
+    }
+  } else {
+    jobs = await getScrapeQueue().addBulk(jobDatas);
+  }
 
   const docs = (await Promise.all(jobs.map(x => x.waitUntilFinished(scrapeQueueEvents, 60000)))).map(x => x[0]);
 
diff --git a/apps/api/src/services/queue-jobs.ts b/apps/api/src/services/queue-jobs.ts
index 33997890..888cdefc 100644
--- a/apps/api/src/services/queue-jobs.ts
+++ b/apps/api/src/services/queue-jobs.ts
@@ -10,8 +10,8 @@ async function addScrapeJobRaw(
   jobId: string,
 ): Promise<Job> {
   return await getScrapeQueue().add(jobId, webScraperOptions, {
-    priority: webScraperOptions.crawl_id ? 20 : 10,
     ...options,
+    priority: webScraperOptions.crawl_id ? 20 : 10,
     jobId,
   });
 }

From ad82175fb8d390167e4dc6799a6ee7b2d197db19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Thu, 22 Aug 2024 22:12:02 +0200
Subject: [PATCH 58/65] fix(scrape): poll

---
 apps/api/src/controllers/scrape.ts | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts
index 3666fc1a..e9bd33b8 100644
--- a/apps/api/src/controllers/scrape.ts
+++ b/apps/api/src/controllers/scrape.ts
@@ -9,7 +9,7 @@ import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import
 import { numTokensFromString } from '../lib/LLM-extraction/helpers';
 import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values';
 import { addScrapeJob } from '../services/queue-jobs';
-import { scrapeQueueEvents } from '../services/queue-service';
+import { getScrapeQueue, scrapeQueueEvents } from '../services/queue-service';
 import { v4 as uuidv4 } from "uuid";
 import { Logger } from '../lib/logger';
 import * as Sentry from "@sentry/node";
@@ -52,7 +52,19 @@ export async function scrapeHelper(
 
   const err = await Sentry.startSpan({ name: "Wait for job to finish", op: "bullmq.wait", attributes: { job: jobId } }, async (span) => {
     try {
-      doc = (await job.waitUntilFinished(scrapeQueueEvents, timeout))[0]
+      doc = (await new Promise((resolve, reject) => {
+        const start = Date.now();
+        const int = setInterval(async () => {
+          if (Date.now() >= start + timeout) {
+            clearInterval(int);
+            reject(new Error("Job wait "));
+          } else if (await job.getState() === "completed") {
+            clearInterval(int);
+            resolve((await getScrapeQueue().getJob(job.id)).returnvalue);
+          }
+        }, 1000);
+        job.waitUntilFinished(scrapeQueueEvents, timeout)
+      }))[0]
     } catch (e) {
       if (e instanceof Error && e.message.startsWith("Job wait")) {
         span.setAttribute("timedOut", true);

From 76c8e9f996a2ca01c0cb2c25ff137fd7665902e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Thu, 22 Aug 2024 22:24:24 +0200
Subject: [PATCH 59/65] fix

---
 apps/api/src/controllers/scrape.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts
index e9bd33b8..4f992891 100644
--- a/apps/api/src/controllers/scrape.ts
+++ b/apps/api/src/controllers/scrape.ts
@@ -63,7 +63,6 @@ export async function scrapeHelper(
             resolve((await getScrapeQueue().getJob(job.id)).returnvalue);
           }
         }, 1000);
-        job.waitUntilFinished(scrapeQueueEvents, timeout)
       }))[0]
     } catch (e) {
       if (e instanceof Error && e.message.startsWith("Job wait")) {
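Patches 58–59 above (and patch 60 below) move job waiting from BullMQ's `QueueEvents`-based `waitUntilFinished` to manual state polling. A condensed sketch of that pattern, assuming only the BullMQ `Job`/`Queue` API already used in the diffs — `pollJobResult` is an illustrative name, not a function in the codebase:

```ts
import { Job, Queue } from "bullmq";

// Poll a job's state once per second instead of subscribing to QueueEvents.
// On timeout, reject with a "Job wait"-prefixed error so the existing
// startsWith("Job wait") check maps it to a 408; on completion, re-fetch the
// job, since the local instance's returnvalue is stale.
async function pollJobResult(queue: Queue, job: Job, timeout: number): Promise<any> {
  const start = Date.now();
  return new Promise((resolve, reject) => {
    const int = setInterval(async () => {
      if (Date.now() >= start + timeout) {
        clearInterval(int);
        reject(new Error("Job wait timed out"));
      } else if ((await job.getState()) === "completed") {
        clearInterval(int);
        resolve((await queue.getJob(job.id)).returnvalue);
      }
    }, 1000);
  });
}
```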
From e690a6fda7d0b600880fbd1f988282b8c8fa5459 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Thu, 22 Aug 2024 22:38:39 +0200
Subject: [PATCH 60/65] fix: remove QueueEvents

---
 apps/api/src/controllers/scrape.ts     |  2 +-
 apps/api/src/controllers/search.ts     | 15 +++++++++++++--
 apps/api/src/services/queue-service.ts |  6 +++---
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts
index 4f992891..3ffbc92b 100644
--- a/apps/api/src/controllers/scrape.ts
+++ b/apps/api/src/controllers/scrape.ts
@@ -9,7 +9,7 @@ import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import
 import { numTokensFromString } from '../lib/LLM-extraction/helpers';
 import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values';
 import { addScrapeJob } from '../services/queue-jobs';
-import { getScrapeQueue, scrapeQueueEvents } from '../services/queue-service';
+import { getScrapeQueue } from '../services/queue-service';
 import { v4 as uuidv4 } from "uuid";
 import { Logger } from '../lib/logger';
 import * as Sentry from "@sentry/node";
diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts
index 304176a3..d86862b1 100644
--- a/apps/api/src/controllers/search.ts
+++ b/apps/api/src/controllers/search.ts
@@ -9,7 +9,7 @@ import { search } from "../search";
 import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
 import { v4 as uuidv4 } from "uuid";
 import { Logger } from "../lib/logger";
-import { getScrapeQueue, scrapeQueueEvents } from "../services/queue-service";
+import { getScrapeQueue } from "../services/queue-service";
 import * as Sentry from "@sentry/node";
 import { addScrapeJob } from "../services/queue-jobs";
 
@@ -107,7 +107,18 @@ export async function searchHelper(
     jobs = await getScrapeQueue().addBulk(jobDatas);
   }
 
-  const docs = (await Promise.all(jobs.map(x => x.waitUntilFinished(scrapeQueueEvents, 60000)))).map(x => x[0]);
+  const docs = (await Promise.all(jobs.map(x => new Promise((resolve, reject) => {
+    const start = Date.now();
+    const int = setInterval(async () => {
+      if (Date.now() >= start + 60000) {
+        clearInterval(int);
+        reject(new Error("Job wait "));
+      } else if (await x.getState() === "completed") {
+        clearInterval(int);
+        resolve((await getScrapeQueue().getJob(x.id)).returnvalue);
+      }
+    }, 1000);
+  })))).map(x => x[0]);
 
   if (docs.length === 0) {
     return { success: true, error: "No search results found", returnCode: 200 };
diff --git a/apps/api/src/services/queue-service.ts b/apps/api/src/services/queue-service.ts
index 2e6d7562..113b3fa3 100644
--- a/apps/api/src/services/queue-service.ts
+++ b/apps/api/src/services/queue-service.ts
@@ -35,6 +35,6 @@ export function getScrapeQueue() {
 }
 
 
-import { QueueEvents } from 'bullmq';
-
-export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection.duplicate() });
\ No newline at end of file
+// === REMOVED IN FAVOR OF POLLING -- NOT RELIABLE
+// import { QueueEvents } from 'bullmq';
+// export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection.duplicate() });
\ No newline at end of file

From 8e3c2b28550aafcdf4627724940a3b951672c496 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Thu, 22 Aug 2024 23:30:19 +0200
Subject: [PATCH 61/65] fix(crawler): verify URL

---
 apps/api/src/scraper/WebScraper/crawler.ts | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts
index 67f1c22e..92b9ae40 100644
--- a/apps/api/src/scraper/WebScraper/crawler.ts
+++ 
b/apps/api/src/scraper/WebScraper/crawler.ts @@ -108,7 +108,12 @@ export class WebCrawler { // Normalize the initial URL and the link to account for www and non-www versions const normalizedInitialUrl = new URL(this.initialUrl); - const normalizedLink = new URL(link); + let normalizedLink; + try { + normalizedLink = new URL(link); + } catch (_) { + return false; + } const initialHostname = normalizedInitialUrl.hostname.replace(/^www\./, ''); const linkHostname = normalizedLink.hostname.replace(/^www\./, ''); From 1f779e261a3260964488cc896fa24c2248a09bfb Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 22 Aug 2024 18:30:45 -0300 Subject: [PATCH 62/65] Update rate-limiter.ts --- apps/api/src/services/rate-limiter.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index 05fb102c..cd923c4c 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -90,7 +90,7 @@ export function getRateLimiter( plan?: string ) { - if (token.includes("a01ccae") || token.includes("6254cf9")) { + if (token.includes("a01ccae") || token.includes("6254cf9") || token.includes("0f96e673")) { return testSuiteRateLimiter; } From 8d9ff90bcb6d25f2c0d9592c6a5e9d03dab199ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 23:37:23 +0200 Subject: [PATCH 63/65] feat(fire-engine): propagate sentry trace --- .../scraper/WebScraper/scrapers/fireEngine.ts | 49 +++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index b520bfe2..aa86ad5e 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -5,6 +5,7 @@ import { generateRequestParams } from "../single_url"; import { fetchAndProcessPdf } from "../utils/pdfProcessor"; import { universalTimeout } from "../global"; import { Logger } from "../../../lib/logger"; +import * as Sentry from "@sentry/node"; /** * Scrapes a URL with Fire-Engine @@ -92,27 +93,35 @@ export async function scrapWithFireEngine({ }); const startTime = Date.now(); - const _response = await axiosInstance.post( - process.env.FIRE_ENGINE_BETA_URL + endpoint, - { - url: url, - wait: waitParam, - screenshot: screenshotParam, - fullPageScreenshot: fullPageScreenshotParam, - headers: headers, - pageOptions: pageOptions, - disableJsDom: pageOptions?.disableJsDom ?? false, - priority, - engine, - instantReturn: true, - ...fireEngineOptionsParam, - }, - { - headers: { - "Content-Type": "application/json", + const _response = await Sentry.startSpan({ + name: "Call to fire-engine" + }, async span => { + return await axiosInstance.post( + process.env.FIRE_ENGINE_BETA_URL + endpoint, + { + url: url, + wait: waitParam, + screenshot: screenshotParam, + fullPageScreenshot: fullPageScreenshotParam, + headers: headers, + pageOptions: pageOptions, + disableJsDom: pageOptions?.disableJsDom ?? false, + priority, + engine, + instantReturn: true, + ...fireEngineOptionsParam, + }, + { + headers: { + "Content-Type": "application/json", + ...(Sentry.isInitialized() ? 
({ + "sentry-trace": Sentry.spanToTraceHeader(span), + "baggage": Sentry.spanToBaggageHeader(span), + }) : {}), + } } - } - ); + ); + }); let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`); while (checkStatusResponse.data.processing && Date.now() - startTime < universalTimeout + waitParam) { From 64e9be0cd4044c89b56c6e4e017184893e7ad694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 23:37:52 +0200 Subject: [PATCH 64/65] feat(redis): use bitnami image --- apps/dragonfly/.dockerignore | 2 + apps/dragonfly/Dockerfile | 6 + apps/dragonfly/Procfile | 2 + apps/dragonfly/README.md | 48 ++++++ apps/dragonfly/fly.toml | 7 +- apps/dragonfly/scripts/bump_version.sh | 91 +++++++++++ apps/dragonfly/scripts/semver | 200 +++++++++++++++++++++++++ apps/dragonfly/scripts/version.sh | 5 + apps/dragonfly/start-redis-server.sh | 30 ++++ 9 files changed, 385 insertions(+), 6 deletions(-) create mode 100644 apps/dragonfly/.dockerignore create mode 100644 apps/dragonfly/Dockerfile create mode 100644 apps/dragonfly/Procfile create mode 100644 apps/dragonfly/README.md create mode 100755 apps/dragonfly/scripts/bump_version.sh create mode 100755 apps/dragonfly/scripts/semver create mode 100755 apps/dragonfly/scripts/version.sh create mode 100755 apps/dragonfly/start-redis-server.sh diff --git a/apps/dragonfly/.dockerignore b/apps/dragonfly/.dockerignore new file mode 100644 index 00000000..860aa7ad --- /dev/null +++ b/apps/dragonfly/.dockerignore @@ -0,0 +1,2 @@ +.git +fly.toml diff --git a/apps/dragonfly/Dockerfile b/apps/dragonfly/Dockerfile new file mode 100644 index 00000000..77ea66ae --- /dev/null +++ b/apps/dragonfly/Dockerfile @@ -0,0 +1,6 @@ +ARG REDIS_VERSION=7.2.5 +FROM bitnami/redis:${REDIS_VERSION} + +COPY start-redis-server.sh /usr/bin/start-redis-server.sh + +CMD ["/usr/bin/start-redis-server.sh"] diff --git a/apps/dragonfly/Procfile b/apps/dragonfly/Procfile new file mode 100644 index 00000000..8f661345 --- /dev/null +++ b/apps/dragonfly/Procfile @@ -0,0 +1,2 @@ +redis: /usr/bin/start-redis-server.sh +metrics: /usr/local/bin/redis_exporter -redis.addr localhost:6379 -web.listen-address ":9091" diff --git a/apps/dragonfly/README.md b/apps/dragonfly/README.md new file mode 100644 index 00000000..7d2bcabd --- /dev/null +++ b/apps/dragonfly/README.md @@ -0,0 +1,48 @@ +The official repository for Running Redis on Fly.io. Find the accompanying Docker image at [flyio/redis](https://hub.docker.com/repository/docker/flyio/redis). + +## Usage + +This installation requires setting a password on Redis. To do that, run `fly secrets set REDIS_PASSWORD=mypassword` before deploying. Keep +track of this password - it won't be visible again after deployment! + +If you need no customizations, you can deploy using the official Docker image. See `fly.toml` in this repository for an example to get started with. +## Runtime requirements + +By default, this Redis installation will only accept connections on the private IPv6 network, on the standard port 6379. + +If you want to access it from the public internet, add a `[[services]]` section to your `fly.toml`. An example is included in this repo for accessing Redis on port 10000. + + +We recommend adding persistent storage for Redis data. If you skip this step, data will be lost across deploys or restarts. For Fly apps, the volume needs to be in the same region as the app instances. 
For example: + +```cmd +flyctl volumes create redis_server --region ord +``` +```out + Name: redis_server + Region: ord + Size GB: 10 +Created at: 02 Nov 20 19:55 UTC +``` + +To connect this volume to the app, `fly.toml` includes a `[mounts]` entry. + +``` +[mounts] +source = "redis_server" +destination = "/data" +``` + +When the app starts, that volume will be mounted on /data. + +## Cutting a release + +If you have write access to this repo, you can ship a prerelease or full release with: + +``` +scripts/bump_version.sh +``` +or +``` +scripts/bump_version.sh prerel +``` diff --git a/apps/dragonfly/fly.toml b/apps/dragonfly/fly.toml index 14bdbd96..1bcd05fb 100644 --- a/apps/dragonfly/fly.toml +++ b/apps/dragonfly/fly.toml @@ -1,13 +1,8 @@ app = 'firecrawl-dragonfly' primary_region = 'iad' -[experimental] - cmd = ['dragonfly','--logtostderr', '--cluster_mode=emulated', '--lock_on_hashtags', "--bind","::"] -[build] - image = 'ghcr.io/dragonflydb/dragonfly' - [[mounts]] - source = 'firecrawl_dragonfly' + source = 'firecrawl_redis' destination = '/data' [[services]] diff --git a/apps/dragonfly/scripts/bump_version.sh b/apps/dragonfly/scripts/bump_version.sh new file mode 100755 index 00000000..4a82c00d --- /dev/null +++ b/apps/dragonfly/scripts/bump_version.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ORIGIN=${ORIGIN:-origin} + +bump=${1:-patch} + +prerel=${2:-none} + +if [[ $bump == "prerel" ]]; then + bump="patch" + prerel="prerel" +fi + +if [[ $(git status --porcelain) != "" ]]; then + echo "Error: repo is dirty. Run git status, clean repo and try again." + exit 1 +elif [[ $(git status --porcelain -b | grep -e "ahead" -e "behind") != "" ]]; then + echo "Error: repo has unpushed commits. Push commits to remote and try again." + exit 1 +fi + +BRANCH="$(git rev-parse --abbrev-ref HEAD)" +if [[ "$prerel" == "prerel" && "$BRANCH" != "prerelease" ]]; then +# echo "❌ Sorry, you can only cut a pre-release from the 'prelease' branch" +# echo "Run 'git checkout prerelease && git pull origin prerelease' and try again." +# exit 1 + echo "⚠️ Pre-releases should be cut from the 'prerelease' branch" + echo "Please make sure you're not overwriting someone else's prerelease!" + echo + read -p "Release anyway? " -n 1 -r + echo + if [[ $REPLY =~ ^[^Yy]$ ]]; then + echo Aborting. + exit 1 + fi +fi + +if [[ "$prerel" != "prerel" && "$BRANCH" != "main" ]]; then + echo "❌ Sorry, you can only cut a release from the 'main' branch" + echo "Run 'git checkout main && git pull origin main' and try again." + exit 1 +fi + +git fetch +if [[ "$(git rev-parse HEAD 2>&1)" != "$(git rev-parse '@{u}' 2>&1)" ]]; then + echo "There are upstream commits that won't be included in this release." + echo "You probably want to exit, run 'git pull', then release." + echo + read -p "Release anyway? " -n 1 -r + echo + if [[ $REPLY =~ ^[^Yy]$ ]]; then + echo Aborting. 
+ exit 1 + fi +fi + +dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +previous_version="$("$dir"/../scripts/version.sh -s)" + +if [[ $prerel == "prerel" ]]; then + prerelversion=$("$dir"/../scripts/semver get prerel "$previous_version") + if [[ $prerelversion == "" ]]; then + new_version=$("$dir"/../scripts/semver bump "$bump" "$previous_version") + new_version=$("$dir"/../scripts/semver bump prerel pre-1 "$new_version") + else + prerel=pre-$((${prerelversion#pre-} + 1)) + new_version=$("$dir"/../scripts/semver bump prerel "$prerel" "$previous_version") + fi +else + prerelversion=$("$dir"/../scripts/semver get prerel "$previous_version") + if [[ $prerelversion == "" ]]; then + new_version=$("$dir"/../scripts/semver bump "$bump" "$previous_version") + else + new_version=${previous_version//-$prerelversion/} + fi +fi + +new_version="v$new_version" + +echo "Bumping version from v${previous_version} to ${new_version}" + +read -p "Are you sure? " -n 1 -r +echo +if [[ $REPLY =~ ^[Yy]$ ]] +then + git tag -m "release ${new_version}" -a "$new_version" && git push "${ORIGIN}" tag "$new_version" + echo "done" +fi diff --git a/apps/dragonfly/scripts/semver b/apps/dragonfly/scripts/semver new file mode 100755 index 00000000..674229e0 --- /dev/null +++ b/apps/dragonfly/scripts/semver @@ -0,0 +1,200 @@ +#!/usr/bin/env bash + +set -o errexit -o nounset -o pipefail + +SEMVER_REGEX="^[vV]?(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)(\\-[0-9A-Za-z-]+(\\.[0-9A-Za-z-]+)*)?(\\+[0-9A-Za-z-]+(\\.[0-9A-Za-z-]+)*)?$" + +PROG=semver +PROG_VERSION=2.1.0 + +USAGE="\ +Usage: + $PROG bump (major|minor|patch|release|prerel |build ) + $PROG compare + $PROG get (major|minor|patch|release|prerel|build) + $PROG --help + $PROG --version + +Arguments: + A version must match the following regex pattern: + \"${SEMVER_REGEX}\". + In english, the version must match X.Y.Z(-PRERELEASE)(+BUILD) + where X, Y and Z are positive integers, PRERELEASE is an optional + string composed of alphanumeric characters and hyphens and + BUILD is also an optional string composed of alphanumeric + characters and hyphens. + + See definition. + + String that must be composed of alphanumeric characters and hyphens. + + String that must be composed of alphanumeric characters and hyphens. + +Options: + -v, --version Print the version of this tool. + -h, --help Print this help message. + +Commands: + bump Bump by one of major, minor, patch, prerel, build + or a forced potentially conflicting version. The bumped version is + shown to stdout. + + compare Compare with , output to stdout the + following values: -1 if is newer, 0 if equal, 1 if + older. + + get Extract given part of , where part is one of major, minor, + patch, prerel, build." + +function error { + echo -e "$1" >&2 + exit 1 +} + +function usage-help { + error "$USAGE" +} + +function usage-version { + echo -e "${PROG}: $PROG_VERSION" + exit 0 +} + +function validate-version { + local version=$1 + if [[ "$version" =~ $SEMVER_REGEX ]]; then + # if a second argument is passed, store the result in var named by $2 + if [ "$#" -eq "2" ]; then + local major=${BASH_REMATCH[1]} + local minor=${BASH_REMATCH[2]} + local patch=${BASH_REMATCH[3]} + local prere=${BASH_REMATCH[4]} + local build=${BASH_REMATCH[6]} + eval "$2=(\"$major\" \"$minor\" \"$patch\" \"$prere\" \"$build\")" + else + echo "$version" + fi + else + error "version $version does not match the semver scheme 'X.Y.Z(-PRERELEASE)(+BUILD)'. See help for more information." 
+ fi +} + +function compare-version { + validate-version "$1" V + validate-version "$2" V_ + + # MAJOR, MINOR and PATCH should compare numerically + for i in 0 1 2; do + local diff=$((${V[$i]} - ${V_[$i]})) + if [[ $diff -lt 0 ]]; then + echo -1; return 0 + elif [[ $diff -gt 0 ]]; then + echo 1; return 0 + fi + done + + # PREREL should compare with the ASCII order. + if [[ -z "${V[3]}" ]] && [[ -n "${V_[3]}" ]]; then + echo 1; return 0; + elif [[ -n "${V[3]}" ]] && [[ -z "${V_[3]}" ]]; then + echo -1; return 0; + elif [[ -n "${V[3]}" ]] && [[ -n "${V_[3]}" ]]; then + if [[ "${V[3]}" > "${V_[3]}" ]]; then + echo 1; return 0; + elif [[ "${V[3]}" < "${V_[3]}" ]]; then + echo -1; return 0; + fi + fi + + echo 0 +} + +function command-bump { + local new; local version; local sub_version; local command; + + case $# in + 2) case $1 in + major|minor|patch|release) command=$1; version=$2;; + *) usage-help;; + esac ;; + 3) case $1 in + prerel|build) command=$1; sub_version=$2 version=$3 ;; + *) usage-help;; + esac ;; + *) usage-help;; + esac + + validate-version "$version" parts + # shellcheck disable=SC2154 + local major="${parts[0]}" + local minor="${parts[1]}" + local patch="${parts[2]}" + local prere="${parts[3]}" + local build="${parts[4]}" + + case "$command" in + major) new="$((major + 1)).0.0";; + minor) new="${major}.$((minor + 1)).0";; + patch) new="${major}.${minor}.$((patch + 1))";; + release) new="${major}.${minor}.${patch}";; + prerel) new=$(validate-version "${major}.${minor}.${patch}-${sub_version}");; + build) new=$(validate-version "${major}.${minor}.${patch}${prere}+${sub_version}");; + *) usage-help ;; + esac + + echo "$new" + exit 0 +} + +function command-compare { + local v; local v_; + + case $# in + 2) v=$(validate-version "$1"); v_=$(validate-version "$2") ;; + *) usage-help ;; + esac + + compare-version "$v" "$v_" + exit 0 +} + + +# shellcheck disable=SC2034 +function command-get { + local part version + + if [[ "$#" -ne "2" ]] || [[ -z "$1" ]] || [[ -z "$2" ]]; then + usage-help + exit 0 + fi + + part="$1" + version="$2" + + validate-version "$version" parts + local major="${parts[0]}" + local minor="${parts[1]}" + local patch="${parts[2]}" + local prerel="${parts[3]:1}" + local build="${parts[4]:1}" + + case "$part" in + major|minor|patch|release|prerel|build) echo "${!part}" ;; + *) usage-help ;; + esac + + exit 0 +} + +case $# in + 0) echo "Unknown command: $*"; usage-help;; +esac + +case $1 in + --help|-h) echo -e "$USAGE"; exit 0;; + --version|-v) usage-version ;; + bump) shift; command-bump "$@";; + get) shift; command-get "$@";; + compare) shift; command-compare "$@";; + *) echo "Unknown arguments: $*"; usage-help;; +esac diff --git a/apps/dragonfly/scripts/version.sh b/apps/dragonfly/scripts/version.sh new file mode 100755 index 00000000..0d3d9875 --- /dev/null +++ b/apps/dragonfly/scripts/version.sh @@ -0,0 +1,5 @@ +ORIGIN=${ORIGIN:-origin} + +version=$(git fetch --tags "${ORIGIN}" &>/dev/null | git -c "versionsort.prereleasesuffix=-pre" tag -l --sort=version:refname | grep -v dev | grep -vE '^v2$' | grep -vE '^v1$' | tail -n1 | cut -c 2-) + +echo "$version" diff --git a/apps/dragonfly/start-redis-server.sh b/apps/dragonfly/start-redis-server.sh new file mode 100755 index 00000000..ed252fde --- /dev/null +++ b/apps/dragonfly/start-redis-server.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -e + +sysctl vm.overcommit_memory=1 || true +sysctl net.core.somaxconn=1024 || true + +PW_ARG="" +if [[ ! 
-z "${REDIS_PASSWORD}" ]]; then
+  PW_ARG="--requirepass $REDIS_PASSWORD"
+fi
+
+# Set maxmemory-policy to 'allkeys-lru' for caching servers that should always evict old keys
+: ${MAXMEMORY_POLICY:="volatile-lru"}
+: ${APPENDONLY:="no"}
+: ${FLY_VM_MEMORY_MB:=512}
+if [ "${NOSAVE}" = "" ] ; then
+  : ${SAVE:="3600 1 300 100 60 10000"}
+fi
+# Set maxmemory to 80% of available memory
+MAXMEMORY=$(($FLY_VM_MEMORY_MB*80/100))
+
+mkdir -p /data/redis
+
+redis-server $PW_ARG \
+  --dir /data/redis \
+  --maxmemory "${MAXMEMORY}mb" \
+  --maxmemory-policy $MAXMEMORY_POLICY \
+  --appendonly $APPENDONLY \
+  --save "$SAVE"

From 52a05b8c6ea5e04243daad229ed960f4428c5833 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Fri, 23 Aug 2024 17:05:59 +0200
Subject: [PATCH 65/65] rename "dragonfly" to "redis"

---
 apps/{dragonfly => redis}/.dockerignore           | 0
 apps/{dragonfly => redis}/Dockerfile              | 0
 apps/{dragonfly => redis}/Procfile                | 0
 apps/{dragonfly => redis}/README.md               | 0
 apps/{dragonfly => redis}/fly.toml                | 0
 apps/{dragonfly => redis}/scripts/bump_version.sh | 0
 apps/{dragonfly => redis}/scripts/semver          | 0
 apps/{dragonfly => redis}/scripts/version.sh      | 0
 apps/{dragonfly => redis}/start-redis-server.sh   | 0
 9 files changed, 0 insertions(+), 0 deletions(-)
 rename apps/{dragonfly => redis}/.dockerignore (100%)
 rename apps/{dragonfly => redis}/Dockerfile (100%)
 rename apps/{dragonfly => redis}/Procfile (100%)
 rename apps/{dragonfly => redis}/README.md (100%)
 rename apps/{dragonfly => redis}/fly.toml (100%)
 rename apps/{dragonfly => redis}/scripts/bump_version.sh (100%)
 rename apps/{dragonfly => redis}/scripts/semver (100%)
 rename apps/{dragonfly => redis}/scripts/version.sh (100%)
 rename apps/{dragonfly => redis}/start-redis-server.sh (100%)

diff --git a/apps/dragonfly/.dockerignore b/apps/redis/.dockerignore
similarity index 100%
rename from apps/dragonfly/.dockerignore
rename to apps/redis/.dockerignore
diff --git a/apps/dragonfly/Dockerfile b/apps/redis/Dockerfile
similarity index 100%
rename from apps/dragonfly/Dockerfile
rename to apps/redis/Dockerfile
diff --git a/apps/dragonfly/Procfile b/apps/redis/Procfile
similarity index 100%
rename from apps/dragonfly/Procfile
rename to apps/redis/Procfile
diff --git a/apps/dragonfly/README.md b/apps/redis/README.md
similarity index 100%
rename from apps/dragonfly/README.md
rename to apps/redis/README.md
diff --git a/apps/dragonfly/fly.toml b/apps/redis/fly.toml
similarity index 100%
rename from apps/dragonfly/fly.toml
rename to apps/redis/fly.toml
diff --git a/apps/dragonfly/scripts/bump_version.sh b/apps/redis/scripts/bump_version.sh
similarity index 100%
rename from apps/dragonfly/scripts/bump_version.sh
rename to apps/redis/scripts/bump_version.sh
diff --git a/apps/dragonfly/scripts/semver b/apps/redis/scripts/semver
similarity index 100%
rename from apps/dragonfly/scripts/semver
rename to apps/redis/scripts/semver
diff --git a/apps/dragonfly/scripts/version.sh b/apps/redis/scripts/version.sh
similarity index 100%
rename from apps/dragonfly/scripts/version.sh
rename to apps/redis/scripts/version.sh
diff --git a/apps/dragonfly/start-redis-server.sh b/apps/redis/start-redis-server.sh
similarity index 100%
rename from apps/dragonfly/start-redis-server.sh
rename to apps/redis/start-redis-server.sh
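With the Redis app in place, another Fly app in the same organization can reach it over the private network. A minimal connection sketch — the `ioredis` client, the `firecrawl-dragonfly.internal` hostname (Fly's private DNS name for the app declared in `fly.toml` above), and the env-var wiring are illustrative assumptions, not part of this patch series:

```ts
import Redis from "ioredis";

// Hypothetical consumer-side wiring for the Redis app deployed above.
// REDIS_PASSWORD must match the secret set with `fly secrets set`.
const redis = new Redis({
  host: "firecrawl-dragonfly.internal", // Fly private DNS name (assumed)
  port: 6379,
  password: process.env.REDIS_PASSWORD,
  maxRetriesPerRequest: null, // required by BullMQ for blocking connections
});

redis.ping().then((pong) => console.log("redis reachable:", pong));
```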