From dcef6fbc13fcc673f3fea91385996b95beebacf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 16 Apr 2025 01:13:49 -0700 Subject: [PATCH] feat(extractSmartScrape): cap it to 100 pages max --- .../src/scraper/scrapeURL/lib/extractSmartScrape.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts b/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts index cee1d5a6..a20c5d2c 100644 --- a/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts +++ b/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts @@ -279,10 +279,18 @@ export async function extractData({ smartScrapeCost += smartscrapeResults[0].tokenUsage; smartScrapeCallCount++; } else { - const pages = extract?.smartscrapePages; + const pages = extract?.smartscrapePages ?? []; //do it async promiseall instead + if (pages.length > 100) { + logger.warn("Smart scrape pages limit exceeded, only first 100 pages will be scraped", { + pagesLength: pages.length, + extractId, + scrapeId, + }); + } + smartscrapeResults = await Promise.all( - pages.map(async (page) => { + pages.slice(0, 100).map(async (page) => { return await smartScrape( urls[page.page_index], page.smartscrape_prompt,