From 0d813b628bd4e2a2c81ba87a0b1d96fb8657b3f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 16 Apr 2025 00:25:42 -0700 Subject: [PATCH] feat: correlate smart scrape --- apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts | 5 ++++- apps/api/src/scraper/scrapeURL/lib/smartScrape.ts | 3 +++ apps/api/src/scraper/scrapeURL/transformers/agent.ts | 2 +- apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts | 1 + 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts b/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts index d2ad2a06..cee1d5a6 100644 --- a/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts +++ b/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts @@ -185,11 +185,13 @@ export async function extractData({ urls, useAgent, extractId, + scrapeId, }: { extractOptions: GenerateCompletionsOptions; urls: string[]; useAgent: boolean; extractId?: string; + scrapeId?: string; }): Promise<{ extractedDataArray: any[]; warning: any; @@ -272,7 +274,7 @@ export async function extractData({ let smartscrapeResults: SmartScrapeResult[]; if (isSingleUrl) { smartscrapeResults = [ - await smartScrape(urls[0], extract?.smartscrape_prompt, undefined, extractId), + await smartScrape(urls[0], extract?.smartscrape_prompt, undefined, extractId, scrapeId), ]; smartScrapeCost += smartscrapeResults[0].tokenUsage; smartScrapeCallCount++; @@ -286,6 +288,7 @@ export async function extractData({ page.smartscrape_prompt, undefined, extractId, + scrapeId, ); }), ); diff --git a/apps/api/src/scraper/scrapeURL/lib/smartScrape.ts b/apps/api/src/scraper/scrapeURL/lib/smartScrape.ts index 4e4cbb20..046a7b5e 100644 --- a/apps/api/src/scraper/scrapeURL/lib/smartScrape.ts +++ b/apps/api/src/scraper/scrapeURL/lib/smartScrape.ts @@ -50,6 +50,7 @@ export async function smartScrape( prompt: string, sessionId?: string, extractId?: string, + scrapeId?: string, ): Promise<SmartScrapeResult> { let logger = 
_logger.child({ method: "smartScrape", @@ -58,6 +59,7 @@ export async function smartScrape( url, prompt, sessionId, + scrapeId, }); try { logger.info("Initiating smart scrape request"); @@ -71,6 +73,7 @@ export async function smartScrape( prompt, userProvidedId: sessionId ?? undefined, extractId, + scrapeId, models: { thinkingModel: { model: "gemini-2.5-pro-preview-03-25", diff --git a/apps/api/src/scraper/scrapeURL/transformers/agent.ts b/apps/api/src/scraper/scrapeURL/transformers/agent.ts index 6ab32862..30a0f46f 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/agent.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/agent.ts @@ -25,7 +25,7 @@ export async function performAgent( let smartscrapeResults: SmartScrapeResult; try { - smartscrapeResults = await smartScrape(url, prompt, sessionId) + smartscrapeResults = await smartScrape(url, prompt, sessionId, undefined, meta.id) } catch (error) { if (error instanceof Error && error.message === "Cost limit exceeded") { logger.error("Cost limit exceeded", { error }) diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index ebdb7a6b..804ead2e 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -601,6 +601,7 @@ export async function performLLMExtract( extractOptions: generationOptions, urls: [meta.url], useAgent: isAgentExtractModelValid(meta.options.extract?.agent?.model), + scrapeId: meta.id, }); if (warning) {