feat: correlate smart scrape

This commit is contained in:
Gergő Móricz 2025-04-16 00:25:42 -07:00
parent edd4c30908
commit 0d813b628b
4 changed files with 9 additions and 2 deletions

View File

@@ -185,11 +185,13 @@ export async function extractData({
urls, urls,
useAgent, useAgent,
extractId, extractId,
scrapeId,
}: { }: {
extractOptions: GenerateCompletionsOptions; extractOptions: GenerateCompletionsOptions;
urls: string[]; urls: string[];
useAgent: boolean; useAgent: boolean;
extractId?: string; extractId?: string;
scrapeId?: string;
}): Promise<{ }): Promise<{
extractedDataArray: any[]; extractedDataArray: any[];
warning: any; warning: any;
@@ -272,7 +274,7 @@ export async function extractData({
let smartscrapeResults: SmartScrapeResult[]; let smartscrapeResults: SmartScrapeResult[];
if (isSingleUrl) { if (isSingleUrl) {
smartscrapeResults = [ smartscrapeResults = [
await smartScrape(urls[0], extract?.smartscrape_prompt, undefined, extractId), await smartScrape(urls[0], extract?.smartscrape_prompt, undefined, extractId, scrapeId),
]; ];
smartScrapeCost += smartscrapeResults[0].tokenUsage; smartScrapeCost += smartscrapeResults[0].tokenUsage;
smartScrapeCallCount++; smartScrapeCallCount++;
@@ -286,6 +288,7 @@ export async function extractData({
page.smartscrape_prompt, page.smartscrape_prompt,
undefined, undefined,
extractId, extractId,
scrapeId,
); );
}), }),
); );

View File

@@ -50,6 +50,7 @@ export async function smartScrape(
prompt: string, prompt: string,
sessionId?: string, sessionId?: string,
extractId?: string, extractId?: string,
scrapeId?: string,
): Promise<SmartScrapeResult> { ): Promise<SmartScrapeResult> {
let logger = _logger.child({ let logger = _logger.child({
method: "smartScrape", method: "smartScrape",
@@ -58,6 +59,7 @@ export async function smartScrape(
url, url,
prompt, prompt,
sessionId, sessionId,
scrapeId,
}); });
try { try {
logger.info("Initiating smart scrape request"); logger.info("Initiating smart scrape request");
@@ -71,6 +73,7 @@ export async function smartScrape(
prompt, prompt,
userProvidedId: sessionId ?? undefined, userProvidedId: sessionId ?? undefined,
extractId, extractId,
scrapeId,
models: { models: {
thinkingModel: { thinkingModel: {
model: "gemini-2.5-pro-preview-03-25", model: "gemini-2.5-pro-preview-03-25",

View File

@@ -25,7 +25,7 @@ export async function performAgent(
let smartscrapeResults: SmartScrapeResult; let smartscrapeResults: SmartScrapeResult;
try { try {
smartscrapeResults = await smartScrape(url, prompt, sessionId) smartscrapeResults = await smartScrape(url, prompt, sessionId, undefined, meta.id)
} catch (error) { } catch (error) {
if (error instanceof Error && error.message === "Cost limit exceeded") { if (error instanceof Error && error.message === "Cost limit exceeded") {
logger.error("Cost limit exceeded", { error }) logger.error("Cost limit exceeded", { error })

View File

@@ -601,6 +601,7 @@ export async function performLLMExtract(
extractOptions: generationOptions, extractOptions: generationOptions,
urls: [meta.url], urls: [meta.url],
useAgent: isAgentExtractModelValid(meta.options.extract?.agent?.model), useAgent: isAgentExtractModelValid(meta.options.extract?.agent?.model),
scrapeId: meta.id,
}); });
if (warning) { if (warning) {