feat: correlate smart scrape

This commit is contained in:
Gergő Móricz 2025-04-16 00:25:42 -07:00
parent edd4c30908
commit 0d813b628b
4 changed files with 9 additions and 2 deletions

View File

@@ -185,11 +185,13 @@ export async function extractData({
urls,
useAgent,
extractId,
+scrapeId,
}: {
extractOptions: GenerateCompletionsOptions;
urls: string[];
useAgent: boolean;
extractId?: string;
+scrapeId?: string;
}): Promise<{
extractedDataArray: any[];
warning: any;
@@ -272,7 +274,7 @@ export async function extractData({
let smartscrapeResults: SmartScrapeResult[];
if (isSingleUrl) {
smartscrapeResults = [
-await smartScrape(urls[0], extract?.smartscrape_prompt, undefined, extractId),
+await smartScrape(urls[0], extract?.smartscrape_prompt, undefined, extractId, scrapeId),
];
smartScrapeCost += smartscrapeResults[0].tokenUsage;
smartScrapeCallCount++;
@@ -286,6 +288,7 @@ export async function extractData({
page.smartscrape_prompt,
undefined,
extractId,
+scrapeId,
);
}),
);

View File

@@ -50,6 +50,7 @@ export async function smartScrape(
prompt: string,
sessionId?: string,
extractId?: string,
+scrapeId?: string,
): Promise<SmartScrapeResult> {
let logger = _logger.child({
method: "smartScrape",
@@ -58,6 +59,7 @@ export async function smartScrape(
url,
prompt,
sessionId,
+scrapeId,
});
try {
logger.info("Initiating smart scrape request");
@@ -71,6 +73,7 @@ export async function smartScrape(
prompt,
userProvidedId: sessionId ?? undefined,
extractId,
+scrapeId,
models: {
thinkingModel: {
model: "gemini-2.5-pro-preview-03-25",

View File

@@ -25,7 +25,7 @@ export async function performAgent(
let smartscrapeResults: SmartScrapeResult;
try {
-smartscrapeResults = await smartScrape(url, prompt, sessionId)
+smartscrapeResults = await smartScrape(url, prompt, sessionId, undefined, meta.id)
} catch (error) {
if (error instanceof Error && error.message === "Cost limit exceeded") {
logger.error("Cost limit exceeded", { error })

View File

@@ -601,6 +601,7 @@ export async function performLLMExtract(
extractOptions: generationOptions,
urls: [meta.url],
useAgent: isAgentExtractModelValid(meta.options.extract?.agent?.model),
+scrapeId: meta.id,
});
if (warning) {