diff --git a/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts b/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts index 37205343..82f16d12 100644 --- a/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts +++ b/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts @@ -315,7 +315,13 @@ export async function extractData({ let smartscrapeResults: SmartScrapeResult[]; if (isSingleUrl) { smartscrapeResults = [ - await smartScrape(urls[0], extract?.smartscrape_prompt, sessionId, extractId, scrapeId), + await smartScrape({ + url: urls[0], + prompt: extract?.smartscrape_prompt, + sessionId, + extractId, + scrapeId, + }), ]; smartScrapeCost += smartscrapeResults[0].tokenUsage; smartScrapeCallCount++; @@ -332,13 +338,13 @@ export async function extractData({ smartscrapeResults = await Promise.all( pages.slice(0, 100).map(async (page) => { - return await smartScrape( - urls[page.page_index], - page.smartscrape_prompt, - undefined, + return await smartScrape({ + url: urls[page.page_index], + prompt: page.smartscrape_prompt, + sessionId, extractId, scrapeId, - ); + }); }), ); smartScrapeCost += smartscrapeResults.reduce( @@ -364,6 +370,8 @@ export async function extractData({ const newExtractOptions = { ...extractOptions, markdown: markdown, + model: getModel("gemini-2.5-pro-preview-03-25", "vertex"), + retryModel: getModel("gemini-2.5-pro-preview-03-25", "google"), }; const { extract, warning, totalUsage, model, cost } = await generateCompletions(newExtractOptions); diff --git a/apps/api/src/scraper/scrapeURL/lib/smartScrape.ts b/apps/api/src/scraper/scrapeURL/lib/smartScrape.ts index 046a7b5e..a913ec27 100644 --- a/apps/api/src/scraper/scrapeURL/lib/smartScrape.ts +++ b/apps/api/src/scraper/scrapeURL/lib/smartScrape.ts @@ -45,13 +45,21 @@ export type SmartScrapeResult = z.infer; * @returns A promise that resolves to an object matching the SmartScrapeResult type. * @throws Throws an error if the request fails or the response is invalid. */ -export async function smartScrape( +export async function smartScrape({ + url, + prompt, + sessionId, + extractId, + scrapeId, + beforeSubmission, +}: { url: string, prompt: string, sessionId?: string, extractId?: string, scrapeId?: string, -): Promise { + beforeSubmission?: () => unknown, +}): Promise { let logger = _logger.child({ method: "smartScrape", module: "smartScrape", diff --git a/apps/api/src/scraper/scrapeURL/transformers/agent.ts b/apps/api/src/scraper/scrapeURL/transformers/agent.ts index 30a0f46f..5ad304d3 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/agent.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/agent.ts @@ -25,7 +25,12 @@ export async function performAgent( let smartscrapeResults: SmartScrapeResult; try { - smartscrapeResults = await smartScrape(url, prompt, sessionId, undefined, meta.id) + smartscrapeResults = await smartScrape({ + url, + prompt, + sessionId, + scrapeId: meta.id, + }) } catch (error) { if (error instanceof Error && error.message === "Cost limit exceeded") { logger.error("Cost limit exceeded", { error })