feat(smart-scrape): use correct models for multi-entity assembly

This commit is contained in:
Gergő Móricz 2025-04-16 16:39:48 -07:00
parent 751c30f139
commit 6a93293fd0
3 changed files with 30 additions and 9 deletions

View File

@@ -315,7 +315,13 @@ export async function extractData({
let smartscrapeResults: SmartScrapeResult[]; let smartscrapeResults: SmartScrapeResult[];
if (isSingleUrl) { if (isSingleUrl) {
smartscrapeResults = [ smartscrapeResults = [
await smartScrape(urls[0], extract?.smartscrape_prompt, sessionId, extractId, scrapeId), await smartScrape({
url: urls[0],
prompt: extract?.smartscrape_prompt,
sessionId,
extractId,
scrapeId,
}),
]; ];
smartScrapeCost += smartscrapeResults[0].tokenUsage; smartScrapeCost += smartscrapeResults[0].tokenUsage;
smartScrapeCallCount++; smartScrapeCallCount++;
@@ -332,13 +338,13 @@ export async function extractData({
smartscrapeResults = await Promise.all( smartscrapeResults = await Promise.all(
pages.slice(0, 100).map(async (page) => { pages.slice(0, 100).map(async (page) => {
return await smartScrape( return await smartScrape({
urls[page.page_index], url: urls[page.page_index],
page.smartscrape_prompt, prompt: page.smartscrape_prompt,
undefined, sessionId,
extractId, extractId,
scrapeId, scrapeId,
); });
}), }),
); );
smartScrapeCost += smartscrapeResults.reduce( smartScrapeCost += smartscrapeResults.reduce(
@@ -364,6 +370,8 @@ export async function extractData({
const newExtractOptions = { const newExtractOptions = {
...extractOptions, ...extractOptions,
markdown: markdown, markdown: markdown,
model: getModel("gemini-2.5-pro-preview-03-25", "vertex"),
retryModel: getModel("gemini-2.5-pro-preview-03-25", "google"),
}; };
const { extract, warning, totalUsage, model, cost } = const { extract, warning, totalUsage, model, cost } =
await generateCompletions(newExtractOptions); await generateCompletions(newExtractOptions);

View File

@@ -45,13 +45,21 @@ export type SmartScrapeResult = z.infer<typeof smartScrapeResultSchema>;
* @returns A promise that resolves to an object matching the SmartScrapeResult type. * @returns A promise that resolves to an object matching the SmartScrapeResult type.
* @throws Throws an error if the request fails or the response is invalid. * @throws Throws an error if the request fails or the response is invalid.
*/ */
export async function smartScrape( export async function smartScrape({
url,
prompt,
sessionId,
extractId,
scrapeId,
beforeSubmission,
}: {
url: string, url: string,
prompt: string, prompt: string,
sessionId?: string, sessionId?: string,
extractId?: string, extractId?: string,
scrapeId?: string, scrapeId?: string,
): Promise<SmartScrapeResult> { beforeSubmission?: () => unknown,
}): Promise<SmartScrapeResult> {
let logger = _logger.child({ let logger = _logger.child({
method: "smartScrape", method: "smartScrape",
module: "smartScrape", module: "smartScrape",

View File

@@ -25,7 +25,12 @@ export async function performAgent(
let smartscrapeResults: SmartScrapeResult; let smartscrapeResults: SmartScrapeResult;
try { try {
smartscrapeResults = await smartScrape(url, prompt, sessionId, undefined, meta.id) smartscrapeResults = await smartScrape({
url,
prompt,
sessionId,
scrapeId: meta.id,
})
} catch (error) { } catch (error) {
if (error instanceof Error && error.message === "Cost limit exceeded") { if (error instanceof Error && error.message === "Cost limit exceeded") {
logger.error("Cost limit exceeded", { error }) logger.error("Cost limit exceeded", { error })