feat(smart-scrape): use correct models for multi-entity assembly

This commit is contained in:
Gergő Móricz 2025-04-16 16:39:48 -07:00
parent 751c30f139
commit 6a93293fd0
3 changed files with 30 additions and 9 deletions

View File

@@ -315,7 +315,13 @@ export async function extractData({
let smartscrapeResults: SmartScrapeResult[];
if (isSingleUrl) {
smartscrapeResults = [
await smartScrape(urls[0], extract?.smartscrape_prompt, sessionId, extractId, scrapeId),
await smartScrape({
url: urls[0],
prompt: extract?.smartscrape_prompt,
sessionId,
extractId,
scrapeId,
}),
];
smartScrapeCost += smartscrapeResults[0].tokenUsage;
smartScrapeCallCount++;
@@ -332,13 +338,13 @@ export async function extractData({
smartscrapeResults = await Promise.all(
pages.slice(0, 100).map(async (page) => {
return await smartScrape(
urls[page.page_index],
page.smartscrape_prompt,
undefined,
return await smartScrape({
url: urls[page.page_index],
prompt: page.smartscrape_prompt,
sessionId,
extractId,
scrapeId,
);
});
}),
);
smartScrapeCost += smartscrapeResults.reduce(
@@ -364,6 +370,8 @@ export async function extractData({
const newExtractOptions = {
...extractOptions,
markdown: markdown,
model: getModel("gemini-2.5-pro-preview-03-25", "vertex"),
retryModel: getModel("gemini-2.5-pro-preview-03-25", "google"),
};
const { extract, warning, totalUsage, model, cost } =
await generateCompletions(newExtractOptions);

View File

@@ -45,13 +45,21 @@ export type SmartScrapeResult = z.infer<typeof smartScrapeResultSchema>;
* @returns A promise that resolves to an object matching the SmartScrapeResult type.
* @throws Throws an error if the request fails or the response is invalid.
*/
export async function smartScrape(
export async function smartScrape({
url,
prompt,
sessionId,
extractId,
scrapeId,
beforeSubmission,
}: {
url: string,
prompt: string,
sessionId?: string,
extractId?: string,
scrapeId?: string,
): Promise<SmartScrapeResult> {
beforeSubmission?: () => unknown,
}): Promise<SmartScrapeResult> {
let logger = _logger.child({
method: "smartScrape",
module: "smartScrape",

View File

@@ -25,7 +25,12 @@ export async function performAgent(
let smartscrapeResults: SmartScrapeResult;
try {
smartscrapeResults = await smartScrape(url, prompt, sessionId, undefined, meta.id)
smartscrapeResults = await smartScrape({
url,
prompt,
sessionId,
scrapeId: meta.id,
})
} catch (error) {
if (error instanceof Error && error.message === "Cost limit exceeded") {
logger.error("Cost limit exceeded", { error })