mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 21:49:02 +08:00
feat(smart-scrape): use correct models for multi-entity assembly
This commit is contained in:
parent
751c30f139
commit
6a93293fd0
@ -315,7 +315,13 @@ export async function extractData({
|
||||
let smartscrapeResults: SmartScrapeResult[];
|
||||
if (isSingleUrl) {
|
||||
smartscrapeResults = [
|
||||
await smartScrape(urls[0], extract?.smartscrape_prompt, sessionId, extractId, scrapeId),
|
||||
await smartScrape({
|
||||
url: urls[0],
|
||||
prompt: extract?.smartscrape_prompt,
|
||||
sessionId,
|
||||
extractId,
|
||||
scrapeId,
|
||||
}),
|
||||
];
|
||||
smartScrapeCost += smartscrapeResults[0].tokenUsage;
|
||||
smartScrapeCallCount++;
|
||||
@ -332,13 +338,13 @@ export async function extractData({
|
||||
|
||||
smartscrapeResults = await Promise.all(
|
||||
pages.slice(0, 100).map(async (page) => {
|
||||
return await smartScrape(
|
||||
urls[page.page_index],
|
||||
page.smartscrape_prompt,
|
||||
undefined,
|
||||
return await smartScrape({
|
||||
url: urls[page.page_index],
|
||||
prompt: page.smartscrape_prompt,
|
||||
sessionId,
|
||||
extractId,
|
||||
scrapeId,
|
||||
);
|
||||
});
|
||||
}),
|
||||
);
|
||||
smartScrapeCost += smartscrapeResults.reduce(
|
||||
@ -364,6 +370,8 @@ export async function extractData({
|
||||
const newExtractOptions = {
|
||||
...extractOptions,
|
||||
markdown: markdown,
|
||||
model: getModel("gemini-2.5-pro-preview-03-25", "vertex"),
|
||||
retryModel: getModel("gemini-2.5-pro-preview-03-25", "google"),
|
||||
};
|
||||
const { extract, warning, totalUsage, model, cost } =
|
||||
await generateCompletions(newExtractOptions);
|
||||
|
@ -45,13 +45,21 @@ export type SmartScrapeResult = z.infer<typeof smartScrapeResultSchema>;
|
||||
* @returns A promise that resolves to an object matching the SmartScrapeResult type.
|
||||
* @throws Throws an error if the request fails or the response is invalid.
|
||||
*/
|
||||
export async function smartScrape(
|
||||
export async function smartScrape({
|
||||
url,
|
||||
prompt,
|
||||
sessionId,
|
||||
extractId,
|
||||
scrapeId,
|
||||
beforeSubmission,
|
||||
}: {
|
||||
url: string,
|
||||
prompt: string,
|
||||
sessionId?: string,
|
||||
extractId?: string,
|
||||
scrapeId?: string,
|
||||
): Promise<SmartScrapeResult> {
|
||||
beforeSubmission?: () => unknown,
|
||||
}): Promise<SmartScrapeResult> {
|
||||
let logger = _logger.child({
|
||||
method: "smartScrape",
|
||||
module: "smartScrape",
|
||||
|
@ -25,7 +25,12 @@ export async function performAgent(
|
||||
|
||||
let smartscrapeResults: SmartScrapeResult;
|
||||
try {
|
||||
smartscrapeResults = await smartScrape(url, prompt, sessionId, undefined, meta.id)
|
||||
smartscrapeResults = await smartScrape({
|
||||
url,
|
||||
prompt,
|
||||
sessionId,
|
||||
scrapeId: meta.id,
|
||||
})
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.message === "Cost limit exceeded") {
|
||||
logger.error("Cost limit exceeded", { error })
|
||||
|
Loading…
x
Reference in New Issue
Block a user