mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 18:38:59 +08:00
feat: correlate smart scrape
This commit is contained in:
parent
edd4c30908
commit
0d813b628b
@ -185,11 +185,13 @@ export async function extractData({
|
|||||||
urls,
|
urls,
|
||||||
useAgent,
|
useAgent,
|
||||||
extractId,
|
extractId,
|
||||||
|
scrapeId,
|
||||||
}: {
|
}: {
|
||||||
extractOptions: GenerateCompletionsOptions;
|
extractOptions: GenerateCompletionsOptions;
|
||||||
urls: string[];
|
urls: string[];
|
||||||
useAgent: boolean;
|
useAgent: boolean;
|
||||||
extractId?: string;
|
extractId?: string;
|
||||||
|
scrapeId?: string;
|
||||||
}): Promise<{
|
}): Promise<{
|
||||||
extractedDataArray: any[];
|
extractedDataArray: any[];
|
||||||
warning: any;
|
warning: any;
|
||||||
@ -272,7 +274,7 @@ export async function extractData({
|
|||||||
let smartscrapeResults: SmartScrapeResult[];
|
let smartscrapeResults: SmartScrapeResult[];
|
||||||
if (isSingleUrl) {
|
if (isSingleUrl) {
|
||||||
smartscrapeResults = [
|
smartscrapeResults = [
|
||||||
await smartScrape(urls[0], extract?.smartscrape_prompt, undefined, extractId),
|
await smartScrape(urls[0], extract?.smartscrape_prompt, undefined, extractId, scrapeId),
|
||||||
];
|
];
|
||||||
smartScrapeCost += smartscrapeResults[0].tokenUsage;
|
smartScrapeCost += smartscrapeResults[0].tokenUsage;
|
||||||
smartScrapeCallCount++;
|
smartScrapeCallCount++;
|
||||||
@ -286,6 +288,7 @@ export async function extractData({
|
|||||||
page.smartscrape_prompt,
|
page.smartscrape_prompt,
|
||||||
undefined,
|
undefined,
|
||||||
extractId,
|
extractId,
|
||||||
|
scrapeId,
|
||||||
);
|
);
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
@ -50,6 +50,7 @@ export async function smartScrape(
|
|||||||
prompt: string,
|
prompt: string,
|
||||||
sessionId?: string,
|
sessionId?: string,
|
||||||
extractId?: string,
|
extractId?: string,
|
||||||
|
scrapeId?: string,
|
||||||
): Promise<SmartScrapeResult> {
|
): Promise<SmartScrapeResult> {
|
||||||
let logger = _logger.child({
|
let logger = _logger.child({
|
||||||
method: "smartScrape",
|
method: "smartScrape",
|
||||||
@ -58,6 +59,7 @@ export async function smartScrape(
|
|||||||
url,
|
url,
|
||||||
prompt,
|
prompt,
|
||||||
sessionId,
|
sessionId,
|
||||||
|
scrapeId,
|
||||||
});
|
});
|
||||||
try {
|
try {
|
||||||
logger.info("Initiating smart scrape request");
|
logger.info("Initiating smart scrape request");
|
||||||
@ -71,6 +73,7 @@ export async function smartScrape(
|
|||||||
prompt,
|
prompt,
|
||||||
userProvidedId: sessionId ?? undefined,
|
userProvidedId: sessionId ?? undefined,
|
||||||
extractId,
|
extractId,
|
||||||
|
scrapeId,
|
||||||
models: {
|
models: {
|
||||||
thinkingModel: {
|
thinkingModel: {
|
||||||
model: "gemini-2.5-pro-preview-03-25",
|
model: "gemini-2.5-pro-preview-03-25",
|
||||||
|
@ -25,7 +25,7 @@ export async function performAgent(
|
|||||||
|
|
||||||
let smartscrapeResults: SmartScrapeResult;
|
let smartscrapeResults: SmartScrapeResult;
|
||||||
try {
|
try {
|
||||||
smartscrapeResults = await smartScrape(url, prompt, sessionId)
|
smartscrapeResults = await smartScrape(url, prompt, sessionId, undefined, meta.id)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (error instanceof Error && error.message === "Cost limit exceeded") {
|
if (error instanceof Error && error.message === "Cost limit exceeded") {
|
||||||
logger.error("Cost limit exceeded", { error })
|
logger.error("Cost limit exceeded", { error })
|
||||||
|
@ -601,6 +601,7 @@ export async function performLLMExtract(
|
|||||||
extractOptions: generationOptions,
|
extractOptions: generationOptions,
|
||||||
urls: [meta.url],
|
urls: [meta.url],
|
||||||
useAgent: isAgentExtractModelValid(meta.options.extract?.agent?.model),
|
useAgent: isAgentExtractModelValid(meta.options.extract?.agent?.model),
|
||||||
|
scrapeId: meta.id,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (warning) {
|
if (warning) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user