mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 05:19:06 +08:00
correlate with eid
This commit is contained in:
parent
512a2b1cd4
commit
80b507e64e
@ -22,6 +22,7 @@ type BatchExtractOptions = {
|
|||||||
systemPrompt: string;
|
systemPrompt: string;
|
||||||
doc: Document;
|
doc: Document;
|
||||||
useAgent: boolean;
|
useAgent: boolean;
|
||||||
|
extractId?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -44,7 +45,7 @@ export async function batchExtractPromise(options: BatchExtractOptions, logger:
|
|||||||
smartScrapeCallCount: number;
|
smartScrapeCallCount: number;
|
||||||
otherCallCount: number;
|
otherCallCount: number;
|
||||||
}> {
|
}> {
|
||||||
const { multiEntitySchema, links, prompt, systemPrompt, doc, useAgent } = options;
|
const { multiEntitySchema, links, prompt, systemPrompt, doc, useAgent, extractId } = options;
|
||||||
|
|
||||||
|
|
||||||
const generationOptions: GenerateCompletionsOptions = {
|
const generationOptions: GenerateCompletionsOptions = {
|
||||||
@ -74,6 +75,7 @@ export async function batchExtractPromise(options: BatchExtractOptions, logger:
|
|||||||
extractOptions: generationOptions,
|
extractOptions: generationOptions,
|
||||||
urls: [doc.metadata.sourceURL || doc.metadata.url || ""],
|
urls: [doc.metadata.sourceURL || doc.metadata.url || ""],
|
||||||
useAgent,
|
useAgent,
|
||||||
|
extractId,
|
||||||
});
|
});
|
||||||
extractedDataArray = e;
|
extractedDataArray = e;
|
||||||
warning = w;
|
warning = w;
|
||||||
|
@ -14,7 +14,8 @@ export async function singleAnswerCompletion({
|
|||||||
links,
|
links,
|
||||||
prompt,
|
prompt,
|
||||||
systemPrompt,
|
systemPrompt,
|
||||||
useAgent
|
useAgent,
|
||||||
|
extractId,
|
||||||
}: {
|
}: {
|
||||||
singleAnswerDocs: Document[];
|
singleAnswerDocs: Document[];
|
||||||
rSchema: any;
|
rSchema: any;
|
||||||
@ -22,6 +23,7 @@ export async function singleAnswerCompletion({
|
|||||||
prompt: string;
|
prompt: string;
|
||||||
systemPrompt: string;
|
systemPrompt: string;
|
||||||
useAgent: boolean;
|
useAgent: boolean;
|
||||||
|
extractId?: string;
|
||||||
}): Promise<{
|
}): Promise<{
|
||||||
extract: any;
|
extract: any;
|
||||||
tokenUsage: TokenUsage;
|
tokenUsage: TokenUsage;
|
||||||
@ -51,6 +53,7 @@ export async function singleAnswerCompletion({
|
|||||||
extractOptions: generationOptions,
|
extractOptions: generationOptions,
|
||||||
urls: singleAnswerDocs.map(doc => doc.metadata.url || doc.metadata.sourceURL || ""),
|
urls: singleAnswerDocs.map(doc => doc.metadata.url || doc.metadata.sourceURL || ""),
|
||||||
useAgent,
|
useAgent,
|
||||||
|
extractId,
|
||||||
});
|
});
|
||||||
|
|
||||||
const completion = {
|
const completion = {
|
||||||
|
@ -430,7 +430,8 @@ export async function performExtraction(
|
|||||||
prompt: request.prompt ?? "",
|
prompt: request.prompt ?? "",
|
||||||
systemPrompt: request.systemPrompt ?? "",
|
systemPrompt: request.systemPrompt ?? "",
|
||||||
doc,
|
doc,
|
||||||
useAgent: isAgentExtractModelValid(request.agent?.model)
|
useAgent: isAgentExtractModelValid(request.agent?.model),
|
||||||
|
extractId,
|
||||||
}, logger);
|
}, logger);
|
||||||
|
|
||||||
// Race between timeout and completion
|
// Race between timeout and completion
|
||||||
@ -741,6 +742,7 @@ export async function performExtraction(
|
|||||||
prompt: request.prompt ?? "",
|
prompt: request.prompt ?? "",
|
||||||
systemPrompt: request.systemPrompt ?? "",
|
systemPrompt: request.systemPrompt ?? "",
|
||||||
useAgent: isAgentExtractModelValid(request.agent?.model),
|
useAgent: isAgentExtractModelValid(request.agent?.model),
|
||||||
|
extractId,
|
||||||
});
|
});
|
||||||
costTracking.smartScrapeCost += singleAnswerSmartScrapeCost;
|
costTracking.smartScrapeCost += singleAnswerSmartScrapeCost;
|
||||||
costTracking.smartScrapeCallCount += singleAnswerSmartScrapeCallCount;
|
costTracking.smartScrapeCallCount += singleAnswerSmartScrapeCallCount;
|
||||||
|
@ -184,10 +184,12 @@ export async function extractData({
|
|||||||
extractOptions,
|
extractOptions,
|
||||||
urls,
|
urls,
|
||||||
useAgent,
|
useAgent,
|
||||||
|
extractId,
|
||||||
}: {
|
}: {
|
||||||
extractOptions: GenerateCompletionsOptions;
|
extractOptions: GenerateCompletionsOptions;
|
||||||
urls: string[];
|
urls: string[];
|
||||||
useAgent: boolean;
|
useAgent: boolean;
|
||||||
|
extractId?: string;
|
||||||
}): Promise<{
|
}): Promise<{
|
||||||
extractedDataArray: any[];
|
extractedDataArray: any[];
|
||||||
warning: any;
|
warning: any;
|
||||||
@ -273,7 +275,7 @@ export async function extractData({
|
|||||||
let smartscrapeResults: SmartScrapeResult[];
|
let smartscrapeResults: SmartScrapeResult[];
|
||||||
if (isSingleUrl) {
|
if (isSingleUrl) {
|
||||||
smartscrapeResults = [
|
smartscrapeResults = [
|
||||||
await smartScrape(urls[0], extract?.smartscrape_prompt),
|
await smartScrape(urls[0], extract?.smartscrape_prompt, extractId),
|
||||||
];
|
];
|
||||||
smartScrapeCost += smartscrapeResults[0].tokenUsage;
|
smartScrapeCost += smartscrapeResults[0].tokenUsage;
|
||||||
smartScrapeCallCount++;
|
smartScrapeCallCount++;
|
||||||
@ -285,6 +287,7 @@ export async function extractData({
|
|||||||
return await smartScrape(
|
return await smartScrape(
|
||||||
urls[page.page_index],
|
urls[page.page_index],
|
||||||
page.smartscrape_prompt,
|
page.smartscrape_prompt,
|
||||||
|
extractId,
|
||||||
);
|
);
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
@ -49,6 +49,7 @@ export async function smartScrape(
|
|||||||
url: string,
|
url: string,
|
||||||
prompt: string,
|
prompt: string,
|
||||||
sessionId?: string,
|
sessionId?: string,
|
||||||
|
extractId?: string,
|
||||||
): Promise<SmartScrapeResult> {
|
): Promise<SmartScrapeResult> {
|
||||||
try {
|
try {
|
||||||
logger.info("Initiating smart scrape request", { url, prompt, sessionId });
|
logger.info("Initiating smart scrape request", { url, prompt, sessionId });
|
||||||
|
Loading…
x
Reference in New Issue
Block a user