mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 18:09:03 +08:00
Nick: introduce llm-usage cost analysis
This commit is contained in:
parent de14c0a45d
commit 4db023280d
@@ -37,5 +37,6 @@ export async function extractStatusController(
     error: extract?.error ?? undefined,
     expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(),
     steps: extract.showSteps ? extract.steps : undefined,
+    llmUsage: extract.showLLMUsage ? extract.llmUsage : undefined,
   });
 }
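Once a job created with cost reporting enabled completes, the status endpoint's JSON would carry the new field. A minimal sketch in TypeScript; the full response shape beyond the fields touched in this hunk is an assumption, and all values are illustrative:

// Hypothetical status response with cost reporting enabled.
// llmUsage is the estimated LLM spend (USD) for the whole job.
const exampleStatusResponse = {
  status: "completed",
  error: undefined,
  expiresAt: "2025-01-15T12:00:00.000Z",
  steps: undefined,     // only present when __experimental_streamSteps is on
  llmUsage: 0.0061245,  // omitted unless showLLMUsage was set at creation
};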
@@ -71,6 +71,7 @@ export async function extractController(
     createdAt: Date.now(),
     status: "processing",
     showSteps: req.body.__experimental_streamSteps,
+    showLLMUsage: req.body.__experimental_llmUsage,
   });

   if (Sentry.isInitialized()) {
@@ -222,6 +222,7 @@ export const extractV1Options = z
     origin: z.string().optional().default("api"),
     urlTrace: z.boolean().default(false),
     __experimental_streamSteps: z.boolean().default(false),
+    __experimental_llmUsage: z.boolean().default(false),
     timeout: z.number().int().positive().finite().safe().default(60000),
   })
   .strict(strictMessage);
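On the request side the flag is opt-in and defaults to false, so existing callers see no behavior change. A sketch of a body that would parse against extractV1Options; the urls and prompt values are placeholders:

// Hypothetical extract request opting in to cost reporting.
const requestBody = {
  urls: ["https://example.com"],
  prompt: "Extract the product name and price",
  __experimental_llmUsage: true, // surfaces llmUsage on the status endpoint
};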
@@ -840,3 +841,12 @@ export type SearchResponse =
   warning?: string;
   data: Document[];
 };
+
+export type TokenUsage = {
+  promptTokens: number;
+  completionTokens: number;
+  totalTokens: number;
+  step?: string;
+  model?: string;
+};
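The intent is one TokenUsage entry per LLM call, folded into totals at the end of the job. A minimal sketch of that accumulation pattern, using made-up counts:

// One entry per LLM call; step/model make the breakdown auditable.
const breakdown: TokenUsage[] = [
  { promptTokens: 1200, completionTokens: 300, totalTokens: 1500, model: "gpt-4o" },
  { promptTokens: 800, completionTokens: 150, totalTokens: 950, model: "gpt-4o-mini" },
];

// Mirrors the reduce used later in performExtraction.
const totalTokens = breakdown.reduce((a, b) => a + b.totalTokens, 0); // 2450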
@@ -30,6 +30,8 @@ export type StoredExtract = {
   error?: any;
   showSteps?: boolean;
   steps?: ExtractedStep[];
+  showLLMUsage?: boolean;
+  llmUsage?: number;
 };

 export async function saveExtract(id: string, extract: StoredExtract) {
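With the two new optional fields, a completed record persisted through saveExtract/updateExtract might look like this. A sketch only; StoredExtract's full field set is not visible in this diff, so the object is abbreviated:

// Hypothetical completed record; llmUsage holds the estimated USD cost.
const stored = {
  status: "completed",
  showLLMUsage: true,
  llmUsage: 0.0061245,
};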
@@ -1,6 +1,7 @@
 import {
   Document,
   ExtractRequest,
+  TokenUsage,
   toLegacyCrawlerOptions,
   URLTrace,
 } from "../../controllers/v1/types";
@@ -31,6 +32,7 @@ import { ExtractStep, updateExtract } from "./extract-redis";
 import { deduplicateObjectsArray } from "./helpers/deduplicate-objs-array";
 import { mergeNullValObjs } from "./helpers/merge-null-val-objs";
 import { CUSTOM_U_TEAMS } from "./config";
+import { estimateCost, estimateTotalCost } from "./usage/llm-cost";

 interface ExtractServiceOptions {
   request: ExtractRequest;
@@ -46,6 +48,8 @@ interface ExtractResult {
   warning?: string;
   urlTrace?: URLTrace[];
   error?: string;
+  tokenUsageBreakdown?: TokenUsage[];
+  llmUsage?: number;
 }

 async function analyzeSchemaAndPrompt(
@@ -57,6 +61,7 @@ async function analyzeSchemaAndPrompt(
   multiEntityKeys: string[];
   reasoning?: string;
   keyIndicators?: string[];
+  tokenUsage: TokenUsage;
 }> {
   if (!schema) {
     schema = await generateSchemaFromPrompt(prompt);
@@ -71,8 +76,10 @@ async function analyzeSchemaAndPrompt(
     keyIndicators: z.array(z.string()),
   });

+  const model = "gpt-4o";
+
   const result = await openai.beta.chat.completions.parse({
-    model: "gpt-4o",
+    model: model,
     messages: [
       {
         role: "system",
@@ -131,12 +138,20 @@ Schema: ${schemaString}\nPrompt: ${prompt}\nRelevant URLs: ${urls}`,

   const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators } =
     checkSchema.parse(result.choices[0].message.parsed);
-  return { isMultiEntity, multiEntityKeys, reasoning, keyIndicators };
+
+  const tokenUsage: TokenUsage = {
+    promptTokens: result.usage?.prompt_tokens ?? 0,
+    completionTokens: result.usage?.completion_tokens ?? 0,
+    totalTokens: result.usage?.total_tokens ?? 0,
+    model: model,
+  };
+
+  return { isMultiEntity, multiEntityKeys, reasoning, keyIndicators, tokenUsage };
 }

 type completions = {
   extract: Record<string, any>;
   numTokens: number;
+  totalUsage: TokenUsage;
   warning?: string;
 };
@@ -164,6 +179,10 @@ export async function performExtraction(
   let multiEntityResult: any = {};
   let singleAnswerResult: any = {};

+  // Token tracking
+  let tokenUsage: TokenUsage[] = [];
+
   await updateExtract(extractId, {
     status: "processing",
     steps: [
@@ -249,9 +268,12 @@ export async function performExtraction(
   // 1. the first one is a completion that will extract the array of items
   // 2. the second one is multiple completions that will extract the items from the array
   let startAnalyze = Date.now();
-  const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators } =
+  const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators, tokenUsage: schemaAnalysisTokenUsage } =
     await analyzeSchemaAndPrompt(links, reqSchema, request.prompt ?? "");

+  // Track schema analysis tokens
+  tokenUsage.push(schemaAnalysisTokenUsage);
+
   // console.log("\nIs Multi Entity:", isMultiEntity);
   // console.log("\nMulti Entity Keys:", multiEntityKeys);
   // console.log("\nReasoning:", reasoning);
@@ -376,6 +398,8 @@ export async function performExtraction(
           true,
         );

+        tokenUsage.push(shouldExtractCheck.totalUsage);
+
         if (!shouldExtractCheck.extract["extract"]) {
           console.log(
             `Skipping extraction for ${doc.metadata.url} as content is irrelevant`,
@@ -438,6 +462,11 @@ export async function performExtraction(
         timeoutPromise,
       ])) as Awaited<ReturnType<typeof generateOpenAICompletions>>;

+      // Track multi-entity extraction tokens
+      if (multiEntityCompletion) {
+        tokenUsage.push(multiEntityCompletion.totalUsage);
+      }
+
       // console.log(multiEntityCompletion.extract)
       // if (!multiEntityCompletion.extract?.is_content_relevant) {
       //   console.log(`Skipping extraction for ${doc.metadata.url} as content is not relevant`);
@@ -603,6 +632,11 @@ export async function performExtraction(
       true,
     );

+    // Track single answer extraction tokens
+    if (singleAnswerCompletions) {
+      tokenUsage.push(singleAnswerCompletions.totalUsage);
+    }
+
     singleAnswerResult = singleAnswerCompletions.extract;

     // Update token usage in traces
@@ -641,7 +675,10 @@ export async function performExtraction(
     );
   });

-  // Log job
+  const totalTokensUsed = tokenUsage.reduce((a, b) => a + b.totalTokens, 0);
+  const llmUsage = estimateTotalCost(tokenUsage);
+
+  // Log job with token usage
   logJob({
     job_id: extractId,
     success: true,
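To make the two aggregates concrete with the illustrative breakdown from earlier: totalTokensUsed is a plain sum, while llmUsage prices each entry by its own model. Assuming, purely for illustration, gpt-4o at $2.50 per million input tokens and $10.00 per million output tokens:

// Worked example with assumed prices, not the shipped price table.
const inputCostPerToken = 2.5 / 1_000_000;
const outputCostPerToken = 10 / 1_000_000;

// One 1200-in / 300-out gpt-4o call:
const callCost = 1200 * inputCostPerToken + 300 * outputCostPerToken;
// 0.003 + 0.003 = 0.006 USD; num_tokens for the job would add 1500.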
@@ -654,10 +691,11 @@ export async function performExtraction(
     url: request.urls.join(", "),
     scrapeOptions: request,
     origin: request.origin ?? "api",
-    num_tokens: 0, // completions?.numTokens ?? 0,
+    num_tokens: totalTokensUsed,
   }).then(() => {
     updateExtract(extractId, {
       status: "completed",
+      llmUsage,
     }).catch((error) => {
       logger.error(
         `Failed to update extract ${extractId} status to completed: ${error}`,
@@ -671,5 +709,7 @@ export async function performExtraction(
     extractId,
     warning: undefined, // TODO FIX
     urlTrace: request.urlTrace ? urlTraces : undefined,
+    llmUsage,
   };
 }
@@ -150,16 +150,21 @@ function filterAndProcessLinks(
   );
 }

+export type RerankerResult = {
+  mapDocument: MapDocument[];
+  tokensUsed: number;
+}
+
 export async function rerankLinksWithLLM(
   mappedLinks: MapDocument[],
   searchQuery: string,
   urlTraces: URLTrace[],
-): Promise<MapDocument[]> {
+): Promise<RerankerResult> {
   const chunkSize = 100;
   const chunks: MapDocument[][] = [];
   const TIMEOUT_MS = 20000;
   const MAX_RETRIES = 2;
+  let totalTokensUsed = 0;

   // Split mappedLinks into chunks of 200
   for (let i = 0; i < mappedLinks.length; i += chunkSize) {
@@ -225,6 +230,7 @@ export async function rerankLinksWithLLM(
         return [];
       }

+      totalTokensUsed += completion.numTokens || 0;
       // console.log(`Chunk ${chunkIndex + 1}: Found ${completion.extract.relevantLinks.length} relevant links`);
       return completion.extract.relevantLinks;
@@ -252,5 +258,8 @@ export async function rerankLinksWithLLM(
     .filter((link): link is MapDocument => link !== undefined);

   // console.log(`Returning ${relevantLinks.length} relevant links`);
-  return relevantLinks;
-}
+  return {
+    mapDocument: relevantLinks,
+    tokensUsed: totalTokensUsed,
+  };
+}
@@ -199,15 +199,19 @@ export async function processUrl(
   //   (link, index) => `${index + 1}. URL: ${link.url}, Title: ${link.title}, Description: ${link.description}`
   // );

-  mappedLinks = await rerankLinksWithLLM(mappedLinks, searchQuery, urlTraces);
+  const rerankerResult = await rerankLinksWithLLM(mappedLinks, searchQuery, urlTraces);
+  mappedLinks = rerankerResult.mapDocument;
+  let tokensUsed = rerankerResult.tokensUsed;

   // 2nd Pass, useful for when the first pass returns too many links
   if (mappedLinks.length > 100) {
-    mappedLinks = await rerankLinksWithLLM(
+    const rerankerResult = await rerankLinksWithLLM(
       mappedLinks,
       searchQuery,
       urlTraces,
     );
+    mappedLinks = rerankerResult.mapDocument;
+    tokensUsed += rerankerResult.tokensUsed;
   }

   // dumpToFile(
apps/api/src/lib/extract/usage/llm-cost.ts (new file, 53 lines added)
@@ -0,0 +1,53 @@
+import { TokenUsage } from "../../../controllers/v1/types";
+import { logger } from "../../../lib/logger";
+import { modelPrices } from "./model-prices";
+
+interface ModelPricing {
+  input_cost_per_token?: number;
+  output_cost_per_token?: number;
+  input_cost_per_request?: number;
+  mode: string;
+}
+
+export function estimateTotalCost(tokenUsage: TokenUsage[]): number {
+  return tokenUsage.reduce((total, usage) => {
+    return total + estimateCost(usage);
+  }, 0);
+}
+
+export function estimateCost(tokenUsage: TokenUsage): number {
+  let totalCost = 0;
+  try {
+    let model = tokenUsage.model ?? process.env.MODEL_NAME ?? "gpt-4o-mini";
+    const pricing = modelPrices[model] as ModelPricing;
+
+    if (!pricing) {
+      logger.error(`No pricing information found for model: ${model}`);
+      return 0;
+    }
+
+    if (pricing.mode !== "chat") {
+      logger.error(`Model ${model} is not a chat model`);
+      return 0;
+    }
+
+    // Add per-request cost if applicable (Only Perplexity supports this)
+    if (pricing.input_cost_per_request) {
+      totalCost += pricing.input_cost_per_request;
+    }
+
+    // Add token-based costs
+    if (pricing.input_cost_per_token) {
+      totalCost += tokenUsage.promptTokens * pricing.input_cost_per_token;
+    }
+
+    if (pricing.output_cost_per_token) {
+      totalCost += tokenUsage.completionTokens * pricing.output_cost_per_token;
+    }
+
+    return Number(totalCost.toFixed(7));
+  } catch (error) {
+    logger.error(`Error estimating cost: ${error}`);
+    return totalCost;
+  }
+}
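Taken together, the two helpers compose like this. A sketch; it assumes both model names have chat-mode entries in modelPrices:

import { estimateCost, estimateTotalCost } from "./llm-cost";

const breakdown = [
  { promptTokens: 1200, completionTokens: 300, totalTokens: 1500, model: "gpt-4o" },
  { promptTokens: 800, completionTokens: 150, totalTokens: 950, model: "gpt-4o-mini" },
];

const perCall = breakdown.map(estimateCost);  // USD estimate per LLM call
const jobCost = estimateTotalCost(breakdown); // summed, stored as llmUsage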
apps/api/src/lib/extract/usage/model-prices.ts (new file, 8139 lines added)
File diff suppressed because it is too large
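The suppressed file is a large static price table keyed by model name. Judging from the ModelPricing interface above, each entry presumably looks roughly like this; the values below are hypothetical, not the shipped numbers:

// Hypothetical excerpt of the price table; the real file covers
// thousands of models, hence the suppressed 8139-line diff.
export const modelPrices: Record<string, any> = {
  "gpt-4o": {
    input_cost_per_token: 0.0000025,
    output_cost_per_token: 0.00001,
    mode: "chat",
  },
  "gpt-4o-mini": {
    input_cost_per_token: 0.00000015,
    output_cost_per_token: 0.0000006,
    mode: "chat",
  },
};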
@@ -1,7 +1,7 @@
 import OpenAI from "openai";
 import { encoding_for_model } from "@dqbd/tiktoken";
 import { TiktokenModel } from "@dqbd/tiktoken";
-import { Document, ExtractOptions } from "../../../controllers/v1/types";
+import { Document, ExtractOptions, TokenUsage } from "../../../controllers/v1/types";
 import { Logger } from "winston";
 import { EngineResultsTracker, Meta } from "..";
 import { logger } from "../../../lib/logger";
@@ -72,7 +72,7 @@ export async function generateOpenAICompletions(
   markdown?: string,
   previousWarning?: string,
   isExtractEndpoint?: boolean,
-): Promise<{ extract: any; numTokens: number; warning: string | undefined }> {
+): Promise<{ extract: any; numTokens: number; warning: string | undefined; totalUsage: TokenUsage }> {
   let extract: any;
   let warning: string | undefined;
@@ -208,6 +208,9 @@ export async function generateOpenAICompletions(
     }
   }

+  const promptTokens = (jsonCompletion.usage?.prompt_tokens ?? 0);
+  const completionTokens = (jsonCompletion.usage?.completion_tokens ?? 0);
+
   // If the users actually wants the items object, they can specify it as 'required' in the schema
   // otherwise, we just return the items array
   if (
@@ -217,7 +220,9 @@ export async function generateOpenAICompletions(
   ) {
     extract = extract?.items;
   }
-  return { extract, warning, numTokens };
+  // num tokens (just user prompt tokenized) | deprecated
+  // totalTokens = promptTokens + completionTokens
+  return { extract, warning, numTokens, totalUsage: { promptTokens, completionTokens, totalTokens: promptTokens + completionTokens, model: model } };
 }

 export async function performLLMExtract(
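Callers can now read usage straight off the completion result instead of re-tokenizing. A minimal consumption sketch, with the function's arguments elided:

// numTokens is kept for backward compatibility (prompt-only count);
// totalUsage carries the prompt/completion split plus the model used.
const { extract, totalUsage } = await generateOpenAICompletions(/* ...args */);

// totalUsage feeds the job-level breakdown that estimateTotalCost prices.
const breakdown: TokenUsage[] = [totalUsage];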