Nick: introduce llm-usage cost analysis

This commit is contained in:
Nicolas 2025-01-15 21:01:29 -03:00
parent de14c0a45d
commit 4db023280d
10 changed files with 8277 additions and 13 deletions

View File

@ -37,5 +37,6 @@ export async function extractStatusController(
error: extract?.error ?? undefined, error: extract?.error ?? undefined,
expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(), expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(),
steps: extract.showSteps ? extract.steps : undefined, steps: extract.showSteps ? extract.steps : undefined,
llmUsage: extract.showLLMUsage ? extract.llmUsage : undefined,
}); });
} }

View File

@ -71,6 +71,7 @@ export async function extractController(
createdAt: Date.now(), createdAt: Date.now(),
status: "processing", status: "processing",
showSteps: req.body.__experimental_streamSteps, showSteps: req.body.__experimental_streamSteps,
showLLMUsage: req.body.__experimental_llmUsage,
}); });
if (Sentry.isInitialized()) { if (Sentry.isInitialized()) {

View File

@ -222,6 +222,7 @@ export const extractV1Options = z
origin: z.string().optional().default("api"), origin: z.string().optional().default("api"),
urlTrace: z.boolean().default(false), urlTrace: z.boolean().default(false),
__experimental_streamSteps: z.boolean().default(false), __experimental_streamSteps: z.boolean().default(false),
__experimental_llmUsage: z.boolean().default(false),
timeout: z.number().int().positive().finite().safe().default(60000), timeout: z.number().int().positive().finite().safe().default(60000),
}) })
.strict(strictMessage); .strict(strictMessage);
@ -840,3 +841,12 @@ export type SearchResponse =
warning?: string; warning?: string;
data: Document[]; data: Document[];
}; };
/**
 * Token counts for a single LLM call, used for usage reporting and
 * cost estimation (see estimateCost / estimateTotalCost).
 */
export type TokenUsage = {
  // Tokens consumed by the prompt/input side of the call.
  promptTokens: number;
  // Tokens produced in the model's completion/output.
  completionTokens: number;
  // promptTokens + completionTokens (as reported by the provider).
  totalTokens: number;
  // Optional label for which pipeline step produced this usage — TODO confirm intended values.
  step?: string;
  // Model identifier (e.g. "gpt-4o"); used to look up per-token pricing.
  model?: string;
};

View File

@ -30,6 +30,8 @@ export type StoredExtract = {
error?: any; error?: any;
showSteps?: boolean; showSteps?: boolean;
steps?: ExtractedStep[]; steps?: ExtractedStep[];
showLLMUsage?: boolean;
llmUsage?: number;
}; };
export async function saveExtract(id: string, extract: StoredExtract) { export async function saveExtract(id: string, extract: StoredExtract) {

View File

@ -1,6 +1,7 @@
import { import {
Document, Document,
ExtractRequest, ExtractRequest,
TokenUsage,
toLegacyCrawlerOptions, toLegacyCrawlerOptions,
URLTrace, URLTrace,
} from "../../controllers/v1/types"; } from "../../controllers/v1/types";
@ -31,6 +32,7 @@ import { ExtractStep, updateExtract } from "./extract-redis";
import { deduplicateObjectsArray } from "./helpers/deduplicate-objs-array"; import { deduplicateObjectsArray } from "./helpers/deduplicate-objs-array";
import { mergeNullValObjs } from "./helpers/merge-null-val-objs"; import { mergeNullValObjs } from "./helpers/merge-null-val-objs";
import { CUSTOM_U_TEAMS } from "./config"; import { CUSTOM_U_TEAMS } from "./config";
import { estimateCost, estimateTotalCost } from "./usage/llm-cost";
interface ExtractServiceOptions { interface ExtractServiceOptions {
request: ExtractRequest; request: ExtractRequest;
@ -46,6 +48,8 @@ interface ExtractResult {
warning?: string; warning?: string;
urlTrace?: URLTrace[]; urlTrace?: URLTrace[];
error?: string; error?: string;
tokenUsageBreakdown?: TokenUsage[];
llmUsage?: number;
} }
async function analyzeSchemaAndPrompt( async function analyzeSchemaAndPrompt(
@ -57,6 +61,7 @@ async function analyzeSchemaAndPrompt(
multiEntityKeys: string[]; multiEntityKeys: string[];
reasoning?: string; reasoning?: string;
keyIndicators?: string[]; keyIndicators?: string[];
tokenUsage: TokenUsage;
}> { }> {
if (!schema) { if (!schema) {
schema = await generateSchemaFromPrompt(prompt); schema = await generateSchemaFromPrompt(prompt);
@ -71,8 +76,10 @@ async function analyzeSchemaAndPrompt(
keyIndicators: z.array(z.string()), keyIndicators: z.array(z.string()),
}); });
const model = "gpt-4o";
const result = await openai.beta.chat.completions.parse({ const result = await openai.beta.chat.completions.parse({
model: "gpt-4o", model: model,
messages: [ messages: [
{ {
role: "system", role: "system",
@ -131,12 +138,20 @@ Schema: ${schemaString}\nPrompt: ${prompt}\nRelevant URLs: ${urls}`,
const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators } = const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators } =
checkSchema.parse(result.choices[0].message.parsed); checkSchema.parse(result.choices[0].message.parsed);
return { isMultiEntity, multiEntityKeys, reasoning, keyIndicators };
const tokenUsage: TokenUsage = {
promptTokens: result.usage?.prompt_tokens ?? 0,
completionTokens: result.usage?.completion_tokens ?? 0,
totalTokens: result.usage?.total_tokens ?? 0,
model: model,
};
return { isMultiEntity, multiEntityKeys, reasoning, keyIndicators, tokenUsage };
} }
type completions = { type completions = {
extract: Record<string, any>; extract: Record<string, any>;
numTokens: number; numTokens: number;
totalUsage: TokenUsage;
warning?: string; warning?: string;
}; };
@ -164,6 +179,10 @@ export async function performExtraction(
let multiEntityResult: any = {}; let multiEntityResult: any = {};
let singleAnswerResult: any = {}; let singleAnswerResult: any = {};
// Token tracking
let tokenUsage: TokenUsage[] = [];
await updateExtract(extractId, { await updateExtract(extractId, {
status: "processing", status: "processing",
steps: [ steps: [
@ -249,9 +268,12 @@ export async function performExtraction(
// 1. the first one is a completion that will extract the array of items // 1. the first one is a completion that will extract the array of items
// 2. the second one is multiple completions that will extract the items from the array // 2. the second one is multiple completions that will extract the items from the array
let startAnalyze = Date.now(); let startAnalyze = Date.now();
const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators } = const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators, tokenUsage: schemaAnalysisTokenUsage } =
await analyzeSchemaAndPrompt(links, reqSchema, request.prompt ?? ""); await analyzeSchemaAndPrompt(links, reqSchema, request.prompt ?? "");
// Track schema analysis tokens
tokenUsage.push(schemaAnalysisTokenUsage);
// console.log("\nIs Multi Entity:", isMultiEntity); // console.log("\nIs Multi Entity:", isMultiEntity);
// console.log("\nMulti Entity Keys:", multiEntityKeys); // console.log("\nMulti Entity Keys:", multiEntityKeys);
// console.log("\nReasoning:", reasoning); // console.log("\nReasoning:", reasoning);
@ -376,6 +398,8 @@ export async function performExtraction(
true, true,
); );
tokenUsage.push(shouldExtractCheck.totalUsage);
if (!shouldExtractCheck.extract["extract"]) { if (!shouldExtractCheck.extract["extract"]) {
console.log( console.log(
`Skipping extraction for ${doc.metadata.url} as content is irrelevant`, `Skipping extraction for ${doc.metadata.url} as content is irrelevant`,
@ -438,6 +462,11 @@ export async function performExtraction(
timeoutPromise, timeoutPromise,
])) as Awaited<ReturnType<typeof generateOpenAICompletions>>; ])) as Awaited<ReturnType<typeof generateOpenAICompletions>>;
// Track multi-entity extraction tokens
if (multiEntityCompletion) {
tokenUsage.push(multiEntityCompletion.totalUsage);
}
// console.log(multiEntityCompletion.extract) // console.log(multiEntityCompletion.extract)
// if (!multiEntityCompletion.extract?.is_content_relevant) { // if (!multiEntityCompletion.extract?.is_content_relevant) {
// console.log(`Skipping extraction for ${doc.metadata.url} as content is not relevant`); // console.log(`Skipping extraction for ${doc.metadata.url} as content is not relevant`);
@ -603,6 +632,11 @@ export async function performExtraction(
true, true,
); );
// Track single answer extraction tokens
if (singleAnswerCompletions) {
tokenUsage.push(singleAnswerCompletions.totalUsage);
}
singleAnswerResult = singleAnswerCompletions.extract; singleAnswerResult = singleAnswerCompletions.extract;
// Update token usage in traces // Update token usage in traces
@ -641,7 +675,10 @@ export async function performExtraction(
); );
}); });
// Log job const totalTokensUsed = tokenUsage.reduce((a, b) => a + b.totalTokens, 0);
const llmUsage = estimateTotalCost(tokenUsage);
// Log job with token usage
logJob({ logJob({
job_id: extractId, job_id: extractId,
success: true, success: true,
@ -654,10 +691,11 @@ export async function performExtraction(
url: request.urls.join(", "), url: request.urls.join(", "),
scrapeOptions: request, scrapeOptions: request,
origin: request.origin ?? "api", origin: request.origin ?? "api",
num_tokens: 0, // completions?.numTokens ?? 0, num_tokens: totalTokensUsed,
}).then(() => { }).then(() => {
updateExtract(extractId, { updateExtract(extractId, {
status: "completed", status: "completed",
llmUsage,
}).catch((error) => { }).catch((error) => {
logger.error( logger.error(
`Failed to update extract ${extractId} status to completed: ${error}`, `Failed to update extract ${extractId} status to completed: ${error}`,
@ -671,5 +709,7 @@ export async function performExtraction(
extractId, extractId,
warning: undefined, // TODO FIX warning: undefined, // TODO FIX
urlTrace: request.urlTrace ? urlTraces : undefined, urlTrace: request.urlTrace ? urlTraces : undefined,
llmUsage,
}; };
} }

View File

@ -150,16 +150,21 @@ function filterAndProcessLinks(
); );
} }
/**
 * Result of an LLM-based link reranking pass: the filtered/ordered
 * documents plus the number of tokens the reranking calls consumed
 * (so callers can aggregate usage across passes).
 */
export type RerankerResult = {
  // Links deemed relevant by the reranker, in ranked order.
  mapDocument: MapDocument[];
  // Total LLM tokens consumed across all reranking chunks.
  tokensUsed: number;
}; // trailing semicolon added for consistency with the file's other type declarations
export async function rerankLinksWithLLM( export async function rerankLinksWithLLM(
mappedLinks: MapDocument[], mappedLinks: MapDocument[],
searchQuery: string, searchQuery: string,
urlTraces: URLTrace[], urlTraces: URLTrace[],
): Promise<MapDocument[]> { ): Promise<RerankerResult> {
const chunkSize = 100; const chunkSize = 100;
const chunks: MapDocument[][] = []; const chunks: MapDocument[][] = [];
const TIMEOUT_MS = 20000; const TIMEOUT_MS = 20000;
const MAX_RETRIES = 2; const MAX_RETRIES = 2;
let totalTokensUsed = 0;
// Split mappedLinks into chunks of 200 // Split mappedLinks into chunks of 200
for (let i = 0; i < mappedLinks.length; i += chunkSize) { for (let i = 0; i < mappedLinks.length; i += chunkSize) {
@ -225,6 +230,7 @@ export async function rerankLinksWithLLM(
return []; return [];
} }
totalTokensUsed += completion.numTokens || 0;
// console.log(`Chunk ${chunkIndex + 1}: Found ${completion.extract.relevantLinks.length} relevant links`); // console.log(`Chunk ${chunkIndex + 1}: Found ${completion.extract.relevantLinks.length} relevant links`);
return completion.extract.relevantLinks; return completion.extract.relevantLinks;
@ -252,5 +258,8 @@ export async function rerankLinksWithLLM(
.filter((link): link is MapDocument => link !== undefined); .filter((link): link is MapDocument => link !== undefined);
// console.log(`Returning ${relevantLinks.length} relevant links`); // console.log(`Returning ${relevantLinks.length} relevant links`);
return relevantLinks; return {
mapDocument: relevantLinks,
tokensUsed: totalTokensUsed,
};
} }

View File

@ -199,15 +199,19 @@ export async function processUrl(
// (link, index) => `${index + 1}. URL: ${link.url}, Title: ${link.title}, Description: ${link.description}` // (link, index) => `${index + 1}. URL: ${link.url}, Title: ${link.title}, Description: ${link.description}`
// ); // );
mappedLinks = await rerankLinksWithLLM(mappedLinks, searchQuery, urlTraces); const rerankerResult = await rerankLinksWithLLM(mappedLinks, searchQuery, urlTraces);
mappedLinks = rerankerResult.mapDocument;
let tokensUsed = rerankerResult.tokensUsed;
// 2nd Pass, useful for when the first pass returns too many links // 2nd Pass, useful for when the first pass returns too many links
if (mappedLinks.length > 100) { if (mappedLinks.length > 100) {
mappedLinks = await rerankLinksWithLLM( const rerankerResult = await rerankLinksWithLLM(
mappedLinks, mappedLinks,
searchQuery, searchQuery,
urlTraces, urlTraces,
); );
mappedLinks = rerankerResult.mapDocument;
tokensUsed += rerankerResult.tokensUsed;
} }
// dumpToFile( // dumpToFile(

View File

@ -0,0 +1,53 @@
import { TokenUsage } from "../../../controllers/v1/types";
import { logger } from "../../../lib/logger";
import { modelPrices } from "./model-prices";
/**
 * Shape of a single entry in the model-prices table.
 * Costs are expressed in USD per token (or per request, where a
 * provider bills that way).
 */
interface ModelPricing {
  // USD cost per prompt/input token, if the model bills per input token.
  input_cost_per_token?: number;
  // USD cost per completion/output token, if the model bills per output token.
  output_cost_per_token?: number;
  // Flat USD cost per request; per the usage site, only some providers (e.g. Perplexity) set this.
  input_cost_per_request?: number;
  // Model modality (e.g. "chat"); estimateCost only prices "chat" models.
  mode: string;
}
/**
 * Sums the estimated USD cost of every recorded LLM call.
 *
 * @param tokenUsage per-call token usage records
 * @returns the total estimated cost across all records
 */
export function estimateTotalCost(tokenUsage: TokenUsage[]): number {
  let total = 0;
  for (const usage of tokenUsage) {
    total += estimateCost(usage);
  }
  return total;
}
/**
 * Estimates the USD cost of a single LLM call from its token counts
 * and the pricing table.
 *
 * Falls back to MODEL_NAME env var, then "gpt-4o-mini", when the usage
 * record carries no model. Returns 0 (with an error log) for unknown
 * models or non-chat models; on an unexpected error it logs and returns
 * whatever was accumulated so far.
 *
 * @param tokenUsage token counts (and optional model) for one call
 * @returns estimated cost in USD, rounded to 7 decimal places
 */
export function estimateCost(tokenUsage: TokenUsage): number {
  let accumulated = 0;
  try {
    const modelName =
      tokenUsage.model ?? process.env.MODEL_NAME ?? "gpt-4o-mini";
    const pricing = modelPrices[modelName] as ModelPricing;

    // Unknown model: nothing sensible to charge, so report zero.
    if (!pricing) {
      logger.error(`No pricing information found for model: ${modelName}`);
      return 0;
    }

    // Only chat-mode models are priced here.
    if (pricing.mode !== "chat") {
      logger.error(`Model ${modelName} is not a chat model`);
      return 0;
    }

    // Flat per-request charge (only some providers, e.g. Perplexity, bill this way).
    if (pricing.input_cost_per_request) {
      accumulated += pricing.input_cost_per_request;
    }

    // Per-token charges for input and output sides.
    if (pricing.input_cost_per_token) {
      accumulated += tokenUsage.promptTokens * pricing.input_cost_per_token;
    }
    if (pricing.output_cost_per_token) {
      accumulated += tokenUsage.completionTokens * pricing.output_cost_per_token;
    }

    // Round to avoid floating-point noise in reported costs.
    return Number(accumulated.toFixed(7));
  } catch (error) {
    logger.error(`Error estimating cost: ${error}`);
    // Best-effort: return whatever was accumulated before the failure.
    return accumulated;
  }
}

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
import OpenAI from "openai"; import OpenAI from "openai";
import { encoding_for_model } from "@dqbd/tiktoken"; import { encoding_for_model } from "@dqbd/tiktoken";
import { TiktokenModel } from "@dqbd/tiktoken"; import { TiktokenModel } from "@dqbd/tiktoken";
import { Document, ExtractOptions } from "../../../controllers/v1/types"; import { Document, ExtractOptions, TokenUsage } from "../../../controllers/v1/types";
import { Logger } from "winston"; import { Logger } from "winston";
import { EngineResultsTracker, Meta } from ".."; import { EngineResultsTracker, Meta } from "..";
import { logger } from "../../../lib/logger"; import { logger } from "../../../lib/logger";
@ -72,7 +72,7 @@ export async function generateOpenAICompletions(
markdown?: string, markdown?: string,
previousWarning?: string, previousWarning?: string,
isExtractEndpoint?: boolean, isExtractEndpoint?: boolean,
): Promise<{ extract: any; numTokens: number; warning: string | undefined }> { ): Promise<{ extract: any; numTokens: number; warning: string | undefined; totalUsage: TokenUsage }> {
let extract: any; let extract: any;
let warning: string | undefined; let warning: string | undefined;
@ -208,6 +208,9 @@ export async function generateOpenAICompletions(
} }
} }
const promptTokens = (jsonCompletion.usage?.prompt_tokens ?? 0);
const completionTokens = (jsonCompletion.usage?.completion_tokens ?? 0);
// If the users actually wants the items object, they can specify it as 'required' in the schema // If the users actually wants the items object, they can specify it as 'required' in the schema
// otherwise, we just return the items array // otherwise, we just return the items array
if ( if (
@ -217,7 +220,9 @@ export async function generateOpenAICompletions(
) { ) {
extract = extract?.items; extract = extract?.items;
} }
return { extract, warning, numTokens }; // num tokens (just user prompt tokenized) | deprecated
// totalTokens = promptTokens + completionTokens
return { extract, warning, numTokens, totalUsage: { promptTokens, completionTokens, totalTokens: promptTokens + completionTokens, model: model } };
} }
export async function performLLMExtract( export async function performLLMExtract(