Merge pull request #1068 from mendableai/nsc/llm-usage-extract

(feat/extract) - LLM usage analysis + billing
Authored by Nicolas on 2025-01-19 21:36:33 -03:00; committed by GitHub
commit 406f28c04a
16 changed files with 8344 additions and 27 deletions

View File

@ -77,8 +77,9 @@ export async function getACUC(
api_key: string,
cacheOnly = false,
useCache = true,
mode?: RateLimiterMode,
): Promise<AuthCreditUsageChunk | null> {
const cacheKeyACUC = `acuc_${api_key}`;
const cacheKeyACUC = `acuc_${api_key}_${mode}`;
if (useCache) {
const cachedACUC = await getValue(cacheKeyACUC);
@ -93,9 +94,13 @@ export async function getACUC(
let retries = 0;
const maxRetries = 5;
let rpcName =
mode === RateLimiterMode.Extract || mode === RateLimiterMode.ExtractStatus
? "auth_credit_usage_chunk_extract"
: "auth_credit_usage_chunk_test_22_credit_pack_n_extract";
while (retries < maxRetries) {
({ data, error } = await supabase_service.rpc(
"auth_credit_usage_chunk_test_21_credit_pack",
rpcName,
{ input_key: api_key },
{ get: true },
));
@ -127,8 +132,6 @@ export async function getACUC(
setCachedACUC(api_key, chunk);
}
// console.log(chunk);
return chunk;
} else {
return null;
@ -203,7 +206,7 @@ export async function supaAuthenticateUser(
};
}
chunk = await getACUC(normalizedApi);
chunk = await getACUC(normalizedApi, false, true, mode);
if (chunk === null) {
return {
@ -258,6 +261,9 @@ export async function supaAuthenticateUser(
subscriptionData.plan,
);
break;
case RateLimiterMode.ExtractStatus:
rateLimiter = getRateLimiter(RateLimiterMode.ExtractStatus, token);
break;
case RateLimiterMode.CrawlStatus:
rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
break;
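In effect, credit-usage chunks are now cached and fetched per mode: extract traffic resolves through its own RPC, and the mode-namespaced cache key keeps extract and non-extract chunks from colliding. A condensed sketch of the new lookup path, using only identifiers that appear in this diff:

// Condensed from the hunks above. RateLimiterMode gains ExtractStatus in the
// types file at the end of this PR; api_key and mode are getACUC's parameters.
const cacheKeyACUC = `acuc_${api_key}_${mode}`; // cache entries are per-mode now
const rpcName =
  mode === RateLimiterMode.Extract || mode === RateLimiterMode.ExtractStatus
    ? "auth_credit_usage_chunk_extract"
    : "auth_credit_usage_chunk_test_22_credit_pack_n_extract";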

View File

@ -37,5 +37,6 @@ export async function extractStatusController(
error: extract?.error ?? undefined,
expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(),
steps: extract.showSteps ? extract.steps : undefined,
llmUsage: extract.showLLMUsage ? extract.llmUsage : undefined,
});
}
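For reference, a completed status response now carries the estimated LLM spend when the job was started with the experimental flag. A hypothetical payload fragment (values illustrative; only fields shown in this diff are included):

// Illustrative fragment of a GET /extract/:jobId response:
const exampleStatusResponse = {
  // ...other status fields elided...
  expiresAt: "2025-01-20T21:36:33.000Z",
  llmUsage: 0.0042, // dollar estimate; omitted unless the job stored showLLMUsage
};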

View File

@ -71,6 +71,7 @@ export async function extractController(
createdAt: Date.now(),
status: "processing",
showSteps: req.body.__experimental_streamSteps,
showLLMUsage: req.body.__experimental_llmUsage,
});
if (Sentry.isInitialized()) {

View File

@ -226,6 +226,7 @@ export const extractV1Options = z
origin: z.string().optional().default("api"),
urlTrace: z.boolean().default(false),
__experimental_streamSteps: z.boolean().default(false),
__experimental_llmUsage: z.boolean().default(false),
timeout: z.number().int().positive().finite().safe().default(60000),
})
.strict(strictMessage);
@ -881,3 +882,12 @@ export type SearchResponse =
warning?: string;
data: Document[];
};
export type TokenUsage = {
promptTokens: number;
completionTokens: number;
totalTokens: number;
step?: string;
model?: string;
};
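Every LLM call in the extract pipeline now emits one of these records, so totals fall out of a plain reduce. A minimal illustration (step names hypothetical):

// Hypothetical per-step records, aggregated the way performExtraction does:
import { TokenUsage } from "../../controllers/v1/types"; // path as used elsewhere in this PR

const usage: TokenUsage[] = [
  { promptTokens: 1200, completionTokens: 150, totalTokens: 1350, step: "schema-analysis", model: "gpt-4o" },
  { promptTokens: 800, completionTokens: 90, totalTokens: 890, step: "single-answer-extract", model: "gpt-4o" },
];
const totalTokensUsed = usage.reduce((sum, u) => sum + u.totalTokens, 0); // 2240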

View File

@ -30,6 +30,8 @@ export type StoredExtract = {
error?: any;
showSteps?: boolean;
steps?: ExtractedStep[];
showLLMUsage?: boolean;
llmUsage?: number;
};
export async function saveExtract(id: string, extract: StoredExtract) {

View File

@ -1,6 +1,7 @@
import {
Document,
ExtractRequest,
TokenUsage,
toLegacyCrawlerOptions,
URLTrace,
} from "../../controllers/v1/types";
@ -31,6 +32,7 @@ import { ExtractStep, updateExtract } from "./extract-redis";
import { deduplicateObjectsArray } from "./helpers/deduplicate-objs-array";
import { mergeNullValObjs } from "./helpers/merge-null-val-objs";
import { CUSTOM_U_TEAMS } from "./config";
import { calculateFinalResultCost, estimateCost, estimateTotalCost } from "./usage/llm-cost";
interface ExtractServiceOptions {
request: ExtractRequest;
@ -46,6 +48,9 @@ interface ExtractResult {
warning?: string;
urlTrace?: URLTrace[];
error?: string;
tokenUsageBreakdown?: TokenUsage[];
llmUsage?: number;
totalUrlsScraped?: number;
}
async function analyzeSchemaAndPrompt(
@ -57,6 +62,7 @@ async function analyzeSchemaAndPrompt(
multiEntityKeys: string[];
reasoning?: string;
keyIndicators?: string[];
tokenUsage: TokenUsage;
}> {
if (!schema) {
schema = await generateSchemaFromPrompt(prompt);
@ -71,8 +77,10 @@ async function analyzeSchemaAndPrompt(
keyIndicators: z.array(z.string()),
});
const model = "gpt-4o";
const result = await openai.beta.chat.completions.parse({
model: "gpt-4o",
model: model,
messages: [
{
role: "system",
@ -131,12 +139,20 @@ Schema: ${schemaString}\nPrompt: ${prompt}\nRelevant URLs: ${urls}`,
const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators } =
checkSchema.parse(result.choices[0].message.parsed);
return { isMultiEntity, multiEntityKeys, reasoning, keyIndicators };
const tokenUsage: TokenUsage = {
promptTokens: result.usage?.prompt_tokens ?? 0,
completionTokens: result.usage?.completion_tokens ?? 0,
totalTokens: result.usage?.total_tokens ?? 0,
model: model,
};
return { isMultiEntity, multiEntityKeys, reasoning, keyIndicators, tokenUsage };
}
type completions = {
extract: Record<string, any>;
numTokens: number;
totalUsage: TokenUsage;
warning?: string;
};
@ -163,6 +179,11 @@ export async function performExtraction(
let multiEntityCompletions: completions[] = [];
let multiEntityResult: any = {};
let singleAnswerResult: any = {};
let totalUrlsScraped = 0;
// Token tracking
let tokenUsage: TokenUsage[] = [];
await updateExtract(extractId, {
status: "processing",
@ -219,6 +240,7 @@ export async function performExtraction(
"No valid URLs found to scrape. Try adjusting your search criteria or including more URLs.",
extractId,
urlTrace: urlTraces,
totalUrlsScraped: 0
};
}
@ -249,9 +271,12 @@ export async function performExtraction(
// 1. the first one is a completion that will extract the array of items
// 2. the second one is multiple completions that will extract the items from the array
let startAnalyze = Date.now();
const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators } =
const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators, tokenUsage: schemaAnalysisTokenUsage } =
await analyzeSchemaAndPrompt(links, reqSchema, request.prompt ?? "");
// Track schema analysis tokens
tokenUsage.push(schemaAnalysisTokenUsage);
// console.log("\nIs Multi Entity:", isMultiEntity);
// console.log("\nMulti Entity Keys:", multiEntityKeys);
// console.log("\nReasoning:", reasoning);
@ -312,6 +337,8 @@ export async function performExtraction(
(doc): doc is Document => doc !== null,
);
totalUrlsScraped += multyEntityDocs.length;
let endScrape = Date.now();
await updateExtract(extractId, {
@ -376,6 +403,8 @@ export async function performExtraction(
true,
);
tokenUsage.push(shouldExtractCheck.totalUsage);
if (!shouldExtractCheck.extract["extract"]) {
console.log(
`Skipping extraction for ${doc.metadata.url} as content is irrelevant`,
@ -438,6 +467,11 @@ export async function performExtraction(
timeoutPromise,
])) as Awaited<ReturnType<typeof generateOpenAICompletions>>;
// Track multi-entity extraction tokens
if (multiEntityCompletion) {
tokenUsage.push(multiEntityCompletion.totalUsage);
}
// console.log(multiEntityCompletion.extract)
// if (!multiEntityCompletion.extract?.is_content_relevant) {
// console.log(`Skipping extraction for ${doc.metadata.url} as content is not relevant`);
@ -500,6 +534,7 @@ export async function performExtraction(
"An unexpected error occurred. Please contact help@firecrawl.com for help.",
extractId,
urlTrace: urlTraces,
totalUrlsScraped
};
}
}
@ -551,15 +586,17 @@ export async function performExtraction(
}
}
singleAnswerDocs.push(
...results.filter((doc): doc is Document => doc !== null),
);
const validResults = results.filter((doc): doc is Document => doc !== null);
singleAnswerDocs.push(...validResults);
totalUrlsScraped += validResults.length;
} catch (error) {
return {
success: false,
error: error.message,
extractId,
urlTrace: urlTraces,
totalUrlsScraped
};
}
@ -571,6 +608,7 @@ export async function performExtraction(
"All provided URLs are invalid. Please check your input and try again.",
extractId,
urlTrace: request.urlTrace ? urlTraces : undefined,
totalUrlsScraped: 0
};
}
@ -603,6 +641,11 @@ export async function performExtraction(
true,
);
// Track single answer extraction tokens
if (singleAnswerCompletions) {
tokenUsage.push(singleAnswerCompletions.totalUsage);
}
singleAnswerResult = singleAnswerCompletions.extract;
// Update token usage in traces
@ -629,19 +672,24 @@ export async function performExtraction(
? await mixSchemaObjects(reqSchema, singleAnswerResult, multiEntityResult)
: singleAnswerResult || multiEntityResult;
let linksBilled = links.length * 5;
const totalTokensUsed = tokenUsage.reduce((a, b) => a + b.totalTokens, 0);
const llmUsage = estimateTotalCost(tokenUsage);
let tokensToBill = calculateFinalResultCost(finalResult);
if (CUSTOM_U_TEAMS.includes(teamId)) {
linksBilled = 1;
tokensToBill = 1;
}
// Bill team for usage
billTeam(teamId, subId, linksBilled).catch((error) => {
billTeam(teamId, subId, tokensToBill, logger, true).catch((error) => {
logger.error(
`Failed to bill team ${teamId} for ${linksBilled} credits: ${error}`,
`Failed to bill team ${teamId} for ${tokensToBill} tokens: ${error}`,
);
});
// Log job
// Log job with token usage
logJob({
job_id: extractId,
success: true,
@ -654,10 +702,12 @@ export async function performExtraction(
url: request.urls.join(", "),
scrapeOptions: request,
origin: request.origin ?? "api",
num_tokens: 0, // completions?.numTokens ?? 0,
num_tokens: totalTokensUsed,
tokens_billed: tokensToBill,
}).then(() => {
updateExtract(extractId, {
status: "completed",
llmUsage,
}).catch((error) => {
logger.error(
`Failed to update extract ${extractId} status to completed: ${error}`,
@ -671,5 +721,7 @@ export async function performExtraction(
extractId,
warning: undefined, // TODO FIX
urlTrace: request.urlTrace ? urlTraces : undefined,
llmUsage,
totalUrlsScraped
};
}
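Pulling this file's changes together: usage records accumulate across schema analysis, relevance checks, and both extraction paths, and cost plus billing derive from them at the end. A condensed sketch of that tail end, mirroring the diff above:

// Condensed from performExtraction's closing lines:
const totalTokensUsed = tokenUsage.reduce((a, b) => a + b.totalTokens, 0); // logged as num_tokens
const llmUsage = estimateTotalCost(tokenUsage); // dollar estimate, surfaced via extract status
let tokensToBill = calculateFinalResultCost(finalResult); // size-based billing unit
if (CUSTOM_U_TEAMS.includes(teamId)) tokensToBill = 1; // flat charge for internal teams
billTeam(teamId, subId, tokensToBill, logger, true).catch(() => {}); // true = is_extract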

View File

@ -150,16 +150,21 @@ function filterAndProcessLinks(
);
}
export type RerankerResult = {
mapDocument: MapDocument[];
tokensUsed: number;
}
export async function rerankLinksWithLLM(
mappedLinks: MapDocument[],
searchQuery: string,
urlTraces: URLTrace[],
): Promise<MapDocument[]> {
): Promise<RerankerResult> {
const chunkSize = 100;
const chunks: MapDocument[][] = [];
const TIMEOUT_MS = 20000;
const MAX_RETRIES = 2;
let totalTokensUsed = 0;
// Split mappedLinks into chunks of 200
for (let i = 0; i < mappedLinks.length; i += chunkSize) {
@ -225,6 +230,7 @@ export async function rerankLinksWithLLM(
return [];
}
totalTokensUsed += completion.numTokens || 0;
// console.log(`Chunk ${chunkIndex + 1}: Found ${completion.extract.relevantLinks.length} relevant links`);
return completion.extract.relevantLinks;
@ -252,5 +258,8 @@ export async function rerankLinksWithLLM(
.filter((link): link is MapDocument => link !== undefined);
// console.log(`Returning ${relevantLinks.length} relevant links`);
return relevantLinks;
}
return {
mapDocument: relevantLinks,
tokensUsed: totalTokensUsed,
};
}

View File

@ -199,15 +199,19 @@ export async function processUrl(
// (link, index) => `${index + 1}. URL: ${link.url}, Title: ${link.title}, Description: ${link.description}`
// );
mappedLinks = await rerankLinksWithLLM(mappedLinks, searchQuery, urlTraces);
const rerankerResult = await rerankLinksWithLLM(mappedLinks, searchQuery, urlTraces);
mappedLinks = rerankerResult.mapDocument;
let tokensUsed = rerankerResult.tokensUsed;
// 2nd Pass, useful for when the first pass returns too many links
if (mappedLinks.length > 100) {
mappedLinks = await rerankLinksWithLLM(
const rerankerResult = await rerankLinksWithLLM(
mappedLinks,
searchQuery,
urlTraces,
);
mappedLinks = rerankerResult.mapDocument;
tokensUsed += rerankerResult.tokensUsed;
}
// dumpToFile(

View File

@ -0,0 +1,59 @@
import { TokenUsage } from "../../../controllers/v1/types";
import { logger } from "../../../lib/logger";
import { modelPrices } from "./model-prices";
interface ModelPricing {
input_cost_per_token?: number;
output_cost_per_token?: number;
input_cost_per_request?: number;
mode: string;
}
const tokenPerCharacter = 4;
const baseTokenCost = 300;
export function calculateFinalResultCost(data: any): number {
return Math.floor((JSON.stringify(data).length / tokenPerCharacter) + baseTokenCost);
}
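// Worked example (illustrative): a final result that serializes to 2,000
// characters costs floor(2000 / 4) + 300 = 800 billable tokens. Note that,
// despite its name, tokenPerCharacter is used as a characters-per-token
// divisor (roughly 4 characters per token).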
export function estimateTotalCost(tokenUsage: TokenUsage[]): number {
return tokenUsage.reduce((total, usage) => {
return total + estimateCost(usage);
}, 0);
}
export function estimateCost(tokenUsage: TokenUsage): number {
let totalCost = 0;
try {
let model = tokenUsage.model ?? process.env.MODEL_NAME ?? "gpt-4o-mini";
const pricing = modelPrices[model] as ModelPricing;
if (!pricing) {
logger.error(`No pricing information found for model: ${model}`);
return 0;
}
if (pricing.mode !== "chat") {
logger.error(`Model ${model} is not a chat model`);
return 0;
}
// Add per-request cost if applicable (Only Perplexity supports this)
if (pricing.input_cost_per_request) {
totalCost += pricing.input_cost_per_request;
}
// Add token-based costs
if (pricing.input_cost_per_token) {
totalCost += tokenUsage.promptTokens * pricing.input_cost_per_token;
}
if (pricing.output_cost_per_token) {
totalCost += tokenUsage.completionTokens * pricing.output_cost_per_token;
}
return Number(totalCost.toFixed(7));
} catch (error) {
logger.error(`Error estimating cost: ${error}`);
return totalCost;
}
}
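To make the arithmetic concrete, here is a worked call with hypothetical pricing; the real rates live in the (suppressed) model-prices file below and are not reproduced here:

// Hypothetical rates, NOT taken from model-prices:
//   input_cost_per_token  = 0.0000025 ($2.50 per 1M prompt tokens)
//   output_cost_per_token = 0.00001   ($10.00 per 1M completion tokens)
// estimateCost({ promptTokens: 1000, completionTokens: 200, totalTokens: 1200, model: "gpt-4o" })
//   = 1000 * 0.0000025 + 200 * 0.00001
//   = 0.0025 + 0.002
//   = 0.0045 (then rounded via toFixed(7))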

File diff suppressed because it is too large. (Likely the model-prices pricing table imported by llm-cost above; it would account for most of this PR's 8,344 added lines.)

View File

@ -228,7 +228,7 @@ v1Router.post(
v1Router.get(
"/extract/:jobId",
authMiddleware(RateLimiterMode.CrawlStatus),
authMiddleware(RateLimiterMode.ExtractStatus),
wrap(extractStatusController),
);
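The extract status route previously piggybacked on the CrawlStatus limiter; it now resolves its own bucket via the ExtractStatus mode wired up in the last files of this PR. A minimal sketch of what the middleware ends up doing, assuming getRateLimiter's existing (mode, token) signature:

// What authMiddleware resolves for GET /extract/:jobId after this change:
const rateLimiter = getRateLimiter(RateLimiterMode.ExtractStatus, token);
// per the new RATE_LIMITS.extractStatus entry: free = 500, default = 5000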

View File

@ -1,7 +1,7 @@
import OpenAI from "openai";
import { encoding_for_model } from "@dqbd/tiktoken";
import { TiktokenModel } from "@dqbd/tiktoken";
import { Document, ExtractOptions } from "../../../controllers/v1/types";
import { Document, ExtractOptions, TokenUsage } from "../../../controllers/v1/types";
import { Logger } from "winston";
import { EngineResultsTracker, Meta } from "..";
import { logger } from "../../../lib/logger";
@ -72,7 +72,7 @@ export async function generateOpenAICompletions(
markdown?: string,
previousWarning?: string,
isExtractEndpoint?: boolean,
): Promise<{ extract: any; numTokens: number; warning: string | undefined }> {
): Promise<{ extract: any; numTokens: number; warning: string | undefined; totalUsage: TokenUsage }> {
let extract: any;
let warning: string | undefined;
@ -208,6 +208,9 @@ export async function generateOpenAICompletions(
}
}
const promptTokens = (jsonCompletion.usage?.prompt_tokens ?? 0);
const completionTokens = (jsonCompletion.usage?.completion_tokens ?? 0);
// If the users actually wants the items object, they can specify it as 'required' in the schema
// otherwise, we just return the items array
if (
@ -217,7 +220,9 @@ export async function generateOpenAICompletions(
) {
extract = extract?.items;
}
return { extract, warning, numTokens };
// num tokens (just user prompt tokenized) | deprecated
// totalTokens = promptTokens + completionTokens
return { extract, warning, numTokens, totalUsage: { promptTokens, completionTokens, totalTokens: promptTokens + completionTokens, model: model } };
}
export async function performLLMExtract(
@ -282,6 +287,22 @@ Consider:
3. Appropriate data types for each field
4. Nested objects and arrays where appropriate
Valid JSON schema, has to be simple. No crazy properties. OpenAI has to support it.
Supported types
The following types are supported for Structured Outputs:
String
Number
Boolean
Integer
Object
Array
Enum
anyOf
Formats are not supported. Min/max are not supported. Anything beyond the above is not supported. Keep it simple with types and descriptions.
Optionals are not supported.
Keep it simple. Don't create too many properties, just the ones that are needed. Don't invent properties.
Return a valid JSON schema object with properties that would capture the information requested in the prompt.`,
},
{
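Returning to the signature change at the top of this file: numTokens (prompt tokens only, now commented as deprecated) is kept for existing callers, while totalUsage carries the full prompt/completion breakdown. A caller-side sketch with arguments elided:

// Sketch: how the extract service consumes the widened return value.
const completion = await generateOpenAICompletions(/* ...args elided... */);
tokenUsage.push(completion.totalUsage); // { promptTokens, completionTokens, totalTokens, model }
const legacyCount = completion.numTokens; // still returned; prompt tokens only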

View File

@ -23,12 +23,14 @@ export async function billTeam(
subscription_id: string | null | undefined,
credits: number,
logger?: Logger,
is_extract: boolean = false,
) {
return withAuth(supaBillTeam, { success: true, message: "No DB, bypassed." })(
team_id,
subscription_id,
credits,
logger,
is_extract,
);
}
export async function supaBillTeam(
@ -36,6 +38,7 @@ export async function supaBillTeam(
subscription_id: string | null | undefined,
credits: number,
__logger?: Logger,
is_extract: boolean = false,
) {
const _logger = (__logger ?? logger).child({
module: "credit_billing",
@ -50,11 +53,12 @@ export async function supaBillTeam(
credits,
});
const { data, error } = await supabase_service.rpc("bill_team", {
const { data, error } = await supabase_service.rpc("bill_team_w_extract_3", {
_team_id: team_id,
sub_id: subscription_id ?? null,
fetch_subscription: subscription_id === undefined,
credits,
is_extract_param: is_extract,
});
if (error) {
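Caller-side, the only visible change is the trailing flag, which the renamed bill_team_w_extract_3 RPC receives as is_extract_param so extract token billing can be separated from ordinary credit billing. A minimal sketch (IDs and amount hypothetical):

// Hypothetical call: bill 800 result tokens to a team for an extract job.
await billTeam("team_123", null, 800, logger, /* is_extract */ true);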

View File

@ -59,6 +59,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
num_tokens: job.num_tokens,
retry: !!job.retry,
crawl_id: job.crawl_id,
tokens_billed: job.tokens_billed,
};
if (force) {
@ -128,6 +129,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
origin: job.origin,
num_tokens: job.num_tokens,
retry: job.retry,
tokens_billed: job.tokens_billed,
},
};
if (job.mode !== "single_urls") {

View File

@ -100,6 +100,10 @@ const RATE_LIMITS = {
free: 500,
default: 5000,
},
extractStatus: {
free: 500,
default: 5000,
},
testSuite: {
free: 10000,
default: 10000,

View File

@ -87,6 +87,7 @@ export interface FirecrawlJob {
num_tokens?: number;
retry?: boolean;
crawl_id?: string;
tokens_billed?: number;
}
export interface FirecrawlScrapeResponse {
@ -133,6 +134,7 @@ export enum RateLimiterMode {
Search = "search",
Map = "map",
Extract = "extract",
ExtractStatus = "extractStatus",
}
export type AuthResponse =