mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-11 23:58:59 +08:00
(feat/extract) Improved completions to use model's limits (#1109)
* Update analyzeSchemaAndPrompt.ts
* Nick: fixes FIR-663
* Update llmExtract.ts
* Update llmExtract.ts
This commit is contained in:
parent
cf8f7d0ce3
commit
fa99c62f64
@ -57,10 +57,11 @@ export const extractOptions = z
|
||||
schema: z.any().optional(),
|
||||
systemPrompt: z
|
||||
.string()
|
||||
.max(10000)
|
||||
.default(
|
||||
"Based on the information on the page, extract all the information from the schema in JSON format. Try to extract all the fields even those that might not be marked as required.",
|
||||
),
|
||||
prompt: z.string().optional(),
|
||||
prompt: z.string().max(10000).optional(),
|
||||
})
|
||||
.strict(strictMessage);
|
||||
|
||||
@ -201,8 +202,8 @@ export const extractV1Options = z
|
||||
urls: url
|
||||
.array()
|
||||
.max(10, "Maximum of 10 URLs allowed per request while in beta."),
|
||||
prompt: z.string().optional(),
|
||||
systemPrompt: z.string().optional(),
|
||||
prompt: z.string().max(10000).optional(),
|
||||
systemPrompt: z.string().max(10000).optional(),
|
||||
schema: z
|
||||
.any()
|
||||
.optional()
|
||||
|
@ -9,9 +9,25 @@ import {
|
||||
import { Logger } from "winston";
|
||||
import { EngineResultsTracker, Meta } from "..";
|
||||
import { logger } from "../../../lib/logger";
|
||||
import { modelPrices } from "../../../lib/extract/usage/model-prices";
|
||||
|
||||
const maxTokens = 32000;
|
||||
const modifier = 4;
|
||||
// Get max tokens from model prices
|
||||
const getModelLimits = (model: string) => {
|
||||
const modelConfig = modelPrices[model];
|
||||
if (!modelConfig) {
|
||||
// Default fallback values
|
||||
return {
|
||||
maxInputTokens: 8192,
|
||||
maxOutputTokens: 4096,
|
||||
maxTokens: 12288,
|
||||
};
|
||||
}
|
||||
return {
|
||||
maxInputTokens: modelConfig.max_input_tokens || modelConfig.max_tokens,
|
||||
maxOutputTokens: modelConfig.max_output_tokens || modelConfig.max_tokens,
|
||||
maxTokens: modelConfig.max_tokens,
|
||||
};
|
||||
};
|
||||
|
||||
export class LLMRefusalError extends Error {
|
||||
public refusal: string;
|
||||
@ -94,6 +110,13 @@ export async function generateOpenAICompletions(
|
||||
throw new Error("document.markdown is undefined -- this is unexpected");
|
||||
}
|
||||
|
||||
const { maxInputTokens, maxOutputTokens } = getModelLimits(model);
|
||||
|
||||
// Ratio of 4 was way too high, now 3.5.
|
||||
const modifier = 3.5; // tokens to characters ratio
|
||||
// Calculate 80% of max input tokens (for content)
|
||||
const maxTokensSafe = Math.floor(maxInputTokens * 0.8);
|
||||
|
||||
// count number of tokens
|
||||
let numTokens = 0;
|
||||
const encoder = encoding_for_model(model as TiktokenModel);
|
||||
@ -106,11 +129,11 @@ export async function generateOpenAICompletions(
|
||||
} catch (error) {
|
||||
logger.warn("Calculating num tokens of string failed", { error, markdown });
|
||||
|
||||
markdown = markdown.slice(0, maxTokens * modifier);
|
||||
markdown = markdown.slice(0, maxTokensSafe * modifier);
|
||||
|
||||
let w =
|
||||
"Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (" +
|
||||
maxTokens +
|
||||
maxTokensSafe +
|
||||
") we support.";
|
||||
warning = previousWarning === undefined ? w : w + " " + previousWarning;
|
||||
} finally {
|
||||
@ -118,15 +141,15 @@ export async function generateOpenAICompletions(
|
||||
encoder.free();
|
||||
}
|
||||
|
||||
if (numTokens > maxTokens) {
|
||||
if (numTokens > maxTokensSafe) {
|
||||
// trim the document to the maximum number of tokens, tokens != characters
|
||||
markdown = markdown.slice(0, maxTokens * modifier);
|
||||
markdown = markdown.slice(0, maxTokensSafe * modifier);
|
||||
|
||||
const w =
|
||||
"The extraction content would have used more tokens (" +
|
||||
numTokens +
|
||||
") than the maximum we allow (" +
|
||||
maxTokens +
|
||||
maxTokensSafe +
|
||||
"). -- the input has been automatically trimmed.";
|
||||
warning = previousWarning === undefined ? w : w + " " + previousWarning;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user