(feat/extract) Improved completions to use model's limits (#1109)

* Update analyzeSchemaAndPrompt.ts

* Nick: fixes FIR-663

* Update llmExtract.ts

* Update llmExtract.ts
Nicolas 2025-01-29 12:37:14 -03:00 committed by GitHub
parent cf8f7d0ce3
commit fa99c62f64
2 changed files with 34 additions and 10 deletions

@@ -57,10 +57,11 @@ export const extractOptions = z
     schema: z.any().optional(),
     systemPrompt: z
       .string()
+      .max(10000)
       .default(
         "Based on the information on the page, extract all the information from the schema in JSON format. Try to extract all the fields even those that might not be marked as required.",
       ),
-    prompt: z.string().optional(),
+    prompt: z.string().max(10000).optional(),
   })
   .strict(strictMessage);
@@ -201,8 +202,8 @@ export const extractV1Options = z
     urls: url
       .array()
       .max(10, "Maximum of 10 URLs allowed per request while in beta."),
-    prompt: z.string().optional(),
-    systemPrompt: z.string().optional(),
+    prompt: z.string().max(10000).optional(),
+    systemPrompt: z.string().max(10000).optional(),
     schema: z
       .any()
       .optional()
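
Both schemas now enforce the same 10,000-character cap, so an oversized prompt is rejected at request validation rather than discovered at the model. A minimal sketch of that behavior, using a stand-in zod object (not the actual extractOptions schema):

```ts
import { z } from "zod";

// Stand-in schema mirroring the new caps from the diff above.
const promptFields = z.object({
  prompt: z.string().max(10000).optional(),
  systemPrompt: z.string().max(10000).optional(),
});

// Within the limit: parses cleanly.
promptFields.parse({ prompt: "Extract the product name and price." });

// Over 10,000 characters: safeParse reports a validation failure
// instead of forwarding the oversized prompt to the model.
const result = promptFields.safeParse({ prompt: "x".repeat(10_001) });
console.log(result.success); // false
```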

@@ -9,9 +9,25 @@ import {
 import { Logger } from "winston";
 import { EngineResultsTracker, Meta } from "..";
 import { logger } from "../../../lib/logger";
+import { modelPrices } from "../../../lib/extract/usage/model-prices";

-const maxTokens = 32000;
-const modifier = 4;
+// Get max tokens from model prices
+const getModelLimits = (model: string) => {
+  const modelConfig = modelPrices[model];
+  if (!modelConfig) {
+    // Default fallback values
+    return {
+      maxInputTokens: 8192,
+      maxOutputTokens: 4096,
+      maxTokens: 12288,
+    };
+  }
+  return {
+    maxInputTokens: modelConfig.max_input_tokens || modelConfig.max_tokens,
+    maxOutputTokens: modelConfig.max_output_tokens || modelConfig.max_tokens,
+    maxTokens: modelConfig.max_tokens,
+  };
+};

 export class LLMRefusalError extends Error {
   public refusal: string;
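
getModelLimits replaces the old fixed maxTokens = 32000 with per-model limits read from modelPrices, falling back to conservative defaults when the model is unknown. A minimal sketch of the lookup, assuming modelPrices is shaped like the LiteLLM model-prices JSON (max_tokens, max_input_tokens, max_output_tokens); the entry below is illustrative:

```ts
// Illustrative entry only; the real map lives in lib/extract/usage/model-prices.
const modelPrices: Record<
  string,
  { max_tokens: number; max_input_tokens?: number; max_output_tokens?: number }
> = {
  "gpt-4o": { max_tokens: 16384, max_input_tokens: 128000, max_output_tokens: 16384 },
};

const getModelLimits = (model: string) => {
  const modelConfig = modelPrices[model];
  if (!modelConfig) {
    // Unknown model: conservative defaults rather than a hard failure.
    return { maxInputTokens: 8192, maxOutputTokens: 4096, maxTokens: 12288 };
  }
  return {
    // Fall back to max_tokens when the finer-grained fields are absent.
    maxInputTokens: modelConfig.max_input_tokens || modelConfig.max_tokens,
    maxOutputTokens: modelConfig.max_output_tokens || modelConfig.max_tokens,
    maxTokens: modelConfig.max_tokens,
  };
};

console.log(getModelLimits("gpt-4o"));      // { maxInputTokens: 128000, maxOutputTokens: 16384, maxTokens: 16384 }
console.log(getModelLimits("mystery-llm")); // { maxInputTokens: 8192, maxOutputTokens: 4096, maxTokens: 12288 }
```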
@@ -94,6 +110,13 @@ export async function generateOpenAICompletions(
     throw new Error("document.markdown is undefined -- this is unexpected");
   }

+  const { maxInputTokens, maxOutputTokens } = getModelLimits(model);
+
+  // Ratio of 4 was way too high, now 3.5.
+  const modifier = 3.5; // tokens to characters ratio
+  // Calculate 80% of max input tokens (for content)
+  const maxTokensSafe = Math.floor(maxInputTokens * 0.8);
+
   // count number of tokens
   let numTokens = 0;
   const encoder = encoding_for_model(model as TiktokenModel);
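
The budget math behind these additions: only 80% of the model's input window is spent on page content, leaving headroom for the system prompt, schema, and instructions, and the character-based fallback now assumes roughly 3.5 characters per token instead of 4. A worked example, reusing the hypothetical getModelLimits sketch above:

```ts
const { maxInputTokens } = getModelLimits("gpt-4o");    // hypothetical model choice
const modifier = 3.5;                                   // rough characters-per-token ratio
const maxTokensSafe = Math.floor(maxInputTokens * 0.8); // 128000 * 0.8 = 102400 tokens

// When trimming must fall back to character counts, the slice keeps
// maxTokensSafe * modifier = 102400 * 3.5 = 358400 characters.
console.log(maxTokensSafe, maxTokensSafe * modifier); // 102400 358400
```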
@@ -106,11 +129,11 @@
   } catch (error) {
     logger.warn("Calculating num tokens of string failed", { error, markdown });

-    markdown = markdown.slice(0, maxTokens * modifier);
+    markdown = markdown.slice(0, maxTokensSafe * modifier);

     let w =
       "Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (" +
-      maxTokens +
+      maxTokensSafe +
       ") we support.";
     warning = previousWarning === undefined ? w : w + " " + previousWarning;
   } finally {
@@ -118,15 +141,15 @@
     encoder.free();
   }

-  if (numTokens > maxTokens) {
+  if (numTokens > maxTokensSafe) {
     // trim the document to the maximum number of tokens, tokens != characters
-    markdown = markdown.slice(0, maxTokens * modifier);
+    markdown = markdown.slice(0, maxTokensSafe * modifier);
     const w =
       "The extraction content would have used more tokens (" +
       numTokens +
       ") than the maximum we allow (" +
-      maxTokens +
+      maxTokensSafe +
       "). -- the input has been automatically trimmed.";
     warning = previousWarning === undefined ? w : w + " " + previousWarning;
   }
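
End to end, the trimming rule is: count tokens with tiktoken; if the count exceeds maxTokensSafe, cut the markdown to maxTokensSafe * modifier characters and surface a warning. A self-contained sketch of that rule, assuming the tiktoken npm package (whose encoding_for_model / encode / free calls match the ones in the diff); the real function also builds the completion request and threads previousWarning through:

```ts
import { encoding_for_model, TiktokenModel } from "tiktoken";

// Hypothetical helper isolating the trimming rule from the diff above.
function trimToBudget(
  markdown: string,
  model: TiktokenModel,
  maxTokensSafe: number,
  modifier = 3.5,
): { markdown: string; warning?: string } {
  const encoder = encoding_for_model(model);
  try {
    const numTokens = encoder.encode(markdown).length;
    if (numTokens > maxTokensSafe) {
      // tokens != characters, so the character slice only approximates
      // the token budget; the warning tells the caller what happened.
      return {
        markdown: markdown.slice(0, maxTokensSafe * modifier),
        warning:
          `The extraction content would have used more tokens (${numTokens}) ` +
          `than the maximum we allow (${maxTokensSafe}). -- the input has been automatically trimmed.`,
      };
    }
    return { markdown };
  } finally {
    encoder.free(); // the WASM-backed encoder must be freed explicitly
  }
}
```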