Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
Synced 2025-08-14 03:06:00 +08:00
(fix/token-slicer) Fixes extract token limit issues (#1236)

* Nick: fixes extract token limit errors
* Update llmExtract.ts
* Update llmExtract.ts

This commit is contained in:
parent 76e1f29ae8
commit 5ab86b8b43
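In short: this commit replaces the string-returning truncateText helper with trimToTokenLimit, which also reports how many tokens the returned text uses and surfaces a warning whenever the input had to be cut. The diff touches the deep-research service call sites, the llmExtract test suite, and the implementation in llmExtract.ts. A minimal usage sketch inferred from the diffs below (longMarkdown and the 128000 limit are illustrative, not from the repo):

    import { trimToTokenLimit } from "./llmExtract";

    // Illustrative input; any long string works.
    const longMarkdown = "# Section\nscraped content\n".repeat(50000);

    // Signature per the diff: trimToTokenLimit(text, maxTokens, modelId = "gpt-4o", previousWarning?)
    const { text, numTokens, warning } = trimToTokenLimit(longMarkdown, 128000);
    if (warning) {
      console.warn(warning); // ends with "...the input has been automatically trimmed."
    }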
@@ -5,8 +5,7 @@ import {
   DeepResearchSource,
   updateDeepResearch,
 } from "./deep-research-redis";
-import { generateCompletions } from "../../scraper/scrapeURL/transformers/llmExtract";
-import { truncateText } from "../../scraper/scrapeURL/transformers/llmExtract";
+import { generateCompletions, trimToTokenLimit } from "../../scraper/scrapeURL/transformers/llmExtract";
 
 interface AnalysisResult {
   gaps: string[];
@@ -178,7 +177,7 @@ export class ResearchLLMService {
           },
         },
         prompt: `Generate a list of 3-5 search queries to deeply research this topic: "${topic}"
-        ${findings.length > 0 ? `\nBased on these previous findings, generate more specific queries:\n${truncateText(findings.map((f) => `- ${f.text}`).join("\n"), 10000)}` : ""}
+        ${findings.length > 0 ? `\nBased on these previous findings, generate more specific queries:\n${trimToTokenLimit(findings.map((f) => `- ${f.text}`).join("\n"), 10000).text}` : ""}
 
         Each query should be specific and focused on a particular aspect.
         Build upon previous findings when available.
@@ -225,7 +224,7 @@ export class ResearchLLMService {
           },
         },
       },
-      prompt: truncateText(
+      prompt: trimToTokenLimit(
        `You are researching: ${currentTopic}
        You have ${timeRemainingMinutes} minutes remaining to complete the research but you don't need to use all of it.
        Current findings: ${findings.map((f) => `[From ${f.source}]: ${f.text}`).join("\n")}
@@ -234,7 +233,7 @@ export class ResearchLLMService {
        Important: If less than 1 minute remains, set shouldContinue to false to allow time for final synthesis.
        If I have enough information, set shouldContinue to false.`,
        120000,
-      ),
+      ).text,
     },
     markdown: "",
   });
@@ -266,7 +265,7 @@ export class ResearchLLMService {
           report: { type: "string" },
         },
       },
-      prompt: truncateText(
+      prompt: trimToTokenLimit(
        `Create a comprehensive research report on "${topic}" based on the collected findings and analysis.
 
        Research data:
@@ -281,7 +280,7 @@ export class ResearchLLMService {
        - Cite sources
        - Use bullet points and lists where appropriate for readability`,
        100000,
-      ),
+      ).text,
     },
     markdown: "",
   });
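The call-site pattern in the hunks above is mechanical: trimToTokenLimit returns an object rather than a bare string, so every former truncateText(...) expression gains a .text accessor. A before/after sketch under that assumption (imports omitted; bigPrompt is a placeholder, and truncateText only exists before this commit):

    // Placeholder input for illustration.
    const bigPrompt = "finding text ".repeat(100000);

    // Before: truncateText returned the trimmed string directly.
    const promptBefore = truncateText(bigPrompt, 120000);

    // After: trimToTokenLimit returns { text, numTokens, warning? },
    // so call sites read the .text field (and can pass .warning along).
    const promptAfter = trimToTokenLimit(bigPrompt, 120000).text;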
@@ -1,5 +1,5 @@
 import { removeDefaultProperty } from "./llmExtract";
-import { truncateText } from "./llmExtract";
+import { trimToTokenLimit } from "./llmExtract";
 import { encoding_for_model } from "@dqbd/tiktoken";
 
 jest.mock("@dqbd/tiktoken", () => ({
@@ -46,10 +46,13 @@ describe("removeDefaultProperty", () => {
   });
 });
 
-describe("truncateText", () => {
+describe("trimToTokenLimit", () => {
   const mockEncode = jest.fn();
+  const mockFree = jest.fn();
   const mockEncoder = {
     encode: mockEncode,
+    free: mockFree,
   };
 
   beforeEach(() => {
@@ -57,84 +60,214 @@ describe("truncateText", () => {
     (encoding_for_model as jest.Mock).mockReturnValue(mockEncoder);
   });
 
-  it("should return the original text if it's within token limit", () => {
-    const text = "This is a short text";
+  it("should return original text if within token limit", () => {
+    const text = "This is a test text";
     mockEncode.mockReturnValue(new Array(5)); // Simulate 5 tokens
 
-    const result = truncateText(text, 10);
-    expect(result).toBe(text);
+    const result = trimToTokenLimit(text, 10, "gpt-4o");
+    expect(result).toEqual({
+      text,
+      numTokens: 5,
+      warning: undefined
+    });
     expect(mockEncode).toHaveBeenCalledWith(text);
+    expect(mockFree).toHaveBeenCalled();
   });
 
-  it("should truncate text that exceeds token limit", () => {
-    const text = "This is a longer text that needs truncation";
-    mockEncode.mockReturnValue(new Array(20)); // Simulate 20 tokens
+  it("should trim text and return warning when exceeding token limit", () => {
+    const text = "This is a longer text that needs to be trimmed";
+    mockEncode
+      .mockReturnValueOnce(new Array(20)) // First call for full text
+      .mockReturnValueOnce(new Array(8)); // Second call for trimmed text
 
-    const result = truncateText(text, 10);
-    expect(result.length).toBeLessThan(text.length);
-    expect(mockEncode).toHaveBeenCalled();
+    const result = trimToTokenLimit(text, 10, "gpt-4o");
+    expect(result.text.length).toBeLessThan(text.length);
+    expect(result.numTokens).toBe(8);
+    expect(result.warning).toContain("automatically trimmed");
+    expect(mockEncode).toHaveBeenCalledTimes(2);
+    expect(mockFree).toHaveBeenCalled();
   });
 
-  it("should handle empty string", () => {
-    const text = "";
-    mockEncode.mockReturnValue([]);
+  it("should append previous warning if provided", () => {
+    const text = "This is a test text that is too long";
+    const previousWarning = "Previous warning message";
+    mockEncode
+      .mockReturnValueOnce(new Array(15))
+      .mockReturnValueOnce(new Array(8));
 
-    const result = truncateText(text, 10);
-    expect(result).toBe("");
-    expect(mockEncode).toHaveBeenCalledWith("");
+    const result = trimToTokenLimit(text, 10, "gpt-4o", previousWarning);
+    expect(result.warning).toContain("automatically trimmed");
+    expect(result.warning).toContain(previousWarning);
   });
 
-  it("should use character-based fallback when encoder throws error", () => {
-    const text = "This is some text";
+  it("should use fallback approach when encoder throws error", () => {
+    const text = "This is some text to test fallback";
     mockEncode.mockImplementation(() => {
       throw new Error("Encoder error");
     });
 
-    const result = truncateText(text, 5);
-    // With modifier of 3, should truncate to approximately 15 characters
-    expect(result.length).toBeLessThanOrEqual(15);
+    const result = trimToTokenLimit(text, 10, "gpt-4o");
+    expect(result.text.length).toBeLessThanOrEqual(30); // 10 tokens * 3 chars per token
+    expect(result.numTokens).toBe(10);
+    expect(result.warning).toContain("Failed to derive number of LLM tokens");
   });
 
-  it("should handle very short max token limits", () => {
+  it("should handle empty text", () => {
+    const text = "";
+    mockEncode.mockReturnValue([]);
+
+    const result = trimToTokenLimit(text, 10, "gpt-4o");
+
+    expect(result).toEqual({
+      text: "",
+      numTokens: 0,
+      warning: undefined
+    });
+    expect(mockFree).toHaveBeenCalled();
+  });
+
+  it("should handle large token limits (128k)", () => {
+    const text = "A".repeat(384000); // Assuming ~3 chars per token, this would be ~128k tokens
+    mockEncode
+      .mockReturnValueOnce(new Array(130000)) // First check shows it's too long
+      .mockReturnValueOnce(new Array(127000)); // Second check shows it's within limit after trim
+
+    const result = trimToTokenLimit(text, 128000, "gpt-4o");
+
+    expect(result.text.length).toBeLessThan(text.length);
+    expect(result.numTokens).toBe(127000);
+    expect(result.warning).toContain("automatically trimmed");
+    expect(mockEncode).toHaveBeenCalledTimes(2);
+    expect(mockFree).toHaveBeenCalled();
+  });
+
+  it("should handle large token limits (512k) with 32k context window", () => {
+    const text = "A".repeat(1536000); // Assuming ~3 chars per token, this would be ~512k tokens
+    mockEncode
+      .mockReturnValueOnce(new Array(520000)) // First check shows it's too long
+      .mockReturnValueOnce(new Array(32000)); // Second check shows it's within context limit after trim
+
+    const result = trimToTokenLimit(text, 32000, "gpt-4o");
+
+    expect(result.text.length).toBeLessThan(text.length);
+    expect(result.numTokens).toBe(32000);
+    expect(result.warning).toContain("automatically trimmed");
+    expect(mockEncode).toHaveBeenCalledTimes(2);
+    expect(mockFree).toHaveBeenCalled();
+  });
+
+  it("should preserve text when under token limit", () => {
     const text = "Short text";
+    mockEncode.mockReturnValue(new Array(5)); // 5 tokens
+
+    const result = trimToTokenLimit(text, 10, "gpt-4o");
+
+    expect(result.text).toBe(text);
+    expect(result.numTokens).toBe(5);
+    expect(result.warning).toBeUndefined();
+    expect(mockFree).toHaveBeenCalled();
+  });
+
+  it("should append new warning to previous warning", () => {
+    const text = "A".repeat(300);
+    const previousWarning = "Previous warning message";
+    mockEncode
+      .mockReturnValueOnce(new Array(100))
+      .mockReturnValueOnce(new Array(50));
+
+    const result = trimToTokenLimit(text, 50, "gpt-4o", previousWarning);
+
+    expect(result.warning).toContain("automatically trimmed");
+    expect(result.warning).toContain(previousWarning);
+    expect(mockFree).toHaveBeenCalled();
+  });
+
+  it("should handle encoder initialization failure gracefully", () => {
+    const text = "Sample text";
+    (encoding_for_model as jest.Mock).mockImplementationOnce(() => {
+      throw new Error("Encoder initialization failed");
+    });
+
+    const result = trimToTokenLimit(text, 10, "gpt-4o");
+
+    expect(result.text.length).toBeLessThanOrEqual(30); // 10 tokens * 3 chars
+    expect(result.warning).toContain("Failed to derive number of LLM tokens");
+    expect(mockFree).not.toHaveBeenCalled();
+  });
+
+  it("should handle encoding errors during trimming", () => {
+    const text = "Sample text";
+    mockEncode.mockImplementation(() => {
+      throw new Error("Encoding failed");
+    });
+
+    const result = trimToTokenLimit(text, 10, "gpt-4o");
+
+    expect(result.text.length).toBeLessThanOrEqual(30);
+    expect(result.warning).toContain("Failed to derive number of LLM tokens");
+    expect(mockFree).toHaveBeenCalled();
+  });
+
+  it("should handle very small token limits", () => {
+    const text = "This is a test sentence that should be trimmed significantly";
+    mockEncode
+      .mockReturnValueOnce(new Array(20))
+      .mockReturnValueOnce(new Array(3));
+
+    const result = trimToTokenLimit(text, 3, "gpt-4o");
+
+    expect(result.text.length).toBeLessThan(text.length);
+    expect(result.numTokens).toBe(3);
+    expect(result.warning).toContain("automatically trimmed");
+    expect(mockFree).toHaveBeenCalled();
+  });
+
+  it("should handle unicode characters", () => {
+    const text = "Hello 👋 World 🌍";
+    mockEncode
+      .mockReturnValueOnce(new Array(8))
+      .mockReturnValueOnce(new Array(4));
+
+    const result = trimToTokenLimit(text, 4, "gpt-4o");
+
+    expect(result.text.length).toBeLessThan(text.length);
+    expect(result.numTokens).toBe(4);
+    expect(result.warning).toContain("automatically trimmed");
+    expect(mockFree).toHaveBeenCalled();
+  });
+
+  it("should handle multiple trimming iterations", () => {
+    const text = "A".repeat(1000);
+    mockEncode
+      .mockReturnValueOnce(new Array(300))
+      .mockReturnValueOnce(new Array(200))
+      .mockReturnValueOnce(new Array(100))
+      .mockReturnValueOnce(new Array(50));
+
+    const result = trimToTokenLimit(text, 50, "gpt-4o");
+
+    expect(result.text.length).toBeLessThan(text.length);
+    expect(result.numTokens).toBe(50);
+    expect(result.warning).toContain("automatically trimmed");
+    expect(mockEncode).toHaveBeenCalledTimes(4);
+    expect(mockFree).toHaveBeenCalled();
+  });
+
+  it("should handle exact token limit match", () => {
+    const text = "Exact token limit text";
     mockEncode.mockReturnValue(new Array(10));
 
-    const result = truncateText(text, 1);
-    expect(result.length).toBeLessThan(text.length);
+    const result = trimToTokenLimit(text, 10, "gpt-4o");
+    expect(result.text).toBe(text);
+    expect(result.numTokens).toBe(10);
+    expect(result.warning).toBeUndefined();
+    expect(mockFree).toHaveBeenCalled();
   });
 
-  it("should handle zero max tokens", () => {
-    const text = "Some text";
-    mockEncode.mockReturnValue(new Array(2));
-
-    const result = truncateText(text, 0);
-    expect(result).toBe("");
-  });
-
-  it("should handle extremely large text exceeding model context", () => {
-    // Create a very large text (e.g., 100,000 characters)
-    const text = "a".repeat(100000);
-
-    // First call: simulate 25000 tokens
-    mockEncode.mockReturnValueOnce(new Array(25000));
-    // Subsequent calls: simulate gradually decreasing token counts
-    // This simulates the iterative truncation process
-    mockEncode
-      .mockReturnValueOnce(new Array(20000))
-      .mockReturnValueOnce(new Array(15000))
-      .mockReturnValueOnce(new Array(12000))
-      .mockReturnValueOnce(new Array(9000));
-
-    const result = truncateText(text, 10000); // Common model context limit
-
-    // The result should be significantly shorter but not empty
-    expect(result.length).toBeLessThan(text.length);
-    expect(result.length).toBeGreaterThan(0);
-    // Given our new conservative approach, we should have a substantial amount of text
-    expect(result.length).toBeGreaterThan(30000); // At least 30% of original
-    expect(mockEncode).toHaveBeenCalled();
-
-    // Log the actual length for verification
-    console.log("Result length:", result.length, "characters");
-  });
-
 });
@@ -89,34 +89,63 @@ function normalizeSchema(x: any): any {
   }
 }
 
-export function truncateText(text: string, maxTokens: number): string {
-  const modifier = 3; // Estimate: 1 token ≈ 3-4 characters for safety
+interface TrimResult {
+  text: string;
+  numTokens: number;
+  warning?: string;
+}
+
+export function trimToTokenLimit(text: string, maxTokens: number, modelId: string = "gpt-4o", previousWarning?: string): TrimResult {
   try {
-    const encoder = encoding_for_model("gpt-4o");
-    // Continuously trim the text until its token count is within the limit.
-    while (true) {
+    const encoder = encoding_for_model(modelId as TiktokenModel);
+    try {
       const tokens = encoder.encode(text);
-      if (tokens.length <= maxTokens) {
-        return text;
+      const numTokens = tokens.length;
+
+      if (numTokens <= maxTokens) {
+        return { text, numTokens };
       }
-      // Calculate a new length using a more conservative approach
-      // Instead of scaling the entire text, we'll remove a smaller portion
-      const ratio = maxTokens / tokens.length;
-      const newLength = Math.max(
-        Math.ceil(text.length * ratio),
-        Math.floor(text.length * 0.8) // Never remove more than 20% at once
-      );
-      if (newLength <= 0) {
-        return "";
+
+      const modifier = 3;
+      // Start with 3 chars per token estimation
+      let currentText = text.slice(0, Math.floor(maxTokens * modifier) - 1);
+
+      // Keep trimming until we're under the token limit
+      while (true) {
+        const currentTokens = encoder.encode(currentText);
+        if (currentTokens.length <= maxTokens) {
+          const warning = `The extraction content would have used more tokens (${numTokens}) than the maximum we allow (${maxTokens}). -- the input has been automatically trimmed.`;
+          return {
+            text: currentText,
+            numTokens: currentTokens.length,
+            warning: previousWarning ? `${warning} ${previousWarning}` : warning
+          };
+        }
+        const overflow = currentTokens.length * modifier - maxTokens - 1;
+        // If still over limit, remove another chunk
+        currentText = currentText.slice(0, Math.floor(currentText.length - overflow));
       }
-      text = text.slice(0, newLength);
+    } catch (e) {
+      throw e;
+    } finally {
+      encoder.free();
     }
   } catch (error) {
-    // Fallback using character-based estimation.
-    if (text.length <= maxTokens * modifier) {
-      return text;
-    }
-    return text.slice(0, maxTokens * modifier);
+    // Fallback to a more conservative character-based approach
+    const estimatedCharsPerToken = 2.8;
+    const safeLength = maxTokens * estimatedCharsPerToken;
+    const trimmedText = text.slice(0, Math.floor(safeLength));
+
+    const warning = `Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (${maxTokens}) we support.`;
+
+    return {
+      text: trimmedText,
+      numTokens: maxTokens, // We assume we hit the max in this fallback case
+      warning: previousWarning ? `${warning} ${previousWarning}` : warning
+    };
   }
 }
 
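One detail of the implementation above worth calling out: previousWarning lets warnings chain across successive trims, with the newest message first. A minimal sketch of that pattern (longText and the limits are placeholders):

    import { trimToTokenLimit } from "./llmExtract";

    const longText = "scraped page content ".repeat(100000); // placeholder input

    // Two-stage trim: the second call threads the first call's warning through,
    // so a single combined warning string reaches the caller.
    const first = trimToTokenLimit(longText, 1000, "gpt-4o");
    const second = trimToTokenLimit(first.text, 500, "gpt-4o", first.warning);
    // If both calls trimmed, second.warning reads "<new warning> <first.warning>".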
@@ -149,51 +178,19 @@ export async function generateCompletions({
   }
 
   const { maxInputTokens, maxOutputTokens } = getModelLimits(model.modelId);
 
-  // Ratio of 4 was way too high, now 3.5.
-  const modifier = 3.5; // tokens to characters ratio
   // Calculate 80% of max input tokens (for content)
   const maxTokensSafe = Math.floor(maxInputTokens * 0.8);
 
-  // count number of tokens
-  let numTokens = 0;
-  try {
-    // Encode the message into tokens
-    const encoder = encoding_for_model(model.modelId as TiktokenModel);
-
-    try {
-      const tokens = encoder.encode(markdown);
-      numTokens = tokens.length;
-    } catch (e) {
-      throw e;
-    } finally {
-      // Free the encoder resources after use
-      encoder.free();
-    }
-  } catch (error) {
-    logger.warn("Calculating num tokens of string failed", { error });
-
-    markdown = markdown.slice(0, maxTokensSafe * modifier);
-
-    let w =
-      "Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (" +
-      maxTokensSafe +
-      ") we support.";
-    warning = previousWarning === undefined ? w : w + " " + previousWarning;
-  }
-
-  if (numTokens > maxTokensSafe) {
-    // trim the document to the maximum number of tokens, tokens != characters
-    markdown = markdown.slice(0, maxTokensSafe * modifier);
-
-    const w =
-      "The extraction content would have used more tokens (" +
-      numTokens +
-      ") than the maximum we allow (" +
-      maxTokensSafe +
-      "). -- the input has been automatically trimmed.";
-    warning = previousWarning === undefined ? w : w + " " + previousWarning;
-  }
+  // Use the new trimming function
+  const { text: trimmedMarkdown, numTokens, warning: trimWarning } = trimToTokenLimit(
+    markdown,
+    maxTokensSafe,
+    model.modelId,
+    previousWarning
+  );
+  markdown = trimmedMarkdown;
+  warning = trimWarning;
 
   let schema = options.schema;
   // Normalize the bad json schema users write (mogery)
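Net effect in generateCompletions: token counting, trimming, and warning construction collapse into a single call, and the 80%-of-max-input-tokens headroom is unchanged. A condensed recap of the hunk above (variable names as in the function's scope, not new code):

    // 80% of the model's input tokens remain budgeted for content, as before.
    const maxTokensSafe = Math.floor(maxInputTokens * 0.8);

    // One call now counts tokens, trims when over budget, and chains warnings.
    const { text: trimmedMarkdown, numTokens, warning: trimWarning } =
      trimToTokenLimit(markdown, maxTokensSafe, model.modelId, previousWarning);
    markdown = trimmedMarkdown;
    warning = trimWarning;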