From fa99c62f64bf9743326ada5e9f2c02d7ce6d1524 Mon Sep 17 00:00:00 2001
From: Nicolas <nicolascamara29@gmail.com>
Date: Wed, 29 Jan 2025 12:37:14 -0300
Subject: [PATCH] (feat/extract) Improved completions to use model's limits
 (#1109)

* Update analyzeSchemaAndPrompt.ts

* Nick: fixes FIR-663

* Update llmExtract.ts

* Update llmExtract.ts
---
 apps/api/src/controllers/v1/types.ts          |  7 ++--
 .../scrapeURL/transformers/llmExtract.ts      | 37 +++++++++++++++----
 2 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts
index b67acf9c..e689b019 100644
--- a/apps/api/src/controllers/v1/types.ts
+++ b/apps/api/src/controllers/v1/types.ts
@@ -57,10 +57,11 @@ export const extractOptions = z
     schema: z.any().optional(),
     systemPrompt: z
       .string()
+      .max(10000)
       .default(
         "Based on the information on the page, extract all the information from the schema in JSON format. Try to extract all the fields even those that might not be marked as required.",
       ),
-    prompt: z.string().optional(),
+    prompt: z.string().max(10000).optional(),
   })
   .strict(strictMessage);
 
@@ -201,8 +202,8 @@ export const extractV1Options = z
     urls: url
       .array()
       .max(10, "Maximum of 10 URLs allowed per request while in beta."),
-    prompt: z.string().optional(),
-    systemPrompt: z.string().optional(),
+    prompt: z.string().max(10000).optional(),
+    systemPrompt: z.string().max(10000).optional(),
     schema: z
       .any()
       .optional()
diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts
index 5e3cded5..0ee7f733 100644
--- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts
+++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts
@@ -9,9 +9,25 @@ import {
 import { Logger } from "winston";
 import { EngineResultsTracker, Meta } from "..";
 import { logger } from "../../../lib/logger";
+import { modelPrices } from "../../../lib/extract/usage/model-prices";
 
-const maxTokens = 32000;
-const modifier = 4;
+// Resolve the token limits for a model from the modelPrices table; models missing from the table get fixed fallback limits.
+const getModelLimits = (model: string) => {
+  const modelConfig = modelPrices[model];
+  if (!modelConfig) {
+    // No modelPrices entry for this model: return hard-coded fallback limits
+    return {
+      maxInputTokens: 8192,
+      maxOutputTokens: 4096,
+      maxTokens: 12288,
+    };
+  }
+  return {
+    maxInputTokens: modelConfig.max_input_tokens || modelConfig.max_tokens, // uses max_tokens when max_input_tokens is missing or 0
+    maxOutputTokens: modelConfig.max_output_tokens || modelConfig.max_tokens, // uses max_tokens when max_output_tokens is missing or 0
+    maxTokens: modelConfig.max_tokens,
+  };
+};
 
 export class LLMRefusalError extends Error {
   public refusal: string;
@@ -94,6 +110,13 @@ export async function generateOpenAICompletions(
     throw new Error("document.markdown is undefined -- this is unexpected");
   }
 
+  const { maxInputTokens, maxOutputTokens } = getModelLimits(model);
+
+  // A ratio of 4 characters per token was way too high when trimming, so it is now 3.5.
+  const modifier = 3.5; // tokens to characters ratio
+  // Calculate 80% of max input tokens (for content)
+  const maxTokensSafe = Math.floor(maxInputTokens * 0.8);
+
   // count number of tokens
   let numTokens = 0;
   const encoder = encoding_for_model(model as TiktokenModel);
@@ -106,11 +129,11 @@ export async function generateOpenAICompletions(
   } catch (error) {
     logger.warn("Calculating num tokens of string failed", { error, markdown });
 
-    markdown = markdown.slice(0, maxTokens * modifier);
+    markdown = markdown.slice(0, maxTokensSafe * modifier);
 
     let w =
       "Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (" +
-      maxTokens +
+      maxTokensSafe +
       ") we support.";
     warning = previousWarning === undefined ? w : w + " " + previousWarning;
   } finally {
@@ -118,15 +141,15 @@ export async function generateOpenAICompletions(
     encoder.free();
   }
 
-  if (numTokens > maxTokens) {
+  if (numTokens > maxTokensSafe) {
     // trim the document to the maximum number of tokens, tokens != characters
-    markdown = markdown.slice(0, maxTokens * modifier);
+    markdown = markdown.slice(0, maxTokensSafe * modifier);
 
     const w =
       "The extraction content would have used more tokens (" +
       numTokens +
       ") than the maximum we allow (" +
-      maxTokens +
+      maxTokensSafe +
       "). -- the input has been automatically trimmed.";
     warning = previousWarning === undefined ? w : w + " " + previousWarning;
   }