From ccb74a2b43d66418409cea0b8ecaf2a2b6dc6233 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 23 Jan 2025 01:28:26 -0300 Subject: [PATCH] Nick: increased timeouts on extract + reduced extract redis usage --- apps/api/src/lib/extract/extract-redis.ts | 52 ++++++++++++++----- .../api/src/lib/extract/extraction-service.ts | 4 +- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/apps/api/src/lib/extract/extract-redis.ts b/apps/api/src/lib/extract/extract-redis.ts index 02842d8c..a6a3d6d3 100644 --- a/apps/api/src/lib/extract/extract-redis.ts +++ b/apps/api/src/lib/extract/extract-redis.ts @@ -34,10 +34,27 @@ export type StoredExtract = { llmUsage?: number; }; +// Reduce TTL to 6 hours instead of 24 +const EXTRACT_TTL = 6 * 60 * 60; + +const STEPS_MAX_DISCOVERED_LINKS = 100; + export async function saveExtract(id: string, extract: StoredExtract) { _logger.debug("Saving extract " + id + " to Redis..."); - await redisConnection.set("extract:" + id, JSON.stringify(extract)); - await redisConnection.expire("extract:" + id, 24 * 60 * 60, "NX"); + // Only store essential data + const minimalExtract = { + ...extract, + steps: extract.steps?.map(step => ({ + step: step.step, + startedAt: step.startedAt, + finishedAt: step.finishedAt, + error: step.error, + // Only store the first STEPS_MAX_DISCOVERED_LINKS discovered links per step + discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS) + })) + }; + await redisConnection.set("extract:" + id, JSON.stringify(minimalExtract)); + await redisConnection.expire("extract:" + id, EXTRACT_TTL); } export async function getExtract(id: string): Promise { @@ -52,29 +69,40 @@ export async function updateExtract( const current = await getExtract(id); if (!current) return; - // Handle steps aggregation + // Handle steps aggregation with cleanup if (extract.steps && current.steps) { - extract.steps = [...current.steps, ...extract.steps]; + // Keep only the last 5 steps to prevent unbounded growth + const allSteps = [...current.steps, 
...extract.steps]; + extract.steps = allSteps.slice(Math.max(0, allSteps.length - 5)); } - // Limit links in steps to 500 + // Limit links in steps to STEPS_MAX_DISCOVERED_LINKS (down from 500) to reduce memory usage if (extract.steps) { extract.steps = extract.steps.map((step) => { - if (step.discoveredLinks && step.discoveredLinks.length > 500) { + if (step.discoveredLinks && step.discoveredLinks.length > STEPS_MAX_DISCOVERED_LINKS) { return { ...step, - discoveredLinks: step.discoveredLinks.slice(0, 500), + discoveredLinks: step.discoveredLinks.slice(0, STEPS_MAX_DISCOVERED_LINKS), }; } return step; }); } - await redisConnection.set( - "extract:" + id, - JSON.stringify({ ...current, ...extract }), - ); - await redisConnection.expire("extract:" + id, 24 * 60 * 60, "NX"); + const minimalExtract = { + ...current, + ...extract, + steps: extract.steps?.map(step => ({ + step: step.step, + startedAt: step.startedAt, + finishedAt: step.finishedAt, + error: step.error, + discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS) + })) + }; + + await redisConnection.set("extract:" + id, JSON.stringify(minimalExtract)); + await redisConnection.expire("extract:" + id, EXTRACT_TTL); } export async function getExtractExpiry(id: string): Promise { diff --git a/apps/api/src/lib/extract/extraction-service.ts b/apps/api/src/lib/extract/extraction-service.ts index c9ab015a..72312aed 100644 --- a/apps/api/src/lib/extract/extraction-service.ts +++ b/apps/api/src/lib/extract/extraction-service.ts @@ -323,7 +323,7 @@ export async function performExtraction( ], }); - const timeout = Math.floor((request.timeout || 40000) * 0.7) || 30000; + const timeout = 60000; await updateExtract(extractId, { status: "processing", @@ -566,7 +566,7 @@ export async function performExtraction( Object.keys(rSchema.properties).length > 0 ) { // Scrape documents - const timeout = Math.floor((request.timeout || 40000) * 0.7) || 30000; + const timeout = 60000; let singleAnswerDocs: Document[] = []; // let rerank = 
await rerankLinks(links.map((url) => ({ url })), request.prompt ?? JSON.stringify(request.schema), urlTraces);