Nick: increased timeouts on extract + reduced extract redis usage

This commit is contained in:
Nicolas 2025-01-23 01:28:26 -03:00
parent 498558d358
commit ccb74a2b43
2 changed files with 42 additions and 14 deletions

View File

@ -34,10 +34,27 @@ export type StoredExtract = {
llmUsage?: number; llmUsage?: number;
}; };
// Reduce TTL to 6 hours instead of 24
const EXTRACT_TTL = 6 * 60 * 60;
const STEPS_MAX_DISCOVERED_LINKS = 100;
export async function saveExtract(id: string, extract: StoredExtract) { export async function saveExtract(id: string, extract: StoredExtract) {
_logger.debug("Saving extract " + id + " to Redis..."); _logger.debug("Saving extract " + id + " to Redis...");
await redisConnection.set("extract:" + id, JSON.stringify(extract)); // Only store essential data
await redisConnection.expire("extract:" + id, 24 * 60 * 60, "NX"); const minimalExtract = {
...extract,
steps: extract.steps?.map(step => ({
step: step.step,
startedAt: step.startedAt,
finishedAt: step.finishedAt,
error: step.error,
// Only store the first STEPS_MAX_DISCOVERED_LINKS (100) discovered links per step
discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS)
}))
};
await redisConnection.set("extract:" + id, JSON.stringify(minimalExtract));
await redisConnection.expire("extract:" + id, EXTRACT_TTL);
} }
export async function getExtract(id: string): Promise<StoredExtract | null> { export async function getExtract(id: string): Promise<StoredExtract | null> {
@ -52,29 +69,40 @@ export async function updateExtract(
const current = await getExtract(id); const current = await getExtract(id);
if (!current) return; if (!current) return;
// Handle steps aggregation // Handle steps aggregation with cleanup
if (extract.steps && current.steps) { if (extract.steps && current.steps) {
extract.steps = [...current.steps, ...extract.steps]; // Keep only the last 5 steps to prevent unbounded growth
const allSteps = [...current.steps, ...extract.steps];
extract.steps = allSteps.slice(Math.max(0, allSteps.length - 5));
} }
// Limit links in steps to 500 // Limit links in steps to STEPS_MAX_DISCOVERED_LINKS (100) instead of 500 to reduce memory usage
if (extract.steps) { if (extract.steps) {
extract.steps = extract.steps.map((step) => { extract.steps = extract.steps.map((step) => {
if (step.discoveredLinks && step.discoveredLinks.length > 500) { if (step.discoveredLinks && step.discoveredLinks.length > STEPS_MAX_DISCOVERED_LINKS) {
return { return {
...step, ...step,
discoveredLinks: step.discoveredLinks.slice(0, 500), discoveredLinks: step.discoveredLinks.slice(0, STEPS_MAX_DISCOVERED_LINKS),
}; };
} }
return step; return step;
}); });
} }
await redisConnection.set( const minimalExtract = {
"extract:" + id, ...current,
JSON.stringify({ ...current, ...extract }), ...extract,
); steps: extract.steps?.map(step => ({
await redisConnection.expire("extract:" + id, 24 * 60 * 60, "NX"); step: step.step,
startedAt: step.startedAt,
finishedAt: step.finishedAt,
error: step.error,
discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS)
}))
};
await redisConnection.set("extract:" + id, JSON.stringify(minimalExtract));
await redisConnection.expire("extract:" + id, EXTRACT_TTL);
} }
export async function getExtractExpiry(id: string): Promise<Date> { export async function getExtractExpiry(id: string): Promise<Date> {

View File

@ -323,7 +323,7 @@ export async function performExtraction(
], ],
}); });
const timeout = Math.floor((request.timeout || 40000) * 0.7) || 30000; const timeout = 60000;
await updateExtract(extractId, { await updateExtract(extractId, {
status: "processing", status: "processing",
@ -566,7 +566,7 @@ export async function performExtraction(
Object.keys(rSchema.properties).length > 0 Object.keys(rSchema.properties).length > 0
) { ) {
// Scrape documents // Scrape documents
const timeout = Math.floor((request.timeout || 40000) * 0.7) || 30000; const timeout = 60000;
let singleAnswerDocs: Document[] = []; let singleAnswerDocs: Document[] = [];
// let rerank = await rerankLinks(links.map((url) => ({ url })), request.prompt ?? JSON.stringify(request.schema), urlTraces); // let rerank = await rerankLinks(links.map((url) => ({ url })), request.prompt ?? JSON.stringify(request.schema), urlTraces);