Nick: increased timeouts on extract + reduced extract redis usage

Nicolas 2025-01-23 01:28:26 -03:00
parent 498558d358
commit ccb74a2b43
2 changed files with 42 additions and 14 deletions


@@ -34,10 +34,27 @@ export type StoredExtract = {
  llmUsage?: number;
};
// Reduce TTL to 6 hours instead of 24
const EXTRACT_TTL = 6 * 60 * 60;
const STEPS_MAX_DISCOVERED_LINKS = 100;
export async function saveExtract(id: string, extract: StoredExtract) {
  _logger.debug("Saving extract " + id + " to Redis...");
  await redisConnection.set("extract:" + id, JSON.stringify(extract));
  await redisConnection.expire("extract:" + id, 24 * 60 * 60, "NX");
  // Only store essential data
  const minimalExtract = {
    ...extract,
    steps: extract.steps?.map(step => ({
      step: step.step,
      startedAt: step.startedAt,
      finishedAt: step.finishedAt,
      error: step.error,
      // Only store the first STEPS_MAX_DISCOVERED_LINKS (100) discovered links per step
      discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS)
    }))
  };
  await redisConnection.set("extract:" + id, JSON.stringify(minimalExtract));
  await redisConnection.expire("extract:" + id, EXTRACT_TTL);
}
export async function getExtract(id: string): Promise<StoredExtract | null> {
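
For reference, the trimming added above can be read as a small standalone helper. A minimal sketch of the same idea, where the `ExtractStep` shape is an assumption inferred from the fields used in the diff, not the repo's actual type definition:

// Sketch of the step-trimming logic in saveExtract, under an assumed step shape.
type ExtractStep = {
  step: string;
  startedAt: number;
  finishedAt?: number;
  error?: string;
  discoveredLinks?: string[]; // can grow very large; the main Redis payload offender
};

const STEPS_MAX_DISCOVERED_LINKS = 100;

// Keep only the fields worth persisting and cap discoveredLinks so the
// JSON payload written to Redis stays bounded per step.
function trimSteps(steps?: ExtractStep[]): ExtractStep[] | undefined {
  return steps?.map((step) => ({
    step: step.step,
    startedAt: step.startedAt,
    finishedAt: step.finishedAt,
    error: step.error,
    discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS),
  }));
}
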
@@ -52,29 +69,40 @@ export async function updateExtract(
  const current = await getExtract(id);
  if (!current) return;
  // Handle steps aggregation
  // Handle steps aggregation with cleanup
  if (extract.steps && current.steps) {
    extract.steps = [...current.steps, ...extract.steps];
    // Keep only the last 5 steps to prevent unbounded growth
    const allSteps = [...current.steps, ...extract.steps];
    extract.steps = allSteps.slice(Math.max(0, allSteps.length - 5));
  }
  // Limit links in steps to 500
  // Limit links in steps to STEPS_MAX_DISCOVERED_LINKS (100) instead of 500 to reduce memory usage
  if (extract.steps) {
    extract.steps = extract.steps.map((step) => {
      if (step.discoveredLinks && step.discoveredLinks.length > 500) {
      if (step.discoveredLinks && step.discoveredLinks.length > STEPS_MAX_DISCOVERED_LINKS) {
        return {
          ...step,
          discoveredLinks: step.discoveredLinks.slice(0, 500),
          discoveredLinks: step.discoveredLinks.slice(0, STEPS_MAX_DISCOVERED_LINKS),
        };
      }
      return step;
    });
  }
  await redisConnection.set(
    "extract:" + id,
    JSON.stringify({ ...current, ...extract }),
  );
  await redisConnection.expire("extract:" + id, 24 * 60 * 60, "NX");
  const minimalExtract = {
    ...current,
    ...extract,
    steps: extract.steps?.map(step => ({
      step: step.step,
      startedAt: step.startedAt,
      finishedAt: step.finishedAt,
      error: step.error,
      discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS)
    }))
  };
  await redisConnection.set("extract:" + id, JSON.stringify(minimalExtract));
  await redisConnection.expire("extract:" + id, EXTRACT_TTL);
}
export async function getExtractExpiry(id: string): Promise<Date> {
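
A side effect of this hunk worth calling out: the old expire used the "NX" option, which sets a TTL only when none exists, so a record died 24 hours after its first write no matter how active it was; the new unconditional expire resets the clock on every save or update, giving a sliding 6-hour window after the last write. A minimal sketch of the two behaviors, assuming an ioredis v5+ client (like the redisConnection above) and Redis >= 7.0 for the NX expire option:

import Redis from "ioredis";

const redis = new Redis(); // stand-in for redisConnection
const EXTRACT_TTL = 6 * 60 * 60; // 6 hours, in seconds

async function expireFixedWindow(key: string) {
  // Old behavior: "NX" sets a TTL only if none exists, so the key expires
  // 24h after the *first* write no matter how often it is updated.
  await redis.expire(key, 24 * 60 * 60, "NX");
}

async function expireSlidingWindow(key: string) {
  // New behavior: the TTL is reset on every write, so the key survives
  // until 6h after the *last* write.
  await redis.expire(key, EXTRACT_TTL);
}
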


@@ -323,7 +323,7 @@ export async function performExtraction(
    ],
  });
  const timeout = Math.floor((request.timeout || 40000) * 0.7) || 30000;
  const timeout = 60000;
  await updateExtract(extractId, {
    status: "processing",
@@ -566,7 +566,7 @@ export async function performExtraction(
    Object.keys(rSchema.properties).length > 0
  ) {
    // Scrape documents
    const timeout = Math.floor((request.timeout || 40000) * 0.7) || 30000;
    const timeout = 60000;
    let singleAnswerDocs: Document[] = [];
    // let rerank = await rerankLinks(links.map((url) => ({ url })), request.prompt ?? JSON.stringify(request.schema), urlTraces);
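
Both hunks above swap a derived timeout for a flat 60 seconds. Previously the scrape budget was 70% of the caller-supplied request.timeout, defaulting to 40s (so 28s effective), with a fallback that is almost dead code. A sketch of the before/after arithmetic, using values straight from the diff (the FIXED_TIMEOUT_MS name is illustrative):

// Before: timeout derived from the request. With no request.timeout this
// yields floor(40000 * 0.7) = 28000ms. The `|| 30000` fallback only fires
// when the floored product is 0, e.g. request.timeout = 1.
function derivedTimeout(requestTimeout?: number): number {
  return Math.floor((requestTimeout || 40000) * 0.7) || 30000;
}

// After: a flat 60s budget, independent of the caller's timeout.
const FIXED_TIMEOUT_MS = 60000;

console.log(derivedTimeout(undefined)); // 28000
console.log(derivedTimeout(90000)); // 63000
console.log(FIXED_TIMEOUT_MS); // 60000
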