diff --git a/apps/api/src/lib/extract/extract-redis.ts b/apps/api/src/lib/extract/extract-redis.ts index cb793572..0df700f4 100644 --- a/apps/api/src/lib/extract/extract-redis.ts +++ b/apps/api/src/lib/extract/extract-redis.ts @@ -54,6 +54,19 @@ export async function updateExtract( extract.steps = [...current.steps, ...extract.steps]; } + // Limit links in steps to 500 + if (extract.steps) { + extract.steps = extract.steps.map(step => { + if (step.discoveredLinks && step.discoveredLinks.length > 500) { + return { + ...step, + discoveredLinks: step.discoveredLinks.slice(0, 500) + }; + } + return step; + }); + } + await redisConnection.set( "extract:" + id, JSON.stringify({ ...current, ...extract }), diff --git a/apps/api/src/lib/extract/extraction-service.ts b/apps/api/src/lib/extract/extraction-service.ts index d801395d..272d4fd7 100644 --- a/apps/api/src/lib/extract/extraction-service.ts +++ b/apps/api/src/lib/extract/extraction-service.ts @@ -256,7 +256,7 @@ export async function performExtraction( step: ExtractStep.MULTI_ENTITY_SCRAPE, startedAt: Date.now(), finishedAt: Date.now(), - discoveredLinks: [], + discoveredLinks: links, }, ], }); @@ -510,7 +510,7 @@ export async function performExtraction( step: ExtractStep.EXTRACT, startedAt: Date.now(), finishedAt: Date.now(), - discoveredLinks: [], + discoveredLinks: links, }, ], });