mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl (synced 2025-08-12 20:29:01 +08:00)
Nick: increased timeouts on extract + reduced extract redis usage
commit ccb74a2b43
parent 498558d358
```diff
@@ -34,10 +34,27 @@ export type StoredExtract = {
   llmUsage?: number;
 };
 
+// Reduce TTL to 6 hours instead of 24
+const EXTRACT_TTL = 6 * 60 * 60;
+
+const STEPS_MAX_DISCOVERED_LINKS = 100;
+
 export async function saveExtract(id: string, extract: StoredExtract) {
   _logger.debug("Saving extract " + id + " to Redis...");
-  await redisConnection.set("extract:" + id, JSON.stringify(extract));
-  await redisConnection.expire("extract:" + id, 24 * 60 * 60, "NX");
+  // Only store essential data
+  const minimalExtract = {
+    ...extract,
+    steps: extract.steps?.map(step => ({
+      step: step.step,
+      startedAt: step.startedAt,
+      finishedAt: step.finishedAt,
+      error: step.error,
+      // Only store first 20 discovered links per step
+      discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS)
+    }))
+  };
+  await redisConnection.set("extract:" + id, JSON.stringify(minimalExtract));
+  await redisConnection.expire("extract:" + id, EXTRACT_TTL);
 }
 
 export async function getExtract(id: string): Promise<StoredExtract | null> {
```
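The new saveExtract drops everything from each step except the fields listed above and caps discoveredLinks before serializing, which is where the Redis savings come from; it also refreshes the expiry to EXTRACT_TTL (6 hours) on every write instead of only setting it when no TTL exists (the old call passed "NX"). A minimal sketch of that trimming applied to an oversized step follows; the ExtractStep shape, the trimSteps helper, and the sample data are illustrative assumptions, not code from the repo:

```typescript
// Illustrative sketch only: mirrors the trimming done in the new saveExtract.
// The ExtractStep shape and sample values below are assumptions, not repo code.
type ExtractStep = {
  step: string;
  startedAt: number;
  finishedAt: number;
  error?: string;
  discoveredLinks?: string[];
  // ...any other per-step fields are dropped by the trim below
};

const STEPS_MAX_DISCOVERED_LINKS = 100;

function trimSteps(steps?: ExtractStep[]): ExtractStep[] | undefined {
  return steps?.map((step) => ({
    step: step.step,
    startedAt: step.startedAt,
    finishedAt: step.finishedAt,
    error: step.error,
    discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS),
  }));
}

// A step that discovered 10,000 links shrinks to at most 100 before storage,
// so the JSON payload written to Redis is a fraction of its former size.
const bigStep: ExtractStep = {
  step: "crawl",
  startedAt: Date.now(),
  finishedAt: Date.now(),
  discoveredLinks: Array.from({ length: 10_000 }, (_, i) => `https://example.com/${i}`),
};

console.log(JSON.stringify({ steps: trimSteps([bigStep]) }).length); // far smaller payload
```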
```diff
@@ -52,29 +69,40 @@ export async function updateExtract(
   const current = await getExtract(id);
   if (!current) return;
 
-  // Handle steps aggregation
+  // Handle steps aggregation with cleanup
   if (extract.steps && current.steps) {
-    extract.steps = [...current.steps, ...extract.steps];
+    // Keep only the last 5 steps to prevent unbounded growth
+    const allSteps = [...current.steps, ...extract.steps];
+    extract.steps = allSteps.slice(Math.max(0, allSteps.length - 5));
   }
 
-  // Limit links in steps to 500
+  // Limit links in steps to 20 instead of 100 to reduce memory usage
   if (extract.steps) {
     extract.steps = extract.steps.map((step) => {
-      if (step.discoveredLinks && step.discoveredLinks.length > 500) {
+      if (step.discoveredLinks && step.discoveredLinks.length > STEPS_MAX_DISCOVERED_LINKS) {
         return {
           ...step,
-          discoveredLinks: step.discoveredLinks.slice(0, 500),
+          discoveredLinks: step.discoveredLinks.slice(0, STEPS_MAX_DISCOVERED_LINKS),
         };
       }
       return step;
     });
   }
 
-  await redisConnection.set(
-    "extract:" + id,
-    JSON.stringify({ ...current, ...extract }),
-  );
-  await redisConnection.expire("extract:" + id, 24 * 60 * 60, "NX");
+  const minimalExtract = {
+    ...current,
+    ...extract,
+    steps: extract.steps?.map(step => ({
+      step: step.step,
+      startedAt: step.startedAt,
+      finishedAt: step.finishedAt,
+      error: step.error,
+      discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS)
+    }))
+  };
+
+  await redisConnection.set("extract:" + id, JSON.stringify(minimalExtract));
+  await redisConnection.expire("extract:" + id, EXTRACT_TTL);
 }
 
 export async function getExtractExpiry(id: string): Promise<Date> {
```
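updateExtract now concatenates the stored steps with the incoming ones and keeps only the tail of that array, so the steps list can no longer grow without bound across repeated updates. A small sketch of the slice arithmetic; the string step labels are placeholders for illustration only:

```typescript
// Sketch of the "keep only the last 5 steps" merge in the new updateExtract.
const current = ["initial", "map", "rerank"];
const incoming = ["scrape", "extract", "complete"];

const allSteps = [...current, ...incoming]; // 6 steps total
const kept = allSteps.slice(Math.max(0, allSteps.length - 5));

console.log(kept);
// ["map", "rerank", "scrape", "extract", "complete"] — the oldest step is dropped.
// With 5 or fewer combined steps, Math.max(0, ...) clamps the start index to 0
// and everything is kept.
```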
```diff
@@ -323,7 +323,7 @@ export async function performExtraction(
     ],
   });
 
-  const timeout = Math.floor((request.timeout || 40000) * 0.7) || 30000;
+  const timeout = 60000;
 
   await updateExtract(extractId, {
     status: "processing",
```
```diff
@@ -566,7 +566,7 @@ export async function performExtraction(
     Object.keys(rSchema.properties).length > 0
   ) {
     // Scrape documents
-    const timeout = Math.floor((request.timeout || 40000) * 0.7) || 30000;
+    const timeout = 60000;
     let singleAnswerDocs: Document[] = [];
 
     // let rerank = await rerankLinks(links.map((url) => ({ url })), request.prompt ?? JSON.stringify(request.schema), urlTraces);
```
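Both performExtraction call sites replace the request-derived timeout with a flat 60-second budget. Under the old expression, a request that did not set a timeout got floor(40000 * 0.7) = 28000 ms, so this more than doubles the default scrape window. A quick comparison; the sample request value is an assumption:

```typescript
// Old vs. new timeout derivation for the document-scrape phase.
const request = { timeout: undefined as number | undefined }; // hypothetical request with no explicit timeout

const oldTimeout = Math.floor((request.timeout || 40000) * 0.7) || 30000;
const newTimeout = 60000;

console.log(oldTimeout); // 28000 — floor(40000 * 0.7); the "|| 30000" fallback only applies if the product is 0
console.log(newTimeout); // 60000 — a fixed budget, independent of request.timeout
```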