Update extract.ts

This commit is contained in:
Nicolas 2024-11-14 14:59:34 -05:00
parent d6749c211d
commit 1b5f6a0959

View File

@@ -94,8 +94,8 @@ export async function extractController(
} }
} }
// Scrape each link // Scrape all links in parallel
for (const url of links) { const scrapePromises = links.map(async (url) => {
const origin = req.body.origin || "api"; const origin = req.body.origin || "api";
const timeout = req.body.timeout ?? 30000; const timeout = req.body.timeout ?? 30000;
const jobId = crypto.randomUUID(); const jobId = crypto.randomUUID();
@@ -124,30 +124,37 @@ export async function extractController(
const totalWait = 0; const totalWait = 0;
let doc: Document;
try { try {
doc = await waitForJob<Document>(jobId, timeout + totalWait); const doc = await waitForJob<Document>(jobId, timeout + totalWait);
await getScrapeQueue().remove(jobId);
if (earlyReturn) {
return null;
}
return doc;
} catch (e) { } catch (e) {
logger.error(`Error in scrapeController: ${e}`); logger.error(`Error in scrapeController: ${e}`);
if (e instanceof Error && (e.message.startsWith("Job wait") || e.message === "timeout")) { if (e instanceof Error && (e.message.startsWith("Job wait") || e.message === "timeout")) {
return res.status(408).json({ throw {
success: false, status: 408,
error: "Request timed out", error: "Request timed out"
}); };
} else { } else {
return res.status(500).json({ throw {
success: false, status: 500,
error: `(Internal server error) - ${(e && e.message) ? e.message : e}`, error: `(Internal server error) - ${(e && e.message) ? e.message : e}`
}); };
} }
} }
});
await getScrapeQueue().remove(jobId); try {
const results = await Promise.all(scrapePromises);
if (earlyReturn) { docs.push(...results.filter(doc => doc !== null).map(x => x!));
return; } catch (e) {
} return res.status(e.status).json({
docs.push(doc); success: false,
error: e.error
});
} }
const completions = await generateOpenAICompletions( const completions = await generateOpenAICompletions(