From 1b5f6a0959670874dffdcf4d61f27d94e8ef5665 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 14 Nov 2024 14:59:34 -0500 Subject: [PATCH] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 45 +++++++++++++++----------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index e6d9c7c5..be7feaa9 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -94,8 +94,8 @@ export async function extractController( } } - // Scrape each link - for (const url of links) { + // Scrape all links in parallel + const scrapePromises = links.map(async (url) => { const origin = req.body.origin || "api"; const timeout = req.body.timeout ?? 30000; const jobId = crypto.randomUUID(); @@ -109,7 +109,7 @@ export async function extractController( await addScrapeJob( { url, - mode: "single_urls", + mode: "single_urls", team_id: req.auth.team_id, scrapeOptions: scrapeOptions.parse({}), internalOptions: {}, @@ -124,30 +124,37 @@ export async function extractController( const totalWait = 0; - let doc: Document; try { - doc = await waitForJob(jobId, timeout + totalWait); + const doc = await waitForJob(jobId, timeout + totalWait); + await getScrapeQueue().remove(jobId); + if (earlyReturn) { + return null; + } + return doc; } catch (e) { logger.error(`Error in scrapeController: ${e}`); if (e instanceof Error && (e.message.startsWith("Job wait") || e.message === "timeout")) { - return res.status(408).json({ - success: false, - error: "Request timed out", - }); + throw { + status: 408, + error: "Request timed out" + }; } else { - return res.status(500).json({ - success: false, - error: `(Internal server error) - ${(e && e.message) ? e.message : e}`, - }); + throw { + status: 500, + error: `(Internal server error) - ${(e && e.message) ? e.message : e}` + }; } } + }); - await getScrapeQueue().remove(jobId); - - if (earlyReturn) { - return; - } - docs.push(doc); + try { + const results = await Promise.all(scrapePromises); + docs.push(...results.filter(doc => doc !== null).map(x => x!)); + } catch (e) { + return res.status(e.status).json({ + success: false, + error: e.error + }); } const completions = await generateOpenAICompletions(