mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 08:50:37 +08:00
Update extract.ts
This commit is contained in:
parent
d6749c211d
commit
1b5f6a0959
@ -94,8 +94,8 @@ export async function extractController(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scrape each link
|
// Scrape all links in parallel
|
||||||
for (const url of links) {
|
const scrapePromises = links.map(async (url) => {
|
||||||
const origin = req.body.origin || "api";
|
const origin = req.body.origin || "api";
|
||||||
const timeout = req.body.timeout ?? 30000;
|
const timeout = req.body.timeout ?? 30000;
|
||||||
const jobId = crypto.randomUUID();
|
const jobId = crypto.randomUUID();
|
||||||
@ -109,7 +109,7 @@ export async function extractController(
|
|||||||
await addScrapeJob(
|
await addScrapeJob(
|
||||||
{
|
{
|
||||||
url,
|
url,
|
||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
team_id: req.auth.team_id,
|
team_id: req.auth.team_id,
|
||||||
scrapeOptions: scrapeOptions.parse({}),
|
scrapeOptions: scrapeOptions.parse({}),
|
||||||
internalOptions: {},
|
internalOptions: {},
|
||||||
@ -124,30 +124,37 @@ export async function extractController(
|
|||||||
|
|
||||||
const totalWait = 0;
|
const totalWait = 0;
|
||||||
|
|
||||||
let doc: Document;
|
|
||||||
try {
|
try {
|
||||||
doc = await waitForJob<Document>(jobId, timeout + totalWait);
|
const doc = await waitForJob<Document>(jobId, timeout + totalWait);
|
||||||
|
await getScrapeQueue().remove(jobId);
|
||||||
|
if (earlyReturn) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return doc;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
logger.error(`Error in scrapeController: ${e}`);
|
logger.error(`Error in scrapeController: ${e}`);
|
||||||
if (e instanceof Error && (e.message.startsWith("Job wait") || e.message === "timeout")) {
|
if (e instanceof Error && (e.message.startsWith("Job wait") || e.message === "timeout")) {
|
||||||
return res.status(408).json({
|
throw {
|
||||||
success: false,
|
status: 408,
|
||||||
error: "Request timed out",
|
error: "Request timed out"
|
||||||
});
|
};
|
||||||
} else {
|
} else {
|
||||||
return res.status(500).json({
|
throw {
|
||||||
success: false,
|
status: 500,
|
||||||
error: `(Internal server error) - ${(e && e.message) ? e.message : e}`,
|
error: `(Internal server error) - ${(e && e.message) ? e.message : e}`
|
||||||
});
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
|
||||||
await getScrapeQueue().remove(jobId);
|
try {
|
||||||
|
const results = await Promise.all(scrapePromises);
|
||||||
if (earlyReturn) {
|
docs.push(...results.filter(doc => doc !== null).map(x => x!));
|
||||||
return;
|
} catch (e) {
|
||||||
}
|
return res.status(e.status).json({
|
||||||
docs.push(doc);
|
success: false,
|
||||||
|
error: e.error
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const completions = await generateOpenAICompletions(
|
const completions = await generateOpenAICompletions(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user