Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl (synced 2025-08-12 07:19:03 +08:00)
Update document-scraper.ts
This commit is contained in:
parent 2d4f4de0ab
commit 5030fea634
@@ -12,6 +12,7 @@ interface ScrapeDocumentOptions {
|
||||
plan: PlanType;
|
||||
origin: string;
|
||||
timeout: number;
|
||||
isSingleUrl?: boolean;
|
||||
}
|
||||
|
||||
export async function scrapeDocument(
|
||||
@@ -24,14 +25,14 @@ export async function scrapeDocument(
|
||||
trace.timing.scrapedAt = new Date().toISOString();
|
||||
}
|
||||
|
||||
const jobId = crypto.randomUUID();
|
||||
const jobPriority = await getJobPriority({
|
||||
plan: options.plan,
|
||||
team_id: options.teamId,
|
||||
basePriority: 10,
|
||||
});
|
||||
async function attemptScrape(timeout: number) {
|
||||
const jobId = crypto.randomUUID();
|
||||
const jobPriority = await getJobPriority({
|
||||
plan: options.plan,
|
||||
team_id: options.teamId,
|
||||
basePriority: 10,
|
||||
});
|
||||
|
||||
try {
|
||||
await addScrapeJob(
|
||||
{
|
||||
url: options.url,
|
||||
@@ -50,7 +51,7 @@ export async function scrapeDocument(
|
||||
jobPriority,
|
||||
);
|
||||
|
||||
const doc = await waitForJob<Document>(jobId, options.timeout);
|
||||
const doc = await waitForJob<Document>(jobId, timeout);
|
||||
await getScrapeQueue().remove(jobId);
|
||||
|
||||
if (trace) {
|
||||
@@ -63,6 +64,18 @@ export async function scrapeDocument(
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
try {
|
||||
try {
|
||||
return await attemptScrape(options.timeout);
|
||||
} catch (timeoutError) {
|
||||
if (options.isSingleUrl) {
|
||||
// For single URLs, try again with double timeout
|
||||
return await attemptScrape(options.timeout * 2);
|
||||
}
|
||||
throw timeoutError;
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`Error in scrapeDocument: ${error}`);
|
||||
if (trace) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user